aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Barron <tjbarron@google.com>2021-06-23 13:20:04 -0700
committerTim Barron <tjbarron@google.com>2021-06-23 13:33:58 -0700
commit4d283b778e269315193245d4f6460d17119042e1 (patch)
tree637287af89eeb5568bd0dc1193f2dceb1d5c3b2b
parent63545535ac2423e7761dbdf977bd003f21a581e9 (diff)
downloadicing-4d283b778e269315193245d4f6460d17119042e1.tar.gz
Sync from upstream.
Descriptions: ========== Switch DocumentStore to using PortableFileBackedProtoLog instead of FileBackedProtoLog, and migrate any existing FileBackedProtoLog's to the Portable* version. ========== Add more fields to SchemaStats to capture the types that were newly added, modified and modified with an index rebuild. ========== Create a new DeleteByQueryStatsProto to capture query-specific stats related to the performance of DeleteByQuery. ========== Fix bugs in SetSchema: 1. Property id reassignments when a property is deleted and a new one added was not being detected 2. Property id reassignments due to changes to the index_nested_properties field were not detected. 3. Backward incompatible changes should invalidate dependent types. ========== Fix IcingSearchEngineTest.CJKSnippets test by choosing a simpler multi-byte character (when encoded in UTF-8) string that ICU will always segment in the same way regardless of api level. Bug: 185806837 Bug: 187205195 Bug: 187726282 Change-Id: I07cc8bf3fc5b3a7c5e8f0d24d1724297f3bfba5d
-rw-r--r--icing/file/file-backed-proto-log.h51
-rw-r--r--icing/file/file-backed-proto-log_benchmark.cc40
-rw-r--r--icing/file/portable-file-backed-proto-log.h40
-rw-r--r--icing/file/portable-file-backed-proto-log_benchmark.cc40
-rw-r--r--icing/file/portable-file-backed-proto-log_test.cc20
-rw-r--r--icing/icing-search-engine.cc61
-rw-r--r--icing/icing-search-engine_benchmark.cc114
-rw-r--r--icing/icing-search-engine_test.cc1030
-rw-r--r--icing/portable/platform.h16
-rw-r--r--icing/schema/schema-store.cc18
-rw-r--r--icing/schema/schema-store.h18
-rw-r--r--icing/schema/schema-store_test.cc106
-rw-r--r--icing/schema/schema-util.cc376
-rw-r--r--icing/schema/schema-util.h47
-rw-r--r--icing/schema/schema-util_test.cc542
-rw-r--r--icing/store/document-log-creator.cc196
-rw-r--r--icing/store/document-log-creator.h77
-rw-r--r--icing/store/document-store.cc48
-rw-r--r--icing/store/document-store.h29
-rw-r--r--icing/store/document-store_benchmark.cc87
-rw-r--r--icing/store/document-store_test.cc239
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/corpus_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/namespace_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/lite.lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/main/main-lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_arm/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/corpus_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/namespace_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/lite.lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/main/main-lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_android_x86/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/corpus_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/namespace_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/lite.lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/main/main-lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_ios/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/corpus_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/namespace_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/lite.lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/main/main-lexicon.hbin0 -> 4096 bytes
-rw-r--r--icing/testdata/not_portable_log/icing_search_engine_linux/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.hbin0 -> 4096 bytes
-rw-r--r--icing/testing/common-matchers.h69
-rw-r--r--icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc6
-rw-r--r--java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java19
-rw-r--r--proto/icing/proto/document.proto6
-rw-r--r--proto/icing/proto/logging.proto38
-rw-r--r--proto/icing/proto/schema.proto17
-rw-r--r--synced_AOSP_CL_number.txt2
52 files changed, 2956 insertions, 396 deletions
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 9ccd81b..4302dce 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -80,23 +80,6 @@
namespace icing {
namespace lib {
-namespace {
-
-bool IsEmptyBuffer(const char* buffer, int size) {
- return std::all_of(buffer, buffer + size,
- [](const char byte) { return byte == 0; });
-}
-
-// Helper function to get stored proto size from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
-
-// Helper function to get stored proto magic from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
-
-} // namespace
-
template <typename ProtoT>
class FileBackedProtoLog {
public:
@@ -402,6 +385,29 @@ class FileBackedProtoLog {
const Filesystem* filesystem, const std::string& file_path,
Crc32 initial_crc, int64_t start, int64_t end);
+ static bool IsEmptyBuffer(const char* buffer, int size) {
+ return std::all_of(buffer, buffer + size,
+ [](const char byte) { return byte == 0; });
+ }
+
+ // Helper function to get stored proto size from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+ // Helper function to get stored proto magic from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
+
+ // Reads out the metadata of a proto located at file_offset from the file.
+ //
+ // Returns:
+ // Proto's metadata on success
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
+ // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
+ static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
+ MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
+ std::unique_ptr<Header> header_;
+
// Magic number added in front of every proto. Used when reading out protos
// as a first check for corruption in each entry in the file. Even if there is
// a corruption, the best we can do is roll back to our last recovery point
@@ -429,16 +435,6 @@ class FileBackedProtoLog {
ScopedFd fd_;
const Filesystem* const filesystem_;
const std::string file_path_;
-
- // Reads out the metadata of a proto located at file_offset from the file.
- //
- // Returns:
- // Proto's metadata on success
- // OUT_OF_RANGE_ERROR if file_offset exceeds file_size
- // INTERNAL_ERROR if the metadata is invalid or any IO errors happen
- static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
- MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
- std::unique_ptr<Header> header_;
};
template <typename ProtoT>
@@ -573,6 +569,7 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
ComputeChecksum(filesystem, file_path, Crc32(),
sizeof(Header), file_size));
+
// Double check that the log checksum is the same as the one that was
// persisted last time. If not, we start recovery logic.
if (header->log_checksum != calculated_log_checksum.Get()) {
diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc
index 766cc64..c09fd5a 100644
--- a/icing/file/file-backed-proto-log_benchmark.cc
+++ b/icing/file/file-backed-proto-log_benchmark.cc
@@ -164,6 +164,46 @@ BENCHMARK(BM_Read)
// 16MiB, and we need some extra space for the
// rest of the document properties
+static void BM_Erase(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = IcingStringUtil::StringPrintf(
+ "%s%s", GetTestTempDir().c_str(), "/proto.log");
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log =
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ std::default_random_engine random;
+ const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+ proto_log->WriteProto(document));
+ state.ResumeTiming();
+
+ testing::DoNotOptimize(proto_log->EraseProto(write_offset));
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Erase);
+
static void BM_ComputeChecksum(benchmark::State& state) {
const Filesystem filesystem;
const std::string file_path = GetTestTempDir() + "/proto.log";
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index 000ab3d..825b763 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -83,28 +83,6 @@
namespace icing {
namespace lib {
-namespace {
-
-// Number of bytes we reserve for the heading at the beginning of the proto log.
-// We reserve this so the header can grow without running into the contents of
-// the proto log, triggering an unnecessary migration of the data.
-constexpr int kHeaderReservedBytes = 256;
-
-bool IsEmptyBuffer(const char* buffer, int size) {
- return std::all_of(buffer, buffer + size,
- [](const char byte) { return byte == 0; });
-}
-
-// Helper function to get stored proto size from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
-
-// Helper function to get stored proto magic from the metadata.
-// Metadata format: 8 bits magic + 24 bits size
-uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
-
-} // namespace
-
template <typename ProtoT>
class PortableFileBackedProtoLog {
public:
@@ -135,6 +113,11 @@ class PortableFileBackedProtoLog {
: compress(compress_in), max_proto_size(max_proto_size_in) {}
};
+ // Number of bytes we reserve for the heading at the beginning of the proto
+ // log. We reserve this so the header can grow without running into the
+ // contents of the proto log, triggering an unnecessary migration of the data.
+ static constexpr int kHeaderReservedBytes = 256;
+
// Header stored at the beginning of the file before the rest of the log
// contents. Stores metadata on the log.
class Header {
@@ -541,6 +524,19 @@ class PortableFileBackedProtoLog {
static libtextclassifier3::Status WriteProtoMetadata(
const Filesystem* filesystem, int fd, int32_t host_order_metadata);
+ static bool IsEmptyBuffer(const char* buffer, int size) {
+ return std::all_of(buffer, buffer + size,
+ [](const char byte) { return byte == 0; });
+ }
+
+ // Helper function to get stored proto size from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+ // Helper function to get stored proto magic from the metadata.
+ // Metadata format: 8 bits magic + 24 bits size
+ static uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
+
// Magic number added in front of every proto. Used when reading out protos
// as a first check for corruption in each entry in the file. Even if there is
// a corruption, the best we can do is roll back to our last recovery point
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc
index b1dfe12..04ccab0 100644
--- a/icing/file/portable-file-backed-proto-log_benchmark.cc
+++ b/icing/file/portable-file-backed-proto-log_benchmark.cc
@@ -163,6 +163,46 @@ BENCHMARK(BM_Read)
->Arg(15 * 1024 * 1024); // We do 15MiB here since our max proto size is
// 16MiB, and we need some extra space for the
// rest of the document properties
+ //
+static void BM_Erase(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = IcingStringUtil::StringPrintf(
+ "%s%s", GetTestTempDir().c_str(), "/proto.log");
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ std::default_random_engine random;
+ const std::string rand_str = RandomString(kAlNumAlphabet, /*len=*/1, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+ proto_log->WriteProto(document));
+ state.ResumeTiming();
+
+ testing::DoNotOptimize(proto_log->EraseProto(write_offset));
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Erase);
static void BM_ComputeChecksum(benchmark::State& state) {
const Filesystem filesystem;
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
index 69b8a1a..b5fee4b 100644
--- a/icing/file/portable-file-backed-proto-log_test.cc
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -113,7 +113,8 @@ TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
// With no protos written yet, the log should be minimum the size of the
// reserved header space.
- ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), kHeaderReservedBytes);
+ ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
+ PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes);
}
TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
@@ -417,8 +418,9 @@ TEST_F(PortableFileBackedProtoLogTest,
// We still have the corrupted content in our file, we didn't throw
// everything out.
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Gt(kHeaderReservedBytes));
+ EXPECT_THAT(
+ filesystem_.GetFileSize(file_path_.c_str()),
+ Gt(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
}
}
@@ -456,9 +458,10 @@ TEST_F(PortableFileBackedProtoLogTest,
DocumentProto document =
DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
std::string serialized_document = document.SerializeAsString();
- ASSERT_TRUE(filesystem_.PWrite(file_path_.c_str(), kHeaderReservedBytes,
- serialized_document.data(),
- serialized_document.size()));
+ ASSERT_TRUE(filesystem_.PWrite(
+ file_path_.c_str(),
+ PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes,
+ serialized_document.data(), serialized_document.size()));
Header header = ReadHeader(filesystem_, file_path_);
@@ -484,8 +487,9 @@ TEST_F(PortableFileBackedProtoLogTest,
EXPECT_TRUE(create_result.recalculated_checksum);
// We lost everything, file size is back down to the header.
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kHeaderReservedBytes));
+ EXPECT_THAT(
+ filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(PortableFileBackedProtoLog<DocumentProto>::kHeaderReservedBytes));
// At least the log is no longer dirty.
Header header = ReadHeader(filesystem_, file_path_);
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index e9865e4..55f59f2 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -502,21 +502,18 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
StatusProto* result_status = result_proto.mutable_status();
absl_ports::unique_lock l(&mutex_);
+ std::unique_ptr<Timer> timer = clock_->GetNewTimer();
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
- return result_proto;
- }
-
- libtextclassifier3::Status status = SchemaUtil::Validate(new_schema);
- if (!status.ok()) {
- TransformStatus(status, result_status);
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
auto lost_previous_schema_or = LostPreviousSchema();
if (!lost_previous_schema_or.ok()) {
TransformStatus(lost_previous_schema_or.status(), result_status);
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
bool lost_previous_schema = lost_previous_schema_or.ValueOrDie();
@@ -534,10 +531,11 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
std::move(new_schema), ignore_errors_and_delete_documents);
if (!set_schema_result_or.ok()) {
TransformStatus(set_schema_result_or.status(), result_status);
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
- const SchemaStore::SetSchemaResult set_schema_result =
- set_schema_result_or.ValueOrDie();
+ SchemaStore::SetSchemaResult set_schema_result =
+ std::move(set_schema_result_or).ValueOrDie();
for (const std::string& deleted_type :
set_schema_result.schema_types_deleted_by_name) {
@@ -549,6 +547,26 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
result_proto.add_incompatible_schema_types(incompatible_type);
}
+ for (const std::string& new_type :
+ set_schema_result.schema_types_new_by_name) {
+ result_proto.add_new_schema_types(std::move(new_type));
+ }
+
+ for (const std::string& compatible_type :
+ set_schema_result.schema_types_changed_fully_compatible_by_name) {
+ result_proto.add_fully_compatible_changed_schema_types(
+ std::move(compatible_type));
+ }
+
+ bool index_incompatible =
+ !set_schema_result.schema_types_index_incompatible_by_name.empty();
+ for (const std::string& index_incompatible_type :
+ set_schema_result.schema_types_index_incompatible_by_name) {
+ result_proto.add_index_incompatible_changed_schema_types(
+ std::move(index_incompatible_type));
+ }
+
+ libtextclassifier3::Status status;
if (set_schema_result.success) {
if (lost_previous_schema) {
// No previous schema to calculate a diff against. We have to go through
@@ -556,6 +574,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
status = document_store_->UpdateSchemaStore(schema_store_.get());
if (!status.ok()) {
TransformStatus(status, result_status);
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
} else if (!set_schema_result.old_schema_type_ids_changed.empty() ||
@@ -565,15 +584,17 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
set_schema_result);
if (!status.ok()) {
TransformStatus(status, result_status);
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
}
- if (lost_previous_schema || set_schema_result.index_incompatible) {
+ if (lost_previous_schema || index_incompatible) {
// Clears all index files
status = index_->Reset();
if (!status.ok()) {
TransformStatus(status, result_status);
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
@@ -584,6 +605,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
if (!restore_result.status.ok() &&
!absl_ports::IsDataLoss(restore_result.status)) {
TransformStatus(status, result_status);
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
}
@@ -594,6 +616,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
result_status->set_message("Schema is incompatible.");
}
+ result_proto.set_latency_ms(timer->GetElapsedMilliseconds());
return result_proto;
}
@@ -908,8 +931,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
return result_proto;
}
- DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
- delete_stats->set_delete_type(DeleteStatsProto::DeleteType::QUERY);
+ DeleteByQueryStatsProto* delete_stats =
+ result_proto.mutable_delete_by_query_stats();
+ delete_stats->set_query_length(search_spec.query().length());
+ delete_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ delete_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
libtextclassifier3::Status status =
@@ -919,6 +947,7 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
return result_proto;
}
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Gets unordered results from query processor
auto query_processor_or = QueryProcessor::Create(
index_.get(), language_segmenter_.get(), normalizer_.get(),
@@ -937,10 +966,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
}
QueryProcessor::QueryResults query_results =
std::move(query_results_or).ValueOrDie();
+ delete_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
ICING_VLOG(2) << "Deleting the docs that matched the query.";
int num_deleted = 0;
+ component_timer = clock_->GetNewTimer();
while (query_results.root_iterator->Advance().ok()) {
ICING_VLOG(3) << "Deleting doc "
<< query_results.root_iterator->doc_hit_info().document_id();
@@ -952,6 +984,13 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
return result_proto;
}
}
+ delete_stats->set_document_removal_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ int term_count = 0;
+ for (const auto& section_and_terms : query_results.query_terms) {
+ term_count += section_and_terms.second.size();
+ }
+ delete_stats->set_num_terms(term_count);
if (num_deleted > 0) {
result_proto.mutable_status()->set_code(StatusProto::OK);
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index b437724..316b74f 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -577,6 +577,120 @@ void BM_RepeatedPut(benchmark::State& state) {
// cap the limit to 1 << 18.
BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
+// This is different from BM_RepeatedPut since we're just trying to benchmark
+// one Put call, not thousands of them at once.
+void BM_Put(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(icing->Put(document));
+ }
+}
+BENCHMARK(BM_Put);
+
+void BM_Get(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ for (auto s : state) {
+ benchmark::DoNotOptimize(
+ icing->Get("namespace", "uri", GetResultSpecProto::default_instance()));
+ }
+}
+BENCHMARK(BM_Get);
+
+void BM_Delete(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message"))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document
+ DocumentProto document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .SetUri("uri")
+ .Build();
+
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ for (auto s : state) {
+ state.PauseTiming();
+ icing->Put(document);
+ state.ResumeTiming();
+
+ benchmark::DoNotOptimize(icing->Delete("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_Delete);
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index c1de0f0..5846e8b 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -42,6 +42,7 @@
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
+#include "icing/store/document-log-creator.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
@@ -100,9 +101,26 @@ constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE =
StringIndexingConfig_TokenizerType_Code_NONE;
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN;
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path) {
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+ filesystem.PRead(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path,
+ PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+ filesystem.Write(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
public:
@@ -720,7 +738,13 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) {
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
}
@@ -738,12 +762,20 @@ TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) {
property->set_property_name("title");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property = type->add_properties();
property->set_property_name("body");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
// 3. SetSchema should succeed and the version number should be updated.
- EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema, true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_fully_compatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
}
@@ -929,7 +961,12 @@ TEST_F(IcingSearchEngineTest,
}
TEST_F(IcingSearchEngineTest, SetSchema) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(1000);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
auto message_document = CreateMessageDocument("namespace", "uri");
@@ -958,26 +995,31 @@ TEST_F(IcingSearchEngineTest, SetSchema) {
empty_type->set_schema_type("");
// Make sure we can't set invalid schemas
- EXPECT_THAT(icing.SetSchema(invalid_schema).status(),
+ SetSchemaResultProto set_schema_result = icing.SetSchema(invalid_schema);
+ EXPECT_THAT(set_schema_result.status(),
ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
// Can add an document of a set schema
- EXPECT_THAT(icing.SetSchema(schema_with_message).status(), ProtoIsOk());
+ set_schema_result = icing.SetSchema(schema_with_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
// Schema with Email doesn't have Message, so would result incompatible
// data
- EXPECT_THAT(icing.SetSchema(schema_with_email).status(),
+ set_schema_result = icing.SetSchema(schema_with_email);
+ EXPECT_THAT(set_schema_result.status(),
ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
// Can expand the set of schema types and add an document of a new
// schema type
- EXPECT_THAT(icing.SetSchema(SchemaProto(schema_with_email_and_message))
- .status()
- .code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
+ set_schema_result = icing.SetSchema(schema_with_email_and_message);
+ EXPECT_THAT(set_schema_result.status(), ProtoStatusIs(StatusProto::OK));
+ EXPECT_THAT(set_schema_result.latency_ms(), Eq(1000));
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
// Can't add an document whose schema isn't set
auto photo_document = DocumentBuilder()
.SetKey("namespace", "uri")
@@ -990,7 +1032,8 @@ TEST_F(IcingSearchEngineTest, SetSchema) {
HasSubstr("'Photo' not found"));
}
-TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
+TEST_F(IcingSearchEngineTest,
+ SetSchemaNewIndexedPropertyTriggersIndexRestorationAndReturnsOk) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -999,8 +1042,15 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
->mutable_properties(0)
->clear_string_indexing_config();
- EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status(),
- ProtoIsOk());
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(schema_with_no_indexed_property);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Message");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
// Nothing will be index and Search() won't return anything.
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
ProtoIsOk());
@@ -1021,8 +1071,14 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
SchemaProto schema_with_indexed_property = CreateMessageSchema();
// Index restoration should be triggered here because new schema requires more
// properties to be indexed.
- EXPECT_THAT(icing.SetSchema(schema_with_indexed_property).status(),
- ProtoIsOk());
+ set_schema_result = icing.SetSchema(schema_with_indexed_property);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Message");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -1034,6 +1090,439 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
expected_search_result_proto));
}
+TEST_F(IcingSearchEngineTest,
+ SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto person_proto =
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Bill Lundbergh")
+ .Build())
+ .Build();
+
+ // "sender.name" should get assigned property id 0 and subject should get
+ // property id 1.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // document should match a query for 'Bill' in 'sender.name', but not in
+ // 'subject'
+ SearchSpecProto search_spec;
+ search_spec.set_query("sender.name:Bill");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto result;
+ result.mutable_status()->set_code(StatusProto::OK);
+ *result.mutable_results()->Add()->mutable_document() = document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+
+ SearchResultProto empty_result;
+ empty_result.mutable_status()->set_code(StatusProto::OK);
+ search_spec.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ // Now update the schema with index_nested_properties=false. This should
+ // reassign property ids, lead to an index rebuild and ensure that nothing
+ // match a query for "Bill".
+ SchemaProto no_nested_schema =
+ SchemaBuilder()
+ .AddType(person_proto)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(no_nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // document shouldn't match a query for 'Bill' in either 'sender.name' or
+ // 'subject'
+ search_spec.set_query("sender.name:Bill");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+
+ search_spec.set_query("subject:Bill");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
+}
+
+TEST_F(IcingSearchEngineTest,
+ ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 'body' should have a property id of 0 and 'subject' should have a property
+ // id of 1.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only a subject property.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec;
+ search_spec.set_query("subject:tps");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto result;
+ result.mutable_status()->set_code(StatusProto::OK);
+ *result.mutable_results()->Add()->mutable_document() = document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+
+ // Now update the schema to remove the 'body' field. This is backwards
+ // incompatible, but document should be preserved because it doesn't contain a
+ // 'body' field. If the index is correctly rebuilt, then 'subject' will now
+ // have a property id of 0. If not, then the hits in the index will still have
+ // have a property id of 1 and therefore it won't be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec.set_query("subject:tps");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+}
+
+TEST_F(
+ IcingSearchEngineTest,
+ ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // 'body' should have a property id of 0 and 'subject' should have a property
+ // id of 1.
+ SchemaProto email_with_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(email_with_body_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create a document with only a subject property.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ SearchSpecProto search_spec;
+ search_spec.set_query("subject:tps");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto result;
+ result.mutable_status()->set_code(StatusProto::OK);
+ *result.mutable_results()->Add()->mutable_document() = document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+
+ // Now update the schema to remove the 'body' field. This is backwards
+ // incompatible, but document should be preserved because it doesn't contain a
+ // 'body' field. If the index is correctly rebuilt, then 'subject' and 'to'
+ // will now have property ids of 0 and 1 respectively. If not, then the hits
+ // in the index will still have have a property id of 1 and therefore it won't
+ // be found.
+ SchemaProto email_no_body_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("to")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ email_no_body_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // We should be able to retrieve the document by searching for 'tps' in
+ // 'subject'.
+ search_spec.set_query("subject:tps");
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result));
+}
+
+TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaTypeConfigProto email_schema_type =
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument("Person",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("company")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ SetSchemaResultProto expected_set_schema_result;
+ expected_set_schema_result.mutable_new_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_new_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Create two documents - a person document and an email document - both docs
+ // should be deleted when we remove the 'company' field from the person type.
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("Person")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("name", "Bill Lundbergh")
+ .AddStringProperty("company", "Initech Corp.")
+ .Build();
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("Email")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("subject",
+ "Did you get the memo about TPS reports?")
+ .AddDocumentProperty("sender", person_document)
+ .Build();
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+
+ // We should be able to retrieve both documents.
+ GetResultProto get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(person_document));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoIsOk());
+ EXPECT_THAT(get_result.document(), EqualsProto(email_document));
+
+ // Now update the schema to remove the 'company' field. This is backwards
+ // incompatible, *both* documents should be deleted because both fail
+ // validation (they each contain a 'Person' that has a non-existent property).
+ nested_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_schema_type)
+ .Build();
+
+ set_schema_result = icing.SetSchema(
+ nested_schema, /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
+ expected_set_schema_result = SetSchemaResultProto();
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person");
+ expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Email");
+ expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
+ ->Add("Person");
+ expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
+
+ // Both documents should be deleted now.
+ get_result =
+ icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ get_result =
+ icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance());
+ EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -1079,6 +1568,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
// Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result =
+ icing.SetSchema(schema_with_required_subject);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
SetSchemaResultProto expected_set_schema_result_proto;
expected_set_schema_result_proto.mutable_status()->set_code(
StatusProto::FAILED_PRECONDITION);
@@ -1086,15 +1579,17 @@ TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
"Schema is incompatible.");
expected_set_schema_result_proto.add_incompatible_schema_types("email");
- EXPECT_THAT(icing.SetSchema(schema_with_required_subject),
- EqualsProto(expected_set_schema_result_proto));
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
// Force set it
+ set_schema_result =
+ icing.SetSchema(schema_with_required_subject,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
expected_set_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
expected_set_schema_result_proto.mutable_status()->clear_message();
- EXPECT_THAT(icing.SetSchema(schema_with_required_subject,
- /*ignore_errors_and_delete_documents=*/true),
- EqualsProto(expected_set_schema_result_proto));
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result_proto));
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -1151,19 +1646,25 @@ TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) {
type->set_schema_type("email");
// Can't set the schema since it's incompatible
+ SetSchemaResultProto set_schema_result = icing.SetSchema(new_schema);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
SetSchemaResultProto expected_result;
expected_result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
expected_result.mutable_status()->set_message("Schema is incompatible.");
expected_result.add_deleted_schema_types("message");
- EXPECT_THAT(icing.SetSchema(new_schema), EqualsProto(expected_result));
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
// Force set it
+ set_schema_result =
+ icing.SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/true);
+ // Ignore latency numbers. They're covered elsewhere.
+ set_schema_result.clear_latency_ms();
expected_result.mutable_status()->set_code(StatusProto::OK);
expected_result.mutable_status()->clear_message();
- EXPECT_THAT(icing.SetSchema(new_schema,
- /*ignore_errors_and_delete_documents=*/true),
- EqualsProto(expected_result));
+ EXPECT_THAT(set_schema_result, EqualsProto(expected_result));
// "email" document is still there
GetResultProto expected_get_result_proto;
@@ -2074,7 +2575,8 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
// Deletes document1
ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
const std::string document_log_path =
- icing_options.base_dir() + "/document_dir/document_log";
+ icing_options.base_dir() + "/document_dir/" +
+ DocumentLogCreator::GetDocumentLogFilename();
int64_t document_log_size_before =
filesystem()->GetFileSize(document_log_path.c_str());
ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
@@ -2765,11 +3267,16 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) {
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
EXPECT_THAT(result_proto.status(), ProtoIsOk());
- DeleteStatsProto exp_stats;
- exp_stats.set_delete_type(DeleteStatsProto::DeleteType::QUERY);
+ DeleteByQueryStatsProto exp_stats;
exp_stats.set_latency_ms(7);
exp_stats.set_num_documents_deleted(1);
- EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
+ exp_stats.set_query_length(search_spec.query().length());
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(0);
+ exp_stats.set_num_schema_types_filtered(0);
+ exp_stats.set_parse_query_latency_ms(7);
+ exp_stats.set_document_removal_latency_ms(7);
+ EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats));
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -3438,8 +3945,8 @@ TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) {
EqualsProto(expected_get_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
const std::string corrupt_data = "1234";
EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
corrupt_data.data(), corrupt_data.size()));
@@ -5010,74 +5517,230 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
EqualsSearchResultIgnoreStatsAndScores(empty_result));
}
-TEST_F(IcingSearchEngineTest, PersistToDisk) {
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
+TEST_F(IcingSearchEngineTest, ImplicitPersistToDiskFullSavesEverything) {
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ } // Destructing calls a PersistToDisk(FULL)
- // Persisting shouldn't affect anything
- EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- } // Destructing persists as well
+ // There should be no recovery since everything should be saved properly.
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Schema is still intact.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
+
+ EXPECT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // Documents are still intact.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(
icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
EqualsProto(expected_get_result_proto));
+
+ // Index is still intact.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message"); // Content in the Message document.
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, ExplicitPersistToDiskFullSavesEverything) {
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+
+ // Add schema and documents to our first icing1 instance.
+ IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ // Initialize a second icing2 instance which should have it's own memory
+ // space. If data from icing1 isn't being persisted to the files, then icing2
+ // won't be able to see those changes.
+ IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
+
+ // There should be no recovery since everything should be saved properly.
+ InitializeResultProto init_result = icing2.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Schema is still intact.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
+
+ EXPECT_THAT(icing2.GetSchema(),
+ EqualsProto(expected_get_schema_result_proto));
+
+ // Documents are still intact.
+ GetResultProto expected_get_result_proto;
+ expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_result_proto.mutable_document() = document;
+
+ EXPECT_THAT(
+ icing2.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+ EqualsProto(expected_get_result_proto));
+
+ // Index is still intact.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message"); // Content in the Message document.
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto actual_results =
+ icing2.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, NoPersistToDiskLiteDoesntPersistPut) {
+TEST_F(IcingSearchEngineTest, NoPersistToDiskLosesAllDocumentsAndIndex) {
IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- DocumentProto document1 = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk());
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk());
EXPECT_THAT(
icing1.Get("namespace", "uri", GetResultSpecProto::default_instance())
.document(),
- EqualsProto(document1));
+ EqualsProto(document));
+
+ // It's intentional that no PersistToDisk call is made before initializing a
+ // second instance of icing.
IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk());
+ InitializeResultProto init_result = icing2.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::PARTIAL_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
// The document shouldn't be found because we forgot to call
// PersistToDisk(LITE)!
EXPECT_THAT(
icing2.Get("namespace", "uri", GetResultSpecProto::default_instance())
.status(),
ProtoStatusIs(StatusProto::NOT_FOUND));
+
+ // Searching also shouldn't get us anything because the index wasn't
+ // recovered.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message"); // Content in the Message document.
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+
+ SearchResultProto actual_results =
+ icing2.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
-TEST_F(IcingSearchEngineTest, PersistToDiskLitePersistsPut) {
+TEST_F(IcingSearchEngineTest, PersistToDiskLiteSavesGroundTruth) {
+ DocumentProto document = CreateMessageDocument("namespace", "uri");
+
IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- DocumentProto document1 = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.Put(document).status(), ProtoIsOk());
EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk());
EXPECT_THAT(
icing1.Get("namespace", "uri", GetResultSpecProto::default_instance())
.document(),
- EqualsProto(document1));
+ EqualsProto(document));
IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk());
+ InitializeResultProto init_result = icing2.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // A checksum mismatch gets reported as an IO error. The document store and
+ // index didn't have their derived files included in the checksum previously,
+ // so reinitializing will trigger a checksum mismatch.
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+
+ // Schema is still intact.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = CreateMessageSchema();
+
+ EXPECT_THAT(icing2.GetSchema(),
+ EqualsProto(expected_get_schema_result_proto));
+
// The document should be found because we called PersistToDisk(LITE)!
EXPECT_THAT(
icing2.Get("namespace", "uri", GetResultSpecProto::default_instance())
.document(),
- EqualsProto(document1));
+ EqualsProto(document));
+
+ // Recovered index is still intact.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message"); // Content in the Message document.
+
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document;
+
+ SearchResultProto actual_results =
+ icing2.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, ResetOk) {
@@ -5616,15 +6279,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) {
// 2. Delete the last document from the document log
{
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
- FileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto create_result,
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem(), document_log_file.c_str(),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
std::move(create_result.proto_log);
document = DocumentBuilder(document).SetUri("fake_type/0").Build();
@@ -5689,15 +6353,16 @@ TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) {
// 2. Delete the last two documents from the document log.
{
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
filesystem()->DeleteFile(document_log_file.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
- FileBackedProtoLog<DocumentWrapper>::Create(
- filesystem(), document_log_file.c_str(),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true)));
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto create_result,
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem(), document_log_file.c_str(),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log =
std::move(create_result.proto_log);
document = DocumentBuilder(document).SetUri("fake_type/0").Build();
@@ -5994,8 +6659,8 @@ TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) {
// Append a non-checksummed document. This will mess up the checksum of the
// proto log, forcing it to rewind and later return a DATA_LOSS error.
const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
filesystem()->PWrite(document_log_file.c_str(), file_size,
@@ -6045,31 +6710,47 @@ TEST_F(IcingSearchEngineTest,
.SetSchema("Message")
.AddStringProperty("body", "message body")
.Build();
+
+ const std::string document_log_file = absl_ports::StrCat(
+ GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
+ int64_t corruptible_offset;
+
{
// Initialize and put a document.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // There's some space at the beginning of the file (e.g. header, kmagic,
+ // etc) that is necessary to initialize the FileBackedProtoLog. We can't
+ // corrupt that region, so we need to figure out the offset at which
+ // documents will be written to - which is the file size after
+ // initialization.
+ corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
+
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
}
{
- // Modify the document log checksum to trigger a complete document log
- // rewind.
- const std::string document_log_file =
- absl_ports::StrCat(GetDocumentDir(), "/document_log");
-
- FileBackedProtoLog<DocumentWrapper>::Header document_log_header;
- filesystem()->PRead(document_log_file.c_str(), &document_log_header,
- sizeof(FileBackedProtoLog<DocumentWrapper>::Header),
- /*offset=*/0);
- // Set a garbage checksum.
- document_log_header.log_checksum = 10;
- document_log_header.header_checksum =
- document_log_header.CalculateHeaderChecksum();
- filesystem()->PWrite(document_log_file.c_str(), /*offset=*/0,
- &document_log_header,
- sizeof(FileBackedProtoLog<DocumentWrapper>::Header));
+ // "Corrupt" the content written in the log. Make the corrupt document
+ // smaller than our original one so we don't accidentally write past our
+ // file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem()->PWrite(
+ document_log_file.c_str(), corruptible_offset,
+ serialized_document.data(), serialized_document.size()));
+
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+ ReadDocumentLogHeader(*filesystem(), document_log_file);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteDocumentLogHeader(*filesystem(), document_log_file, header);
}
{
@@ -7182,6 +7863,171 @@ TEST_F(IcingSearchEngineTest, CJKSnippetTest) {
EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
}
+#ifndef ICING_JNI_TEST
+// We skip this test case when we're running in a jni_test since the data files
+// will be stored in the android-instrumented storage location, rather than the
+// normal cc_library runfiles directory. To get that storage location, it's
+// recommended to use the TestStorage APIs which handles different API
+// levels/absolute vs relative/etc differences. Since that's only accessible on
+// the java-side, and I haven't figured out a way to pass that directory path to
+// this native side yet, we're just going to disable this. The functionality is
+// already well-tested across 4 different emulated OS's so we're not losing much
+// test coverage here.
+TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) {
+ // Copy the testdata files into our IcingSearchEngine directory
+ std::string dir_without_portable_log;
+ if (IsAndroidX86()) {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_x86");
+ } else if (IsAndroidArm()) {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_arm");
+ } else if (IsIosPlatform()) {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_ios");
+ } else {
+ dir_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_linux");
+ }
+
+ // Create dst directory that we'll initialize the IcingSearchEngine over.
+ std::string base_dir = GetTestBaseDir() + "_migrate";
+ ASSERT_THAT(filesystem()->DeleteDirectoryRecursively(base_dir.c_str()), true);
+ ASSERT_THAT(filesystem()->CreateDirectoryRecursively(base_dir.c_str()), true);
+
+ ASSERT_TRUE(filesystem()->CopyDirectory(dir_without_portable_log.c_str(),
+ base_dir.c_str(),
+ /*recursive=*/true));
+
+ IcingSearchEngineOptions icing_options;
+ icing_options.set_base_dir(base_dir);
+
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+
+ // Set up schema, this is the one used to validate documents in the testdata
+ // files. Do not change unless you're also updating the testdata files.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ // Make sure our schema is still the same as we expect. If not, there's
+ // definitely no way we're getting the documents back that we expect.
+ GetSchemaResultProto expected_get_schema_result_proto;
+ expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_get_schema_result_proto.mutable_schema() = schema;
+ ASSERT_THAT(icing.GetSchema(), EqualsProto(expected_get_schema_result_proto));
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(321)
+ .AddStringProperty("body", "baz bat")
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ // Document 1 and 3 were put normally, and document 2 was deleted in our
+ // testdata files.
+ EXPECT_THAT(icing
+ .Get(document1.namespace_(), document1.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document1));
+ EXPECT_THAT(icing
+ .Get(document2.namespace_(), document2.uri(),
+ GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing
+ .Get(document3.namespace_(), document3.uri(),
+ GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document3));
+
+ // Searching for "foo" should get us document1.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("foo");
+
+ SearchResultProto expected_document1;
+ expected_document1.mutable_status()->set_code(StatusProto::OK);
+ *expected_document1.mutable_results()->Add()->mutable_document() = document1;
+
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document1));
+
+ // Searching for "baz" would've gotten us document2, except it got deleted.
+ // Make sure that it's cleared from our index too.
+ search_spec.set_query("baz");
+
+ SearchResultProto expected_no_documents;
+ expected_no_documents.mutable_status()->set_code(StatusProto::OK);
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_no_documents));
+
+ // Searching for "phoo" should get us document3.
+ search_spec.set_query("phoo");
+
+ SearchResultProto expected_document3;
+ expected_document3.mutable_status()->set_code(StatusProto::OK);
+ *expected_document3.mutable_results()->Add()->mutable_document() = document3;
+
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(expected_document3));
+}
+#endif // !ICING_JNI_TEST
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/portable/platform.h b/icing/portable/platform.h
index 8712835..150eede 100644
--- a/icing/portable/platform.h
+++ b/icing/portable/platform.h
@@ -34,11 +34,19 @@ inline bool IsReverseJniTokenization() {
return false;
}
-// Whether the running test is an Android test.
-inline bool IsAndroidPlatform() {
-#if defined(__ANDROID__)
+// Whether we're running on android_x86
+inline bool IsAndroidX86() {
+#if defined(__ANDROID__) && defined(__i386__)
return true;
-#endif // defined(__ANDROID__)
+#endif // defined(__ANDROID__) && defined(__i386__)
+ return false;
+}
+
+// Whether we're running on android_armeabi-v7a
+inline bool IsAndroidArm() {
+#if defined(__ANDROID__) && defined(__arm__)
+ return true;
+#endif // defined(__ANDROID__) && defined(__arm__)
return false;
}
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index 7040a31..3307638 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -322,12 +322,18 @@ SchemaStore::SetSchema(const SchemaProto& new_schema,
libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
SchemaStore::SetSchema(SchemaProto&& new_schema,
bool ignore_errors_and_delete_documents) {
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap new_dependency_map,
+ SchemaUtil::Validate(new_schema));
+
SetSchemaResult result;
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// We don't have a pre-existing schema, so anything is valid.
result.success = true;
+ for (const SchemaTypeConfigProto& type_config : new_schema.types()) {
+ result.schema_types_new_by_name.insert(type_config.schema_type());
+ }
} else if (!schema_proto_or.ok()) {
// Real error
return schema_proto_or.status();
@@ -345,10 +351,14 @@ SchemaStore::SetSchema(SchemaProto&& new_schema,
// Different schema, track the differences and see if we can still write it
SchemaUtil::SchemaDelta schema_delta =
- SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema);
-
- // An incompatible index is fine, we can just reindex
- result.index_incompatible = schema_delta.index_incompatible;
+ SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ new_dependency_map);
+
+ result.schema_types_new_by_name = std::move(schema_delta.schema_types_new);
+ result.schema_types_changed_fully_compatible_by_name =
+ std::move(schema_delta.schema_types_changed_fully_compatible);
+ result.schema_types_index_incompatible_by_name =
+ std::move(schema_delta.schema_types_index_incompatible);
for (const auto& schema_type : schema_delta.schema_types_deleted) {
// We currently don't support deletions, so mark this as not possible.
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index dd1edb8..b9be6c0 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -68,9 +68,6 @@ class SchemaStore {
// to file.
bool success = false;
- // Whether the new schema changes invalidate the index.
- bool index_incompatible = false;
-
// SchemaTypeIds of schema types can be reassigned new SchemaTypeIds if:
// 1. Schema types are added in the middle of the SchemaProto
// 2. Schema types are removed from the middle of the SchemaProto
@@ -100,6 +97,21 @@ class SchemaStore {
// SchemaUtil::ComputeCompatibilityDelta. Represented by the SchemaTypeId
// assigned to this SchemaTypeConfigProto in the *old* schema.
std::unordered_set<SchemaTypeId> schema_types_incompatible_by_id;
+
+ // Schema types that were added in the new schema. Represented by the
+ // `schema_type` field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_new_by_name;
+
+ // Schema types that were changed in a way that was backwards compatible and
+ // didn't invalidate the index. Represented by the `schema_type` field in
+ // the SchemaTypeConfigProto.
+ std::unordered_set<std::string>
+ schema_types_changed_fully_compatible_by_name;
+
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the index. Represented by the `schema_type` field in the
+ // SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_index_incompatible_by_name;
};
// Factory function to create a SchemaStore which does not take ownership
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 69663b5..be7170f 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -49,6 +49,8 @@ using ::testing::Pointee;
constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED =
+ PropertyConfigProto_Cardinality_Code_REPEATED;
constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
StringIndexingConfig_TokenizerType_Code_PLAIN;
@@ -101,6 +103,7 @@ TEST_F(SchemaStoreTest, CorruptSchemaError) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -136,6 +139,7 @@ TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -173,6 +177,7 @@ TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -244,6 +249,7 @@ TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
@@ -265,6 +271,7 @@ TEST_F(SchemaStoreTest, MultipleCreateOk) {
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
@@ -301,6 +308,7 @@ TEST_F(SchemaStoreTest, SetNewSchemaOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -316,6 +324,7 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -323,6 +332,8 @@ TEST_F(SchemaStoreTest, SetSameSchemaOk) {
EXPECT_THAT(*actual_schema, EqualsProto(schema_));
// And one more for fun
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
@@ -337,6 +348,7 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(schema_.types(0).schema_type());
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -347,6 +359,7 @@ TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
schema_.clear_types();
// Set the incompatible schema
+ result = SchemaStore::SetSchemaResult();
result.success = false;
result.schema_types_deleted_by_name.emplace("email");
result.schema_types_deleted_by_id.emplace(0);
@@ -366,6 +379,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert("email");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -378,6 +392,9 @@ TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
.Build();
// Set the compatible schema
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_new_by_name.insert("new_type");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
@@ -398,6 +415,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("message");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -454,6 +473,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("message");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -468,6 +489,8 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
// Since we assign SchemaTypeIds based on order in the SchemaProto, this will
// cause SchemaTypeIds to change
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
result.old_schema_type_ids_changed.emplace(0); // Old SchemaTypeId of "email"
result.old_schema_type_ids_changed.emplace(
1); // Old SchemaTypeId of "message"
@@ -479,7 +502,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
-TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) {
+TEST_F(SchemaStoreTest, IndexedPropertyChangeRequiresReindexingOk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -497,6 +520,7 @@ TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert("email");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -512,16 +536,85 @@ TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) {
.SetCardinality(CARDINALITY_OPTIONAL)))
.Build();
- // With a new indexed property, we'll need to reindex
- result.index_incompatible = true;
-
// Set the compatible schema
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_index_incompatible_by_name.insert("email");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
+TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ // Make two schemas. One that sets index_nested_properties to false and one
+ // that sets it to true.
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto no_nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument("email",
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument("email",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Set schema with index_nested_properties=false to start.
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("person");
+ EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+ schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
+
+ // Set schema with index_nested_properties=true and confirm that the change to
+ // 'person' is index incompatible.
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_index_incompatible_by_name.insert("person");
+ EXPECT_THAT(schema_store->SetSchema(nested_index_schema),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(nested_index_schema));
+
+ // Set schema with index_nested_properties=false and confirm that the change
+ // to 'person' is index incompatible.
+ result = SchemaStore::SetSchemaResult();
+ result.success = true;
+ result.schema_types_index_incompatible_by_name.insert("person");
+ EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+ ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+ EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema));
+}
+
TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -540,6 +633,7 @@ TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert("email");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -602,6 +696,8 @@ TEST_F(SchemaStoreTest, GetSchemaTypeId) {
// Set it for the first time
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert(first_type);
+ result.schema_types_new_by_name.insert(second_type);
EXPECT_THAT(schema_store->SetSchema(schema_),
IsOkAndHolds(EqualsSetSchemaResult(result)));
@@ -760,6 +856,8 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) {
SchemaStore::SetSchemaResult result;
result.success = true;
+ result.schema_types_new_by_name.insert("email");
+ result.schema_types_new_by_name.insert("fullSectionsType");
EXPECT_THAT(schema_store->SetSchema(schema),
IsOkAndHolds(EqualsSetSchemaResult(result)));
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 49e7096..22bc3f6 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -37,6 +37,20 @@ namespace lib {
namespace {
+bool ArePropertiesEqual(const PropertyConfigProto& old_property,
+ const PropertyConfigProto& new_property) {
+ return old_property.property_name() == new_property.property_name() &&
+ old_property.data_type() == new_property.data_type() &&
+ old_property.schema_type() == new_property.schema_type() &&
+ old_property.cardinality() == new_property.cardinality() &&
+ old_property.string_indexing_config().term_match_type() ==
+ new_property.string_indexing_config().term_match_type() &&
+ old_property.string_indexing_config().tokenizer_type() ==
+ new_property.string_indexing_config().tokenizer_type() &&
+ old_property.document_indexing_config().index_nested_properties() ==
+ new_property.document_indexing_config().index_nested_properties();
+}
+
bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
const PropertyConfigProto& new_property) {
if (old_property.cardinality() < new_property.cardinality()) {
@@ -95,43 +109,175 @@ bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
} // namespace
-libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
- // Tracks SchemaTypeConfigs that we've validated already.
- std::unordered_set<std::string_view> known_schema_types;
+libtextclassifier3::Status ExpandTranstiveDependencies(
+ const SchemaUtil::DependencyMap& child_to_direct_parent_map,
+ std::string_view type,
+ SchemaUtil::DependencyMap* expanded_child_to_parent_map,
+ std::unordered_set<std::string_view>* pending_expansions,
+ std::unordered_set<std::string_view>* orphaned_types) {
+ auto expanded_itr = expanded_child_to_parent_map->find(type);
+ if (expanded_itr != expanded_child_to_parent_map->end()) {
+ // We've already expanded this type. Just return.
+ return libtextclassifier3::Status::OK;
+ }
+ auto itr = child_to_direct_parent_map.find(type);
+ if (itr == child_to_direct_parent_map.end()) {
+ // It's an orphan. Just return.
+ orphaned_types->insert(type);
+ return libtextclassifier3::Status::OK;
+ }
+ pending_expansions->insert(type);
+ std::unordered_set<std::string_view> expanded_dependencies;
+
+ // Add all of the direct parent dependencies.
+ expanded_dependencies.reserve(itr->second.size());
+ expanded_dependencies.insert(itr->second.begin(), itr->second.end());
+
+ // Iterate through each direct parent and add their indirect parents.
+ for (std::string_view dep : itr->second) {
+ // 1. Check if we're in the middle of expanding this type - IOW there's a
+ // cycle!
+ if (pending_expansions->count(dep) > 0) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Infinite loop detected in type configs. '", type,
+ "' references itself."));
+ }
- // Tracks SchemaTypeConfigs that have been mentioned (by other
- // SchemaTypeConfigs), but we haven't validated yet.
- std::unordered_set<std::string_view> unknown_schema_types;
+ // 2. Expand this type as needed.
+ ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies(
+ child_to_direct_parent_map, dep, expanded_child_to_parent_map,
+ pending_expansions, orphaned_types));
+ if (orphaned_types->count(dep) > 0) {
+ // Dep is an orphan. Just skip to the next dep.
+ continue;
+ }
- // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
- // already.
- std::unordered_set<std::string_view> known_property_names;
+ // 3. Dep has been fully expanded. Add all of its dependencies to this
+ // type's dependencies.
+ auto dep_expanded_itr = expanded_child_to_parent_map->find(dep);
+ expanded_dependencies.reserve(expanded_dependencies.size() +
+ dep_expanded_itr->second.size());
+ expanded_dependencies.insert(dep_expanded_itr->second.begin(),
+ dep_expanded_itr->second.end());
+ }
+ expanded_child_to_parent_map->insert(
+ {type, std::move(expanded_dependencies)});
+ pending_expansions->erase(type);
+ return libtextclassifier3::Status::OK;
+}
- // Tracks which schemas reference other schemas. This is used to detect
- // infinite loops between indexed schema references (e.g. A -> B -> C -> A).
- // We could get into an infinite loop while trying to assign section ids.
- //
- // The key is the "child" schema that is being referenced within another
- // schema.
- // The value is a set of all the direct/indirect "parent" schemas that
- // reference the "child" schema.
- //
- // For example, if A has a nested document property of type B, then A is the
- // "parent" and B is the "child" and so schema_references will contain
- // schema_references[B] == {A}.
- std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
- schema_references;
+// Expands the dependencies represented by the child_to_direct_parent_map to
+// also include indirect parents.
+//
+// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
+// type B and B has a property of type C. C and D only have non-document
+// properties.
+//
+// The child to direct parent dependency map for this schema would be:
+// C -> B
+// B -> A
+//
+// This function would expand it so that A is also present as an indirect parent
+// of C.
+libtextclassifier3::StatusOr<SchemaUtil::DependencyMap>
+ExpandTranstiveDependencies(
+ const SchemaUtil::DependencyMap& child_to_direct_parent_map) {
+ SchemaUtil::DependencyMap expanded_child_to_parent_map;
+
+ // Types that we are expanding.
+ std::unordered_set<std::string_view> pending_expansions;
+
+ // Types that have no parents that depend on them.
+ std::unordered_set<std::string_view> orphaned_types;
+ for (const auto& kvp : child_to_direct_parent_map) {
+ ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies(
+ child_to_direct_parent_map, kvp.first, &expanded_child_to_parent_map,
+ &pending_expansions, &orphaned_types));
+ }
+ return expanded_child_to_parent_map;
+}
+// Builds a transitive child-parent dependency map. 'Orphaned' types (types with
+// no parents) will not be present in the map.
+//
+// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of
+// type B and B has a property of type C. C and D only have non-document
+// properties.
+//
+// The transitive child-parent dependency map for this schema would be:
+// C -> A, B
+// B -> A
+//
+// A and D would be considered orphaned properties because no type refers to
+// them.
+//
+// RETURNS:
+// On success, a transitive child-parent dependency map of all types in the
+// schema.
+// INVALID_ARGUMENT if the schema contains a cycle or an undefined type.
+// ALREADY_EXISTS if a schema type is specified more than once in the schema
+libtextclassifier3::StatusOr<SchemaUtil::DependencyMap>
+BuildTransitiveDependencyGraph(const SchemaProto& schema) {
+ // Child to parent map.
+ SchemaUtil::DependencyMap child_to_direct_parent_map;
+
+ // Add all first-order dependencies.
+ std::unordered_set<std::string_view> known_types;
+ std::unordered_set<std::string_view> unknown_types;
for (const auto& type_config : schema.types()) {
std::string_view schema_type(type_config.schema_type());
- ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
-
- // We can't have duplicate schema_types
- if (!known_schema_types.insert(schema_type).second) {
+ if (known_types.count(schema_type) > 0) {
return absl_ports::AlreadyExistsError(absl_ports::StrCat(
"Field 'schema_type' '", schema_type, "' is already defined"));
}
- unknown_schema_types.erase(schema_type);
+ known_types.insert(schema_type);
+ unknown_types.erase(schema_type);
+ for (const auto& property_config : type_config.properties()) {
+ if (property_config.data_type() ==
+ PropertyConfigProto::DataType::DOCUMENT) {
+ // Need to know what schema_type these Document properties should be
+ // validated against
+ std::string_view property_schema_type(property_config.schema_type());
+ if (property_schema_type == schema_type) {
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Infinite loop detected in type configs. '",
+ schema_type, "' references itself."));
+ }
+ if (known_types.count(property_schema_type) == 0) {
+ unknown_types.insert(property_schema_type);
+ }
+ auto itr = child_to_direct_parent_map.find(property_schema_type);
+ if (itr == child_to_direct_parent_map.end()) {
+ child_to_direct_parent_map.insert(
+ {property_schema_type, std::unordered_set<std::string_view>()});
+ itr = child_to_direct_parent_map.find(property_schema_type);
+ }
+ itr->second.insert(schema_type);
+ }
+ }
+ }
+ if (!unknown_types.empty()) {
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ",")));
+ }
+ return ExpandTranstiveDependencies(child_to_direct_parent_map);
+}
+
+libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate(
+ const SchemaProto& schema) {
+ // 1. Build the dependency map. This will detect any cycles, non-existent or
+ // duplicate types in the schema.
+ ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap dependency_map,
+ BuildTransitiveDependencyGraph(schema));
+
+ // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
+ // already.
+ std::unordered_set<std::string_view> known_property_names;
+
+ // 2. Validate the properties of each type.
+ for (const auto& type_config : schema.types()) {
+ std::string_view schema_type(type_config.schema_type());
+ ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
// We only care about properties being unique within one type_config
known_property_names.clear();
@@ -164,56 +310,6 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
"data_types in schema property '",
schema_type, ".", property_name, "'"));
}
-
- if (property_schema_type == schema_type) {
- // The schema refers to itself. This also causes a infinite loop.
- //
- // TODO(b/171996137): When clients can opt out of indexing document
- // properties, then we don't need to do this if the document property
- // isn't indexed. We only care about infinite loops while we're trying
- // to assign section ids for indexing.
- return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Infinite loop detected in type configs. '",
- schema_type, "' references itself."));
- }
-
- // Need to make sure we eventually see/validate this schema_type
- if (known_schema_types.count(property_schema_type) == 0) {
- unknown_schema_types.insert(property_schema_type);
- }
-
- // Start tracking the parent schemas that references this nested schema
- // for infinite loop detection.
- //
- // TODO(b/171996137): When clients can opt out of indexing document
- // properties, then we don't need to do this if the document property
- // isn't indexed. We only care about infinite loops while we're trying
- // to assign section ids for indexing.
- std::unordered_set<std::string_view> parent_schemas;
- parent_schemas.insert(schema_type);
-
- for (const auto& parent : parent_schemas) {
- // Check for any indirect parents
- auto indirect_parents_iter = schema_references.find(parent);
- if (indirect_parents_iter == schema_references.end()) {
- continue;
- }
-
- // Our "parent" schema has parents as well. They're our indirect
- // parents now.
- for (const std::string_view& indirect_parent :
- indirect_parents_iter->second) {
- if (indirect_parent == property_schema_type) {
- // We're our own indirect parent! Infinite loop found.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "Infinite loop detected in type configs. '",
- property_schema_type, "' references itself."));
- }
- parent_schemas.insert(indirect_parent);
- }
- }
-
- schema_references.insert({property_schema_type, parent_schemas});
}
ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
@@ -227,15 +323,7 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
}
}
- // A Document property claimed to be of a schema_type that we never
- // saw/validated
- if (!unknown_schema_types.empty()) {
- return absl_ports::UnknownError(
- absl_ports::StrCat("Undefined 'schema_type's: ",
- absl_ports::StrJoin(unknown_schema_types, ",")));
- }
-
- return libtextclassifier3::Status::OK;
+ return dependency_map;
}
libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
@@ -355,9 +443,9 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
}
const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
- const SchemaProto& old_schema, const SchemaProto& new_schema) {
+ const SchemaProto& old_schema, const SchemaProto& new_schema,
+ const DependencyMap& new_schema_dependency_map) {
SchemaDelta schema_delta;
- schema_delta.index_incompatible = false;
TypeConfigMap new_type_config_map;
BuildTypeConfigMap(new_schema, &new_type_config_map);
@@ -385,7 +473,29 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// be reindexed.
int32_t old_required_properties = 0;
int32_t old_indexed_properties = 0;
+
+ // If there is a different number of properties, then there must have been a
+ // change.
+ bool has_property_changed =
+ old_type_config.properties_size() !=
+ new_schema_type_and_config->second.properties_size();
+ bool is_incompatible = false;
+ bool is_index_incompatible = false;
for (const auto& old_property_config : old_type_config.properties()) {
+ if (old_property_config.cardinality() ==
+ PropertyConfigProto::Cardinality::REQUIRED) {
+ ++old_required_properties;
+ }
+
+ // A non-default term_match_type indicates that this property is meant to
+ // be indexed.
+ bool is_indexed_property =
+ old_property_config.string_indexing_config().term_match_type() !=
+ TermMatchType::UNKNOWN;
+ if (is_indexed_property) {
+ ++old_indexed_properties;
+ }
+
auto new_property_name_and_config =
new_parsed_property_configs.property_config_map.find(
old_property_config.property_name());
@@ -397,39 +507,35 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
"Previously defined property type '", old_type_config.schema_type(),
".", old_property_config.property_name(),
"' was not defined in new schema");
- schema_delta.schema_types_incompatible.insert(
- old_type_config.schema_type());
+ is_incompatible = true;
+ is_index_incompatible |= is_indexed_property;
continue;
}
const PropertyConfigProto* new_property_config =
new_property_name_and_config->second;
+ if (!has_property_changed &&
+ !ArePropertiesEqual(old_property_config, *new_property_config)) {
+ // Finally found a property that changed.
+ has_property_changed = true;
+ }
if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
ICING_VLOG(1) << absl_ports::StrCat(
"Property '", old_type_config.schema_type(), ".",
old_property_config.property_name(), "' is incompatible.");
- schema_delta.schema_types_incompatible.insert(
- old_type_config.schema_type());
- }
-
- if (old_property_config.cardinality() ==
- PropertyConfigProto::Cardinality::REQUIRED) {
- ++old_required_properties;
- }
-
- // A non-default term_match_type indicates that this property is meant to
- // be indexed.
- if (old_property_config.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- ++old_indexed_properties;
+ is_incompatible = true;
}
// Any change in the indexed property requires a reindexing
if (!IsTermMatchTypeCompatible(
old_property_config.string_indexing_config(),
- new_property_config->string_indexing_config())) {
- schema_delta.index_incompatible = true;
+ new_property_config->string_indexing_config()) ||
+ old_property_config.document_indexing_config()
+ .index_nested_properties() !=
+ new_property_config->document_indexing_config()
+ .index_nested_properties()) {
+ is_index_incompatible = true;
}
}
@@ -444,8 +550,7 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
"New schema '", old_type_config.schema_type(),
"' has REQUIRED properties that are not "
"present in the previously defined schema");
- schema_delta.schema_types_incompatible.insert(
- old_type_config.schema_type());
+ is_incompatible = true;
}
// If we've gained any new indexed properties, then the section ids may
@@ -457,8 +562,59 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
"Set of indexed properties in schema type '",
old_type_config.schema_type(),
"' has changed, required reindexing.");
- schema_delta.index_incompatible = true;
+ is_index_incompatible = true;
+ }
+
+ if (is_incompatible) {
+ // If this type is incompatible, then every type that depends on it might
+ // also be incompatible. Use the dependency map to mark those ones as
+ // incompatible too.
+ schema_delta.schema_types_incompatible.insert(
+ old_type_config.schema_type());
+ auto parent_types_itr =
+ new_schema_dependency_map.find(old_type_config.schema_type());
+ if (parent_types_itr != new_schema_dependency_map.end()) {
+ schema_delta.schema_types_incompatible.reserve(
+ schema_delta.schema_types_incompatible.size() +
+ parent_types_itr->second.size());
+ schema_delta.schema_types_incompatible.insert(
+ parent_types_itr->second.begin(), parent_types_itr->second.end());
+ }
+ }
+
+ if (is_index_incompatible) {
+ // If this type is index incompatible, then every type that depends on it
+ // might also be index incompatible. Use the dependency map to mark those
+ // ones as index incompatible too.
+ schema_delta.schema_types_index_incompatible.insert(
+ old_type_config.schema_type());
+ auto parent_types_itr =
+ new_schema_dependency_map.find(old_type_config.schema_type());
+ if (parent_types_itr != new_schema_dependency_map.end()) {
+ schema_delta.schema_types_index_incompatible.reserve(
+ schema_delta.schema_types_index_incompatible.size() +
+ parent_types_itr->second.size());
+ schema_delta.schema_types_index_incompatible.insert(
+ parent_types_itr->second.begin(), parent_types_itr->second.end());
+ }
}
+
+ if (!is_incompatible && !is_index_incompatible && has_property_changed) {
+ schema_delta.schema_types_changed_fully_compatible.insert(
+ old_type_config.schema_type());
+ }
+
+ // Lastly, remove this type from the map. We know that this type can't
+ // come up in future iterations through the old schema types because the old
+ // type config has unique types.
+ new_type_config_map.erase(old_type_config.schema_type());
+ }
+
+ // Any types that are still present in the new_type_config_map are newly added
+ // types.
+ schema_delta.schema_types_new.reserve(new_type_config_map.size());
+ for (auto& kvp : new_type_config_map) {
+ schema_delta.schema_types_new.insert(std::move(kvp.first));
}
return schema_delta;
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index 7b989a8..fa80b15 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -22,6 +22,7 @@
#include <unordered_set>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/schema.pb.h"
namespace icing {
@@ -32,13 +33,14 @@ class SchemaUtil {
using TypeConfigMap =
std::unordered_map<std::string, const SchemaTypeConfigProto>;
- struct SchemaDelta {
- // Whether an indexing config has changed, requiring the index to be
- // regenerated. We don't list out all the types that make the index
- // incompatible because our index isn't optimized for that. It's much easier
- // to reset the entire index and reindex every document.
- bool index_incompatible = false;
+ // Maps from a child type to the parent types that depend on it.
+ // Ex. type A has a single property of type B
+ // The dependency map will be { { "B", { "A" } } }
+ using DependencyMap =
+ std::unordered_map<std::string_view,
+ std::unordered_set<std::string_view>>;
+ struct SchemaDelta {
// Which schema types were present in the old schema, but were deleted from
// the new schema.
std::unordered_set<std::string> schema_types_deleted;
@@ -47,10 +49,28 @@ class SchemaUtil {
// could invalidate existing Documents of that schema type.
std::unordered_set<std::string> schema_types_incompatible;
+ // Schema types that were added in the new schema. Represented by the
+ // `schema_type` field in the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_new;
+
+ // Schema types that were changed in a way that was backwards compatible and
+ // didn't invalidate the index. Represented by the `schema_type` field in
+ // the SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_changed_fully_compatible;
+
+ // Schema types that were changed in a way that was backwards compatible,
+ // but invalidated the index. Represented by the `schema_type` field in the
+ // SchemaTypeConfigProto.
+ std::unordered_set<std::string> schema_types_index_incompatible;
+
bool operator==(const SchemaDelta& other) const {
- return index_incompatible == other.index_incompatible &&
- schema_types_deleted == other.schema_types_deleted &&
- schema_types_incompatible == other.schema_types_incompatible;
+ return schema_types_deleted == other.schema_types_deleted &&
+ schema_types_incompatible == other.schema_types_incompatible &&
+ schema_types_new == other.schema_types_new &&
+ schema_types_changed_fully_compatible ==
+ other.schema_types_changed_fully_compatible &&
+ schema_types_index_incompatible ==
+ other.schema_types_index_incompatible;
}
};
@@ -90,10 +110,12 @@ class SchemaUtil {
// document properties can be opted out of indexing.
//
// Returns:
+ // On success, a dependency map from each child types to all parent types
+ // that depend on it directly or indirectly.
// ALREADY_EXISTS for case 1 and 2
// INVALID_ARGUMENT for 3-13
- // OK otherwise
- static libtextclassifier3::Status Validate(const SchemaProto& schema);
+ static libtextclassifier3::StatusOr<DependencyMap> Validate(
+ const SchemaProto& schema);
// Creates a mapping of schema type -> schema type config proto. The
// type_config_map is cleared, and then each schema-type_config_proto pair is
@@ -142,7 +164,8 @@ class SchemaUtil {
//
// Returns a SchemaDelta that captures the aforementioned differences.
static const SchemaDelta ComputeCompatibilityDelta(
- const SchemaProto& old_schema, const SchemaProto& new_schema);
+ const SchemaProto& old_schema, const SchemaProto& new_schema,
+ const DependencyMap& new_schema_dependency_map);
// Validates the 'property_name' field.
// 1. Can't be an empty string
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index a2fc8d9..26ef4c7 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -17,6 +17,7 @@
#include <cstdint>
#include <string>
#include <string_view>
+#include <unordered_set>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -34,6 +35,7 @@ using ::testing::HasSubstr;
// Properties/fields in a schema type
constexpr char kEmailType[] = "EmailMessage";
+constexpr char kMessageType[] = "Text";
constexpr char kPersonType[] = "Person";
constexpr PropertyConfigProto_DataType_Code TYPE_DOCUMENT =
@@ -63,6 +65,367 @@ constexpr TermMatchType_Code MATCH_UNKNOWN = TermMatchType_Code_UNKNOWN;
constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) {
+ // Create a schema with the following dependencies:
+ // C
+ // / \
+ // A - B E - F
+ // \ /
+ // D
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ // Provide these in alphabetical (also parent-child) order: A, B, C, D, E, F
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_c)
+ .AddType(type_d)
+ .AddType(type_e)
+ .AddType(type_f)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
+ SchemaUtil::Validate(schema));
+ EXPECT_THAT(d_map, testing::SizeIs(5));
+ EXPECT_THAT(d_map["F"],
+ testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
+ EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
+ EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
+ EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
+ EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+}
+
+TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) {
+ // Create a schema with the following dependencies:
+ // C
+ // / \
+ // A - B E - F
+ // \ /
+ // D
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ // Provide these in reverse alphabetical (also child-parent) order:
+ // F, E, D, C, B, A
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_f)
+ .AddType(type_e)
+ .AddType(type_d)
+ .AddType(type_c)
+ .AddType(type_b)
+ .AddType(type_a)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
+ SchemaUtil::Validate(schema));
+ EXPECT_THAT(d_map, testing::SizeIs(5));
+ EXPECT_THAT(d_map["F"],
+ testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
+ EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
+ EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
+ EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
+ EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+}
+
+TEST(SchemaUtilTest, DependencyGraphMixedOrder) {
+ // Create a schema with the following dependencies:
+ // C
+ // / \
+ // A - B E - F
+ // \ /
+ // D
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("d")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("D", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_d =
+ SchemaTypeConfigBuilder()
+ .SetType("D")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("e")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("E", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_e =
+ SchemaTypeConfigBuilder()
+ .SetType("E")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("f")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("F", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_f =
+ SchemaTypeConfigBuilder()
+ .SetType("F")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("text")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN))
+ .Build();
+
+ // Provide these in a random order: C, E, F, A, B, D
+ SchemaProto schema = SchemaBuilder()
+ .AddType(type_c)
+ .AddType(type_e)
+ .AddType(type_f)
+ .AddType(type_a)
+ .AddType(type_b)
+ .AddType(type_d)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map,
+ SchemaUtil::Validate(schema));
+ EXPECT_THAT(d_map, testing::SizeIs(5));
+ EXPECT_THAT(d_map["F"],
+ testing::UnorderedElementsAre("A", "B", "C", "D", "E"));
+ EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D"));
+ EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B"));
+ EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B"));
+ EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A"));
+}
+
+TEST(SchemaUtilTest, TopLevelCycle) {
+ // Create a schema with the following dependencies:
+ // A - B - B - B - B....
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
+TEST(SchemaUtilTest, MultiLevelCycle) {
+ // Create a schema with the following dependencies:
+ // A - B - C - A - B - C - A ...
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("A", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(SchemaUtilTest, NonExistentType) {
+ // Create a schema with the following dependencies:
+ // A - B - C - X (does not exist)
+ SchemaTypeConfigProto type_a =
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("b")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("B", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_b =
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("c")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("C", /*index_nested_properties=*/true))
+ .Build();
+ SchemaTypeConfigProto type_c =
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("x")
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .SetDataTypeDocument("X", /*index_nested_properties=*/true))
+ .Build();
+
+ SchemaProto schema =
+ SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build();
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
TEST(SchemaUtilTest, EmptySchemaProtoIsValid) {
SchemaProto schema;
ICING_ASSERT_OK(SchemaUtil::Validate(schema));
@@ -309,7 +672,7 @@ TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
.Build();
ASSERT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::UNKNOWN,
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("Undefined 'schema_type'")));
}
@@ -342,8 +705,10 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
.Build();
SchemaUtil::SchemaDelta schema_delta;
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema,
- new_schema_with_optional),
+ schema_delta.schema_types_changed_fully_compatible.insert(kEmailType);
+ SchemaUtil::DependencyMap no_dependencies_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema_with_optional, no_dependencies_map),
Eq(schema_delta));
}
@@ -377,8 +742,9 @@ TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema,
- new_schema_with_required),
+ SchemaUtil::DependencyMap no_dependencies_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema_with_required, no_dependencies_map),
Eq(schema_delta));
}
@@ -412,7 +778,9 @@ TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ SchemaUtil::DependencyMap no_dependencies_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependencies_map),
Eq(schema_delta));
}
@@ -442,16 +810,19 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
// We can't have a new schema be more restrictive, REPEATED->OPTIONAL
SchemaUtil::SchemaDelta incompatible_schema_delta;
incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType);
+ SchemaUtil::DependencyMap no_dependencies_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/less_restrictive_schema,
- /*new_schema=*/more_restrictive_schema),
+ /*new_schema=*/more_restrictive_schema, no_dependencies_map),
Eq(incompatible_schema_delta));
// We can have the new schema be less restrictive, OPTIONAL->REPEATED;
SchemaUtil::SchemaDelta compatible_schema_delta;
+ compatible_schema_delta.schema_types_changed_fully_compatible.insert(
+ kEmailType);
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/more_restrictive_schema,
- /*new_schema=*/less_restrictive_schema),
+ /*new_schema=*/less_restrictive_schema, no_dependencies_map),
Eq(compatible_schema_delta));
}
@@ -480,7 +851,9 @@ TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ SchemaUtil::DependencyMap no_dependencies_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependencies_map),
Eq(schema_delta));
}
@@ -495,6 +868,12 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
.SetDataType(TYPE_INT)
.SetCardinality(CARDINALITY_REPEATED)))
.AddType(SchemaTypeConfigBuilder()
+ .SetType(kMessageType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
.SetType(kEmailType)
.AddProperty(PropertyConfigBuilder()
.SetName("Property")
@@ -514,19 +893,31 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
.SetDataType(TYPE_INT)
.SetCardinality(CARDINALITY_REPEATED)))
.AddType(SchemaTypeConfigBuilder()
+ .SetType(kMessageType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
.SetType(kEmailType)
- .AddProperty(
- PropertyConfigBuilder()
- .SetName("Property")
- .SetDataTypeDocument(
- kEmailType, /*index_nested_properties=*/true)
- .SetCardinality(CARDINALITY_REPEATED)))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeDocument(
+ kMessageType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
.Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
- Eq(schema_delta));
+ // kEmailType depends on kMessageType
+ SchemaUtil::DependencyMap dependencies_map = {{kMessageType, {kEmailType}}};
+ SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, dependencies_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+ EXPECT_THAT(actual.schema_types_incompatible,
+ testing::ElementsAre(kEmailType));
+ EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty());
}
TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
@@ -555,16 +946,19 @@ TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
.Build();
SchemaUtil::SchemaDelta schema_delta;
- schema_delta.index_incompatible = true;
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
// New schema gained a new indexed property.
+ SchemaUtil::DependencyMap no_dependencies_map;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
- schema_with_indexed_property, schema_with_unindexed_property),
+ schema_with_indexed_property, schema_with_unindexed_property,
+ no_dependencies_map),
Eq(schema_delta));
// New schema lost an indexed property.
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
- schema_with_indexed_property, schema_with_unindexed_property),
+ schema_with_indexed_property, schema_with_unindexed_property,
+ no_dependencies_map),
Eq(schema_delta));
}
@@ -599,8 +993,10 @@ TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
.Build();
SchemaUtil::SchemaDelta schema_delta;
- schema_delta.index_incompatible = true;
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ schema_delta.schema_types_index_incompatible.insert(kPersonType);
+ SchemaUtil::DependencyMap no_dependencies_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependencies_map),
Eq(schema_delta));
}
@@ -637,7 +1033,10 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) {
.Build();
SchemaUtil::SchemaDelta schema_delta;
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ schema_delta.schema_types_new.insert(kEmailType);
+ SchemaUtil::DependencyMap no_dependencies_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependencies_map),
Eq(schema_delta));
}
@@ -676,10 +1075,105 @@ TEST(SchemaUtilTest, DeletingTypeIsNoted) {
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_deleted.emplace(kPersonType);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ SchemaUtil::DependencyMap no_dependencies_map;
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema,
+ no_dependencies_map),
Eq(schema_delta));
}
+TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) {
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property2")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+
+ // Remove Property2 and make Property1 indexed now. Removing Property2 should
+ // be incompatible.
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property1")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_incompatible.emplace(kEmailType);
+ schema_delta.schema_types_index_incompatible.emplace(kEmailType);
+ SchemaUtil::DependencyMap no_dependencies_map;
+ SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
+ old_schema, new_schema, no_dependencies_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
+TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) {
+ // Make two schemas. One that sets index_nested_properties to false and one
+ // that sets it to true.
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto no_nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType,
+ /*index_nested_properties=*/false)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ SchemaProto nested_index_schema =
+ SchemaBuilder()
+ .AddType(email_type_config)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emails")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+
+ // Going from index_nested_properties=false to index_nested_properties=true
+ // should make kPersonType index_incompatible. kEmailType should be
+ // unaffected.
+ SchemaUtil::SchemaDelta schema_delta;
+ schema_delta.schema_types_index_incompatible.emplace(kPersonType);
+ SchemaUtil::DependencyMap dependencies_map = {{kEmailType, {kPersonType}}};
+ SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta(
+ no_nested_index_schema, nested_index_schema, dependencies_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+
+ // Going from index_nested_properties=true to index_nested_properties=false
+ // should also make kPersonType index_incompatible. kEmailType should be
+ // unaffected.
+ actual = SchemaUtil::ComputeCompatibilityDelta(
+ nested_index_schema, no_nested_index_schema, dependencies_map);
+ EXPECT_THAT(actual, Eq(schema_delta));
+}
+
TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
SchemaProto schema =
SchemaBuilder()
diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc
new file mode 100644
index 0000000..5e0426e
--- /dev/null
+++ b/icing/store/document-log-creator.cc
@@ -0,0 +1,196 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/document-log-creator.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "icing/text_classifier/lib3/utils/base/logging.h"
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/annotate.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Used in DocumentId mapper to mark a document as deleted
+constexpr char kDocumentLogFilename[] = "document_log";
+
+std::string DocumentLogFilenameV0() {
+ // Originally only had this one version, no suffix.
+ return kDocumentLogFilename;
+}
+
+std::string DocumentLogFilenameV1() {
+ return absl_ports::StrCat(kDocumentLogFilename, "_v1");
+}
+
+std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
+}
+
+std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
+}
+
+} // namespace
+
+std::string DocumentLogCreator::GetDocumentLogFilename() {
+ // This should always return the latest version of the document log in use.
+ // The current latest version is V1.
+ return DocumentLogFilenameV1();
+}
+
+libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
+DocumentLogCreator::Create(const Filesystem* filesystem,
+ const std::string& base_dir) {
+ bool v0_exists =
+ filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
+ bool v1_exists =
+ filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());
+
+ bool regen_derived_files = false;
+ if (v0_exists && !v1_exists) {
+ ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir));
+
+ // Need to regenerate derived files since documents may be written to a
+ // different file offset in the log.
+ regen_derived_files = true;
+ } else if (!v1_exists) {
+ // First time initializing a v1 log. There are no existing derived files at
+ // this point, so we should generate some. "regenerate" here also means
+ // "generate for the first time", i.e. we shouldn't expect there to be any
+ // existing derived files.
+ regen_derived_files = true;
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
+ log_create_result,
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem, MakeDocumentLogFilenameV1(base_dir),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+
+ CreateResult create_result = {std::move(log_create_result),
+ regen_derived_files};
+ return create_result;
+}
+
+libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
+ const Filesystem* filesystem, const std::string& base_dir) {
+ ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
+
+ // Our v0 proto log was non-portable, create it so we can read protos out from
+ // it.
+ auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem, MakeDocumentLogFilenameV0(base_dir),
+ FileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true));
+ if (!v0_create_result_or.ok()) {
+ return absl_ports::Annotate(
+ v0_create_result_or.status(),
+ "Failed to initialize v0 document log while migrating.");
+ return v0_create_result_or.status();
+ }
+ FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
+ std::move(v0_create_result_or).ValueOrDie();
+ std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
+ std::move(v0_create_result.proto_log);
+
+ // Create a v1 portable proto log that we will write our protos to.
+ auto v1_create_result_or =
+ PortableFileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem, MakeDocumentLogFilenameV1(base_dir),
+ PortableFileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true));
+ if (!v1_create_result_or.ok()) {
+ return absl_ports::Annotate(
+ v1_create_result_or.status(),
+ "Failed to initialize v1 document log while migrating.");
+ }
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
+ std::move(v1_create_result_or).ValueOrDie();
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
+ std::move(v1_create_result.proto_log);
+
+ // Dummy empty document to be used when copying over deleted documents.
+ DocumentProto empty_document;
+
+ // Start reading out from the old log and putting them in the new log.
+ auto iterator = v0_proto_log->GetIterator();
+ auto iterator_status = iterator.Advance();
+ while (iterator_status.ok()) {
+ libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
+ v0_proto_log->ReadProto(iterator.GetOffset());
+
+ bool deleted_document = false;
+ DocumentWrapper document_wrapper;
+ if (absl_ports::IsNotFound(document_wrapper_or.status())) {
+ // Proto was erased, we can skip copying this into our new log.
+ *document_wrapper.mutable_document() = empty_document;
+ deleted_document = true;
+ } else if (!document_wrapper_or.ok()) {
+ // Some real error, pass up
+ return document_wrapper_or.status();
+ } else {
+ document_wrapper = std::move(document_wrapper_or).ValueOrDie();
+ }
+
+ auto offset_or = v1_proto_log->WriteProto(document_wrapper);
+ if (!offset_or.ok()) {
+ return absl_ports::Annotate(
+ offset_or.status(),
+ "Failed to write proto to v1 document log while migrating.");
+ }
+
+ // If the original document was deleted, erase the proto we just wrote.
+ // We do this to maintain the document_ids, i.e. we still want document_id 2
+ // to point to a deleted document even though we may not have the document
+ // contents anymore. DocumentStore guarantees that the document_ids don't
+ // change unless an Optimize is triggered.
+ if (deleted_document) {
+ int64_t offset = offset_or.ValueOrDie();
+ auto erased_status = v1_proto_log->EraseProto(offset);
+ if (!erased_status.ok()) {
+ return absl_ports::Annotate(
+ erased_status,
+ "Failed to erase proto in v1 document log while migrating.");
+ }
+ }
+
+ iterator_status = iterator.Advance();
+ }
+
+ // Close out our file log pointers.
+ v0_proto_log.reset();
+ v1_proto_log.reset();
+
+ return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h
new file mode 100644
index 0000000..51cf497
--- /dev/null
+++ b/icing/store/document-log-creator.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+#define ICING_STORE_DOCUMENT_LOG_CREATOR_H_
+
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/proto/document_wrapper.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Handles creation of the document log and any underlying migrations that may
+// be necessary.
+class DocumentLogCreator {
+ public:
+ struct CreateResult {
+ // The create result passed up from the PortableFileBackedProtoLog::Create.
+ // Contains the document log.
+ PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result;
+
+ // Whether the caller needs to also regenerate/generate any derived files
+ // based off of the initialized document log.
+ bool regen_derived_files;
+ };
+
+ // Creates the document log in the base_dir. Will create one if it doesn't
+ // already exist.
+ //
+ // This also handles any potential migrations from old document log versions.
+ // At the end of this call, the most up-to-date log will be returned and will
+ // be usable.
+ //
+ // Returns:
+ // CreateResult on success.
+ // INTERNAL on any I/O error.
+ static libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult> Create(
+ const Filesystem* filesystem, const std::string& base_dir);
+
+ // Returns the filename of the document log, without any directory prefixes.
+ // Used mainly for testing purposes.
+ static std::string GetDocumentLogFilename();
+
+ private:
+ // Handles migrating a v0 document log (not portable) to a v1 document log
+ // (portable). This will initialize the log in the beginning, and close it
+ // when migration is done. Callers will need to reinitialize the log on their
+ // own.
+ //
+ // Returns:
+ // OK on success.
+ // INVALID_ARGUMENT if some invalid option was passed to the document log.
+ // INTERNAL on I/O error.
+ static libtextclassifier3::Status MigrateFromV0ToV1(
+ const Filesystem* filesystem, const std::string& base_dir);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DOCUMENT_LOG_CREATOR_H_
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 4e63b90..fc797b4 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -33,6 +33,7 @@
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
@@ -44,6 +45,7 @@
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
#include "icing/store/key-mapper.h"
#include "icing/store/namespace-id.h"
#include "icing/store/usage-store.h"
@@ -93,10 +95,6 @@ std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
}
-std::string MakeDocumentLogFilename(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kDocumentLogFilename);
-}
-
std::string MakeScoreCacheFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename);
}
@@ -224,30 +222,36 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
- auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
- filesystem_, MakeDocumentLogFilename(base_dir_),
- FileBackedProtoLog<DocumentWrapper>::Options(
- /*compress_in=*/true));
+ auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_);
+
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
if (!create_result_or.ok()) {
ICING_LOG(ERROR) << create_result_or.status().error_message()
- << "\nFailed to initialize DocumentLog";
+ << "\nFailed to initialize DocumentLog.";
return create_result_or.status();
}
- FileBackedProtoLog<DocumentWrapper>::CreateResult create_result =
+ DocumentLogCreator::CreateResult create_result =
std::move(create_result_or).ValueOrDie();
- document_log_ = std::move(create_result.proto_log);
- if (force_recovery_and_revalidate_documents ||
- create_result.has_data_loss()) {
- if (create_result.has_data_loss() && initialize_stats != nullptr) {
+ document_log_ = std::move(create_result.log_create_result.proto_log);
+
+ if (create_result.regen_derived_files ||
+ force_recovery_and_revalidate_documents ||
+ create_result.log_create_result.has_data_loss()) {
+ // We can't rely on any existing derived files. Recreate them from scratch.
+ // Currently happens if:
+ // 1) This is a new log and we don't have derived files yet
+ // 2) Client wanted us to force a regeneration.
+ // 3) Log has some data loss, can't rely on existing derived data.
+ if (create_result.log_create_result.has_data_loss() &&
+ initialize_stats != nullptr) {
ICING_LOG(WARNING)
<< "Data loss in document log, regenerating derived files.";
initialize_stats->set_document_store_recovery_cause(
InitializeStatsProto::DATA_LOSS);
- if (create_result.data_loss == DataLoss::PARTIAL) {
+ if (create_result.log_create_result.data_loss == DataLoss::PARTIAL) {
// Ground truth is partially lost.
initialize_stats->set_document_store_data_status(
InitializeStatsProto::PARTIAL_LOSS);
@@ -257,10 +261,16 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
InitializeStatsProto::COMPLETE_LOSS);
}
}
+
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status =
RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
- if (initialize_stats != nullptr) {
+ if (initialize_stats != nullptr &&
+ (force_recovery_and_revalidate_documents ||
+ create_result.log_create_result.has_data_loss())) {
+ // Only consider it a recovery if the client forced a recovery or there
+ // was data loss. Otherwise, this could just be the first time we're
+ // initializing and generating derived files.
initialize_stats->set_document_store_recovery_latency_ms(
document_recovery_timer->GetElapsedMilliseconds());
}
@@ -270,7 +280,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
return status;
}
} else {
- if (!InitializeDerivedFiles().ok()) {
+ if (!InitializeExistingDerivedFiles().ok()) {
ICING_VLOG(1)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
@@ -296,10 +306,10 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
initialize_stats->set_num_documents(document_id_mapper_->num_elements());
}
- return create_result.data_loss;
+ return create_result.log_create_result.data_loss;
}
-libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
+libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
if (!HeaderExists()) {
// Without a header, we don't know if things are consistent between each
// other so the caller should just regenerate everything from ground
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index b0cd1ce..79d99d4 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -26,6 +26,7 @@
#include "icing/file/file-backed-proto-log.h"
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
@@ -438,7 +439,7 @@ class DocumentStore {
// A log used to store all documents, it serves as a ground truth of doc
// store. key_mapper_ and document_id_mapper_ can be regenerated from it.
- std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log_;
+ std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
// Key (namespace + uri) to DocumentId mapping
std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_;
@@ -495,11 +496,35 @@ class DocumentStore {
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats);
+ // Initializes a new DocumentStore and sets up any underlying files.
+ //
+ // Returns:
+ // Data loss status on success, effectively always DataLoss::NONE
+ // INTERNAL on I/O error
+ libtextclassifier3::StatusOr<DataLoss> InitializeNewStore(
+ InitializeStatsProto* initialize_stats);
+
+ // Initializes a DocumentStore over an existing directory of files.
+ //
+ // stats will be set if non-null
+ //
+ // Returns:
+ // Data loss status on success
+ // INTERNAL on I/O error
+ libtextclassifier3::StatusOr<DataLoss> InitializeExistingStore(
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats);
+
+ libtextclassifier3::StatusOr<DataLoss> MigrateFromV0ToV1(
+ InitializeStatsProto* initialize_stats);
+
// Creates sub-components and verifies the integrity of each sub-component.
+ // This assumes that the the underlying files already exist, and will return
+ // an error if it doesn't find what it's expecting.
//
// Returns an error if subcomponents failed to initialize successfully.
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status InitializeDerivedFiles();
+ libtextclassifier3::Status InitializeExistingDerivedFiles();
// Re-generates all files derived from the ground truth: the document log.
//
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index f68e115..ce608fc 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -168,6 +168,93 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
}
BENCHMARK(BM_DoesDocumentExistBenchmark);
+void BM_Put(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+
+ for (auto s : state) {
+ // It's ok that this is the same document over and over. We'll create a new
+ // document_id for it and still insert the proto into the underlying log.
+ benchmark::DoNotOptimize(document_store->Put(document));
+ }
+}
+BENCHMARK(BM_Put);
+
+void BM_GetSameDocument(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK(document_store->Put(CreateDocument("namespace", "uri")));
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(document_store->Get("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_GetSameDocument);
+
+void BM_Delete(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+
+ for (auto s : state) {
+ state.PauseTiming();
+ ICING_ASSERT_OK(document_store->Put(document));
+ state.ResumeTiming();
+
+ benchmark::DoNotOptimize(document_store->Delete("namespace", "uri"));
+ }
+}
+BENCHMARK(BM_Delete);
+
} // namespace
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index ad3b7c4..b1a6357 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -15,6 +15,7 @@
#include "icing/store/document-store.h"
#include <cstdint>
+#include <filesystem>
#include <limits>
#include <memory>
#include <string>
@@ -40,6 +41,7 @@
#include "icing/store/corpus-id.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-log-creator.h"
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -105,6 +107,22 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri,
return usage_report;
}
+PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path) {
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header;
+ filesystem.PRead(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ return header;
+}
+
+void WriteDocumentLogHeader(
+ Filesystem filesystem, const std::string& file_path,
+ PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
+ filesystem.Write(file_path.c_str(), &header,
+ sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
+}
+
class DocumentStoreTest : public ::testing::Test {
protected:
DocumentStoreTest()
@@ -452,14 +470,18 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(
document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
@@ -538,13 +560,17 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
@@ -607,7 +633,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
CorruptDocStoreHeaderChecksumFile();
@@ -621,7 +649,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
@@ -730,13 +760,17 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
@@ -809,7 +843,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
CorruptDocStoreHeaderChecksumFile();
@@ -823,7 +859,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
@@ -901,7 +939,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
} // Destructors should update checksum and persist all data to file.
CorruptDocStoreHeaderChecksumFile();
@@ -923,7 +963,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str());
EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
@@ -968,7 +1010,9 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK(doc_store->Put(document2));
ICING_ASSERT_OK(doc_store->Put(document3));
- std::string original_document_log = document_store_dir_ + "/document_log";
+ std::string original_document_log = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+
int64_t original_size =
filesystem_.GetFileSize(original_document_log.c_str());
@@ -979,7 +1023,8 @@ TEST_F(DocumentStoreTest, OptimizeInto) {
HasSubstr("directory is the same")));
std::string optimized_dir = document_store_dir_ + "_optimize";
- std::string optimized_document_log = optimized_dir + "/document_log";
+ std::string optimized_document_log =
+ optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
// Validates that the optimized document log has the same size if nothing is
// deleted
@@ -1067,8 +1112,8 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
filesystem_.PWrite(document_log_file.c_str(), file_size,
serialized_document.data(), serialized_document.size());
@@ -2919,8 +2964,8 @@ TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
const std::string serialized_document = document.SerializeAsString();
- const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
filesystem_.PWrite(document_log_file.c_str(), file_size,
serialized_document.data(), serialized_document.size());
@@ -3043,7 +3088,9 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
const std::string serialized_document = document.SerializeAsString();
const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ absl_ports::StrCat(document_store_dir_, "/",
+ DocumentLogCreator::GetDocumentLogFilename())
+ .c_str();
int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
filesystem_.PWrite(document_log_file.c_str(), file_size,
serialized_document.data(), serialized_document.size());
@@ -3060,8 +3107,8 @@ TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
int64_t corruptible_offset;
- const std::string document_log_file =
- absl_ports::StrCat(document_store_dir_, "/document_log");
+ const std::string document_log_file = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3088,8 +3135,30 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// "Corrupt" the persisted content written in the log. We can't recover if
// the persisted data was corrupted.
std::string corruption = "abc";
- filesystem_.PWrite(document_log_file.c_str(), /*offset=*/corruptible_offset,
- corruption.data(), corruption.size());
+ filesystem_.PWrite(document_log_file.c_str(),
+ /*offset=*/corruptible_offset, corruption.data(),
+ corruption.size());
+
+ {
+ // "Corrupt" the content written in the log. Make the corrupt document
+ // smaller than our original one so we don't accidentally write past our
+ // file.
+ DocumentProto document =
+ DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
+ std::string serialized_document = document.SerializeAsString();
+ ASSERT_TRUE(filesystem_.PWrite(
+ document_log_file.c_str(), corruptible_offset,
+ serialized_document.data(), serialized_document.size()));
+
+ PortableFileBackedProtoLog<DocumentWrapper>::Header header =
+ ReadDocumentLogHeader(filesystem_, document_log_file);
+
+ // Set dirty bit to true to reflect that something changed in the log.
+ header.SetDirtyFlag(true);
+ header.SetHeaderChecksum(header.CalculateHeaderChecksum());
+
+ WriteDocumentLogHeader(filesystem_, document_log_file, header);
+ }
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3106,8 +3175,8 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
// the document store header.
//
// This causes a problem now because this cl changes behavior to not consider an
-// InitializeDerivedFiles failure to be a recovery if there is nothing to
-// recover because the doocument store is empty.
+// InitializeExistingDerivedFiles failure to be a recovery if there is nothing
+// to recover because the doocument store is empty.
#define DISABLE_BACKWARDS_COMPAT_TEST
#ifndef DISABLE_BACKWARDS_COMPAT_TEST
TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
@@ -3667,6 +3736,126 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
}
}
+TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
+ // Set up schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+
+ std::string schema_store_dir = schema_store_dir_ + "_migrate";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
+
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+
+ // Create dst directory that we'll initialize the DocumentStore over.
+ std::string document_store_dir = document_store_dir_ + "_migrate";
+ ASSERT_THAT(
+ filesystem_.DeleteDirectoryRecursively(document_store_dir.c_str()), true);
+ ASSERT_THAT(
+ filesystem_.CreateDirectoryRecursively(document_store_dir.c_str()), true);
+
+ // Copy the testdata files into our DocumentStore directory
+ std::string document_store_without_portable_log;
+ if (IsAndroidX86()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_x86/document_dir");
+ } else if (IsAndroidArm()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_android_arm/document_dir");
+ } else if (IsIosPlatform()) {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_ios/document_dir");
+ } else {
+ document_store_without_portable_log = GetTestFilePath(
+ "icing/testdata/not_portable_log/"
+ "icing_search_engine_linux/document_dir");
+ }
+
+ ASSERT_TRUE(filesystem_.CopyDirectory(
+ document_store_without_portable_log.c_str(), document_store_dir.c_str(),
+ /*recursive=*/true));
+
+ // Initialize the DocumentStore over our copied files.
+ InitializeStatsProto initialize_stats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_,
+ schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ &initialize_stats));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ // These are the documents that are stored in the testdata files. Do not
+ // change unless you're also updating the testdata files.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .AddStringProperty("subject", "foo")
+ .AddStringProperty("body", "bar")
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(20)
+ .SetScore(321)
+ .AddStringProperty("body", "baz bat")
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(30)
+ .SetScore(123)
+ .AddStringProperty("subject", "phoo")
+ .Build();
+
+ // Check that we didn't lose anything. A migration also doesn't technically
+ // count as a recovery.
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_FALSE(initialize_stats.has_document_store_recovery_cause());
+
+ // Document 1 and 3 were put normally, and document 2 was deleted in our
+ // testdata files.
+ //
+ // Check by namespace, uri
+ EXPECT_THAT(document_store->Get(document1.namespace_(), document1.uri()),
+ IsOkAndHolds(EqualsProto(document1)));
+ EXPECT_THAT(document_store->Get(document2.namespace_(), document2.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(document3.namespace_(), document3.uri()),
+ IsOkAndHolds(EqualsProto(document3)));
+
+ // Check by document_id
+ EXPECT_THAT(document_store->Get(/*document_id=*/0),
+ IsOkAndHolds(EqualsProto(document1)));
+ EXPECT_THAT(document_store->Get(/*document_id=*/1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(/*document_id=*/2),
+ IsOkAndHolds(EqualsProto(document3)));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..67a5bbd
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..70ec244
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..6177f48
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_arm/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/lite.lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/lite.lexicon.h
new file mode 100644
index 0000000..624444e
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/lite.lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/main/main-lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/main/main-lexicon.h
new file mode 100644
index 0000000..82d6712
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_arm/index_dir/idx/main/main-lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_arm/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_arm/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..d1b1cc1
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_arm/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..67a5bbd
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..70ec244
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..6177f48
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_x86/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/lite.lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/lite.lexicon.h
new file mode 100644
index 0000000..624444e
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/lite.lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/main/main-lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/main/main-lexicon.h
new file mode 100644
index 0000000..82d6712
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_x86/index_dir/idx/main/main-lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_android_x86/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_android_x86/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..d1b1cc1
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_android_x86/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..67a5bbd
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..70ec244
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..6177f48
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_ios/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/lite.lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/lite.lexicon.h
new file mode 100644
index 0000000..624444e
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/lite.lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/main/main-lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/main/main-lexicon.h
new file mode 100644
index 0000000..82d6712
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_ios/index_dir/idx/main/main-lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_ios/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_ios/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..d1b1cc1
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_ios/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..67a5bbd
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/corpus_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..70ec244
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..6177f48
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_linux/document_dir/namespace_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/lite.lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/lite.lexicon.h
new file mode 100644
index 0000000..c95dba8
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/lite.lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/main/main-lexicon.h b/icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/main/main-lexicon.h
new file mode 100644
index 0000000..6b87b9c
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_linux/index_dir/idx/main/main-lexicon.h
Binary files differ
diff --git a/icing/testdata/not_portable_log/icing_search_engine_linux/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h b/icing/testdata/not_portable_log/icing_search_engine_linux/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
new file mode 100644
index 0000000..d1b1cc1
--- /dev/null
+++ b/icing/testdata/not_portable_log/icing_search_engine_linux/schema_dir/schema_type_mapper/key_mapper_dir/key_mapper.h
Binary files differ
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index 8d8bdf2..f83fe0a 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -121,7 +121,6 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
const SchemaStore::SetSchemaResult& actual = arg;
if (actual.success == expected.success &&
- actual.index_incompatible == expected.index_incompatible &&
actual.old_schema_type_ids_changed ==
expected.old_schema_type_ids_changed &&
actual.schema_types_deleted_by_name ==
@@ -131,7 +130,12 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
actual.schema_types_incompatible_by_name ==
expected.schema_types_incompatible_by_name &&
actual.schema_types_incompatible_by_id ==
- expected.schema_types_incompatible_by_id) {
+ expected.schema_types_incompatible_by_id &&
+ actual.schema_types_new_by_name == expected.schema_types_new_by_name &&
+ actual.schema_types_changed_fully_compatible_by_name ==
+ expected.schema_types_changed_fully_compatible_by_name &&
+ actual.schema_types_index_incompatible_by_name ==
+ expected.schema_types_index_incompatible_by_name) {
return true;
}
@@ -191,37 +195,82 @@ MATCHER_P(EqualsSetSchemaResult, expected, "") {
absl_ports::NumberFormatter()),
"]");
+ // Format schema_types_new_by_name
+ std::string actual_schema_types_new_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(actual.schema_types_new_by_name, ","), "]");
+
+ std::string expected_schema_types_new_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(expected.schema_types_new_by_name, ","), "]");
+
+ // Format schema_types_changed_fully_compatible_by_name
+ std::string actual_schema_types_changed_fully_compatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(
+ actual.schema_types_changed_fully_compatible_by_name, ","),
+ "]");
+
+ std::string expected_schema_types_changed_fully_compatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(
+ expected.schema_types_changed_fully_compatible_by_name, ","),
+ "]");
+
+ // Format schema_types_deleted_by_id
+ std::string actual_schema_types_index_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(actual.schema_types_index_incompatible_by_name,
+ ","),
+ "]");
+
+ std::string expected_schema_types_index_incompatible_by_name =
+ absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected.schema_types_index_incompatible_by_name,
+ ","),
+ "]");
+
*result_listener << IcingStringUtil::StringPrintf(
"\nExpected {\n"
"\tsuccess=%d,\n"
- "\tindex_incompatible=%d,\n"
"\told_schema_type_ids_changed=%s,\n"
"\tschema_types_deleted_by_name=%s,\n"
"\tschema_types_deleted_by_id=%s,\n"
"\tschema_types_incompatible_by_name=%s,\n"
"\tschema_types_incompatible_by_id=%s\n"
+ "\tschema_types_new_by_name=%s,\n"
+ "\tschema_types_index_incompatible_by_name=%s,\n"
+ "\tschema_types_changed_fully_compatible_by_name=%s\n"
"}\n"
"Actual {\n"
"\tsuccess=%d,\n"
- "\tindex_incompatible=%d,\n"
"\told_schema_type_ids_changed=%s,\n"
"\tschema_types_deleted_by_name=%s,\n"
"\tschema_types_deleted_by_id=%s,\n"
"\tschema_types_incompatible_by_name=%s,\n"
"\tschema_types_incompatible_by_id=%s\n"
+ "\tschema_types_new_by_name=%s,\n"
+ "\tschema_types_index_incompatible_by_name=%s,\n"
+ "\tschema_types_changed_fully_compatible_by_name=%s\n"
"}\n",
- expected.success, expected.index_incompatible,
- expected_old_schema_type_ids_changed.c_str(),
+ expected.success, expected_old_schema_type_ids_changed.c_str(),
expected_schema_types_deleted_by_name.c_str(),
expected_schema_types_deleted_by_id.c_str(),
expected_schema_types_incompatible_by_name.c_str(),
- expected_schema_types_incompatible_by_id.c_str(), actual.success,
- actual.index_incompatible, actual_old_schema_type_ids_changed.c_str(),
+ expected_schema_types_incompatible_by_id.c_str(),
+ expected_schema_types_new_by_name.c_str(),
+ expected_schema_types_changed_fully_compatible_by_name.c_str(),
+ expected_schema_types_index_incompatible_by_name.c_str(), actual.success,
+ actual_old_schema_type_ids_changed.c_str(),
actual_schema_types_deleted_by_name.c_str(),
actual_schema_types_deleted_by_id.c_str(),
actual_schema_types_incompatible_by_name.c_str(),
- actual_schema_types_incompatible_by_id.c_str());
-
+ actual_schema_types_incompatible_by_id.c_str(),
+ actual_schema_types_new_by_name.c_str(),
+ actual_schema_types_changed_fully_compatible_by_name.c_str(),
+ actual_schema_types_index_incompatible_by_name.c_str());
return false;
}
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
index ae2a372..76219b5 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc
@@ -99,11 +99,13 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
return text_.substr(term_start_.utf8_index(), term_length);
}
- libtextclassifier3::StatusOr<CharacterIterator> CalculateTermStart() {
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTermStart()
+ override {
return term_start_;
}
- libtextclassifier3::StatusOr<CharacterIterator> CalculateTermEndExclusive() {
+ libtextclassifier3::StatusOr<CharacterIterator> CalculateTermEndExclusive()
+ override {
return term_end_exclusive_;
}
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index 2019033..0cee80c 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -496,12 +496,13 @@ public final class IcingSearchEngineTest {
assertStatusOk(
icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus());
- // String: "我每天走路去上班。"
- // ^ ^ ^ ^^
- // UTF16 idx: 0 1 3 5 6
- // Breaks into segments: "我", "每天", "走路", "去", "上班"
- String chinese = "我每天走路去上班。";
- assertThat(chinese.length()).isEqualTo(9);
+ // String: "天是蓝的"
+ // ^ ^^ ^
+ // UTF16 idx: 0 1 2 3
+ // Breaks into segments: "天", "是", "蓝", "的"
+ // "The sky is blue"
+ String chinese = "天是蓝的";
+ assertThat(chinese.length()).isEqualTo(4);
DocumentProto emailDocument1 =
createEmailDocument("namespace", "uri1").toBuilder()
.addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(chinese))
@@ -511,7 +512,7 @@ public final class IcingSearchEngineTest {
// Search and request snippet matching but no windowing.
SearchSpecProto searchSpec =
SearchSpecProto.newBuilder()
- .setQuery("每")
+ .setQuery("是")
.setTermMatchType(TermMatchType.Code.PREFIX)
.build();
ResultSpecProto resultSpecProto =
@@ -550,9 +551,9 @@ public final class IcingSearchEngineTest {
int matchStart = matchProto.getExactMatchUtf16Position();
int matchEnd = matchStart + matchProto.getExactMatchUtf16Length();
assertThat(matchStart).isEqualTo(1);
- assertThat(matchEnd).isEqualTo(3);
+ assertThat(matchEnd).isEqualTo(2);
String match = content.substring(matchStart, matchEnd);
- assertThat(match).isEqualTo("每天");
+ assertThat(match).isEqualTo("是");
}
@Test
diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto
index 9a4e5b9..2e8321b 100644
--- a/proto/icing/proto/document.proto
+++ b/proto/icing/proto/document.proto
@@ -209,7 +209,7 @@ message DeleteBySchemaTypeResultProto {
}
// Result of a call to IcingSearchEngine.DeleteByQuery
-// Next tag: 3
+// Next tag: 4
message DeleteByQueryResultProto {
// Status code can be one of:
// OK
@@ -224,5 +224,7 @@ message DeleteByQueryResultProto {
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional DeleteStatsProto delete_stats = 2;
+ optional DeleteByQueryStatsProto delete_by_query_stats = 3;
+
+ reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index 29f7f80..4dcfecf 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -181,8 +181,7 @@ message QueryStatsProto {
}
// Stats of the top-level functions IcingSearchEngine::Delete,
-// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType,
-// IcingSearchEngine::DeleteByQuery.
+// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType.
// Next tag: 4
message DeleteStatsProto {
// Overall time used for the function call.
@@ -196,8 +195,10 @@ message DeleteStatsProto {
// Delete one document.
SINGLE = 1;
- // Delete by query.
- QUERY = 2;
+ // Delete by query. This value is deprecated.
+ // IcingSearchEngine::DeleteByQuery will return a DeleteByQueryStatsProto
+ // rather than a DeleteStatsProto.
+ DEPRECATED_QUERY = 2 [deprecated = true];
// Delete by namespace.
NAMESPACE = 3;
@@ -211,3 +212,32 @@ message DeleteStatsProto {
// Number of documents deleted by this call.
optional int32 num_documents_deleted = 3;
}
+
+// Stats of the top-level functions IcingSearchEngine::DeleteByQuery.
+// Next tag: 9
+message DeleteByQueryStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ // Number of documents deleted by this call.
+ optional int32 num_documents_deleted = 2;
+
+ // The UTF-8 length of the query string
+ optional int32 query_length = 3;
+
+ // Number of terms in the query string.
+ optional int32 num_terms = 4;
+
+ // Number of namespaces filtered.
+ optional int32 num_namespaces_filtered = 5;
+
+ // Number of schema types filtered.
+ optional int32 num_schema_types_filtered = 6;
+
+ // Time used to parse the query, including 2 parts: tokenizing and
+ // transforming tokens into an iterator tree.
+ optional int32 parse_query_latency_ms = 7;
+
+ // Time used to delete each document.
+ optional int32 document_removal_latency_ms = 8;
+}
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index 4188a8c..c611cbf 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -197,7 +197,7 @@ message SchemaProto {
}
// Result of a call to IcingSearchEngine.SetSchema
-// Next tag: 4
+// Next tag: 8
message SetSchemaResultProto {
// Status code can be one of:
// OK
@@ -221,6 +221,21 @@ message SetSchemaResultProto {
// documents that fail validation against the new schema types would also be
// deleted.
repeated string incompatible_schema_types = 3;
+
+ // Schema types that did not exist in the previous schema and were added with
+ // the new schema type.
+ repeated string new_schema_types = 4;
+
+ // Schema types that were changed in a way that was backwards compatible and
+ // didn't invalidate the index.
+ repeated string fully_compatible_changed_schema_types = 5;
+
+ // Schema types that were changed in a way that was backwards compatible, but
+ // invalidated the index.
+ repeated string index_incompatible_changed_schema_types = 6;
+
+ // Overall time used for the function call.
+ optional int32 latency_ms = 7;
}
// Result of a call to IcingSearchEngine.GetSchema
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 35ad6d9..a431b89 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=378695940)
+set(synced_AOSP_CL_number=380898610)