diff options
author | Cassie Wang <cassiewang@google.com> | 2021-07-19 12:26:54 -0700 |
---|---|---|
committer | Cassie Wang <cassiewang@google.com> | 2021-07-19 12:29:00 -0700 |
commit | ef3e85a90bf0e16b8e34802b7a3912484d185c84 (patch) | |
tree | 5129f3a74eb616eb2856c6db3392011c1ce384f8 | |
parent | 1b85c23555eab498907ef7527575c4fb1adb4f2b (diff) | |
download | icing-ef3e85a90bf0e16b8e34802b7a3912484d185c84.tar.gz |
Sync from upstream.
Descriptions:
==================
Switch to PRId64 string formatting.
==================
Conditionally recompute the checksum for PortableFileBackedProtoLog.
==================
Fix spammy error log encountered in hit iterator.
===================
Fix -Wunused-but-set-variable warnings
Bug: 193141896
Change-Id: I46ca2daa90efe442e9ebeac389e91df21ddc81d6
-rw-r--r-- | icing/file/file-backed-vector.h | 5 | ||||
-rw-r--r-- | icing/file/portable-file-backed-proto-log.h | 41 | ||||
-rw-r--r-- | icing/file/portable-file-backed-proto-log_benchmark.cc | 92 | ||||
-rw-r--r-- | icing/index/iterator/doc-hit-info-iterator-and.cc | 1 | ||||
-rw-r--r-- | icing/index/lite/doc-hit-info-iterator-term-lite.cc | 9 | ||||
-rw-r--r-- | icing/index/lite/doc-hit-info-iterator-term-lite.h | 5 | ||||
-rw-r--r-- | icing/index/main/doc-hit-info-iterator-term-main.cc | 5 | ||||
-rw-r--r-- | icing/store/document-store_benchmark.cc | 69 | ||||
-rw-r--r-- | icing/store/document-store_test.cc | 10 | ||||
-rw-r--r-- | synced_AOSP_CL_number.txt | 2 |
10 files changed, 211 insertions, 28 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index 61f6923..0989935 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -56,6 +56,7 @@ #ifndef ICING_FILE_FILE_BACKED_VECTOR_H_ #define ICING_FILE_FILE_BACKED_VECTOR_H_ +#include <inttypes.h> #include <stdint.h> #include <sys/mman.h> @@ -438,8 +439,8 @@ FileBackedVector<T>::InitializeExistingFile( int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header); if (min_file_size > file_size) { return absl_ports::InternalError(IcingStringUtil::StringPrintf( - "Inconsistent file size, expected %zd, actual %d", min_file_size, - file_size)); + "Inconsistent file size, expected %" PRId64 ", actual %" PRId64, + min_file_size, file_size)); } // Mmap the content of the vector, excluding the header so its easier to diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index 825b763..5284b6e 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -183,7 +183,7 @@ class PortableFileBackedProtoLog { void SetCompressFlag(bool compress) { SetFlag(kCompressBit, compress); } - bool GetDirtyFlag() { return GetFlag(kDirtyBit); } + bool GetDirtyFlag() const { return GetFlag(kDirtyBit); } void SetDirtyFlag(bool dirty) { SetFlag(kDirtyBit, dirty); } @@ -1186,21 +1186,7 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() { return libtextclassifier3::Status::OK; } - int64_t new_content_size = file_size - header_->GetRewindOffset(); - Crc32 crc; - if (new_content_size < 0) { - // File shrunk, recalculate the entire checksum. - ICING_ASSIGN_OR_RETURN( - crc, - ComputeChecksum(filesystem_, file_path_, Crc32(), - /*start=*/kHeaderReservedBytes, /*end=*/file_size)); - } else { - // Append new changes to the existing checksum. - ICING_ASSIGN_OR_RETURN( - crc, ComputeChecksum(filesystem_, file_path_, - Crc32(header_->GetLogChecksum()), - header_->GetRewindOffset(), file_size)); - } + ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); header_->SetLogChecksum(crc.Get()); header_->SetRewindOffset(file_size); @@ -1219,9 +1205,26 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() { template <typename ProtoT> libtextclassifier3::StatusOr<Crc32> PortableFileBackedProtoLog<ProtoT>::ComputeChecksum() { - return PortableFileBackedProtoLog<ProtoT>::ComputeChecksum( - filesystem_, file_path_, Crc32(), /*start=*/kHeaderReservedBytes, - /*end=*/filesystem_->GetFileSize(file_path_.c_str())); + int64_t file_size = filesystem_->GetFileSize(file_path_.c_str()); + int64_t new_content_size = file_size - header_->GetRewindOffset(); + Crc32 crc; + if (new_content_size == 0) { + // No new protos appended, return cached checksum + return Crc32(header_->GetLogChecksum()); + } else if (new_content_size < 0) { + // File shrunk, recalculate the entire checksum. + ICING_ASSIGN_OR_RETURN( + crc, + ComputeChecksum(filesystem_, file_path_, Crc32(), + /*start=*/kHeaderReservedBytes, /*end=*/file_size)); + } else { + // Append new changes to the existing checksum. + ICING_ASSIGN_OR_RETURN( + crc, ComputeChecksum( + filesystem_, file_path_, Crc32(header_->GetLogChecksum()), + /*start=*/header_->GetRewindOffset(), /*end=*/file_size)); + } + return crc; } } // namespace lib diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc index 04ccab0..f83ccd6 100644 --- a/icing/file/portable-file-backed-proto-log_benchmark.cc +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -246,6 +246,98 @@ static void BM_ComputeChecksum(benchmark::State& state) { } BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20); +static void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make the document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + // Write some content and persist. This should update our cached checksum to + // include the document. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // This ComputeChecksum call shouldn't need to do any computation since we can + // reuse our cached checksum. + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksumWithCachedChecksum); + +static void BM_ComputeChecksumOnlyForTail(benchmark::State& state) { + const Filesystem filesystem; + const std::string file_path = GetTestTempDir() + "/proto.log"; + int max_proto_size = (1 << 24) - 1; // 16 MiB + bool compress = true; + + // Make sure it doesn't already exist. + filesystem.DeleteFile(file_path.c_str()); + + auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create( + &filesystem, file_path, + PortableFileBackedProtoLog<DocumentProto>::Options( + compress, max_proto_size)) + .ValueOrDie() + .proto_log; + + DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + + // Make the document 1KiB + int string_length = 1024; + std::default_random_engine random; + const std::string rand_str = + RandomString(kAlNumAlphabet, string_length, &random); + + auto document_properties = document.add_properties(); + document_properties->set_name("string property"); + document_properties->add_string_values(rand_str); + + // Write some content and persist. This should update our cached checksum to + // include the document. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + ICING_ASSERT_OK(proto_log->PersistToDisk()); + + // Write another proto into the tail, but it's not included in our cached + // checksum since we didn't call persist. + ICING_ASSERT_OK(proto_log->WriteProto(document)); + + // ComputeChecksum should be calculating the checksum of the tail and adding + // it to the cached checksum we have. + for (auto _ : state) { + testing::DoNotOptimize(proto_log->ComputeChecksum()); + } + + // Cleanup after ourselves + filesystem.DeleteFile(file_path.c_str()); +} +BENCHMARK(BM_ComputeChecksumOnlyForTail); + } // namespace } // namespace lib } // namespace icing diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc index 66f87bd..39aa969 100644 --- a/icing/index/iterator/doc-hit-info-iterator-and.cc +++ b/icing/index/iterator/doc-hit-info-iterator-and.cc @@ -162,6 +162,7 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() { DocumentId unused; ICING_ASSIGN_OR_RETURN( unused, AdvanceTo(iterator.get(), potential_document_id)); + (void)unused; // Silence unused warning. } if (iterator->doc_hit_info().document_id() == potential_document_id) { diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index d535d7f..08df4fc 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -45,8 +45,13 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() { if (cached_hits_idx_ == -1) { libtextclassifier3::Status status = RetrieveMoreHits(); if (!status.ok()) { - ICING_LOG(ERROR) << "Failed to retrieve more hits " - << status.error_message(); + if (!absl_ports::IsNotFound(status)) { + // NOT_FOUND is expected to happen (not every term will be in the main + // index!). Other errors are worth logging. + ICING_LOG(ERROR) + << "Encountered unexpected failure while retrieving hits " + << status.error_message(); + } return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h index 8dbe043..179fc93 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.h +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h @@ -82,6 +82,11 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator { protected: // Add DocHitInfos corresponding to term_ to cached_hits_. + // + // Returns: + // - OK, on success + // - NOT_FOUND if no term matching term_ was found in the lexicon. + // - INVALID_ARGUMENT if unable to properly encode the termid virtual libtextclassifier3::Status RetrieveMoreHits() = 0; const std::string term_; diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc index 5553c1e..98bc18e 100644 --- a/icing/index/main/doc-hit-info-iterator-term-main.cc +++ b/icing/index/main/doc-hit-info-iterator-term-main.cc @@ -57,8 +57,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() { if (!absl_ports::IsNotFound(status)) { // NOT_FOUND is expected to happen (not every term will be in the main // index!). Other errors are worth logging. - ICING_LOG(ERROR) << "Failed to retrieve more hits " - << status.error_message(); + ICING_LOG(ERROR) + << "Encountered unexpected failure while retrieving hits " + << status.error_message(); } return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index ce608fc..77da928 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -32,6 +32,7 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/proto/document.pb.h" +#include "icing/proto/persist.pb.h" #include "icing/proto/schema.pb.h" #include "icing/schema-builder.h" #include "icing/schema/schema-store.h" @@ -255,6 +256,74 @@ void BM_Delete(benchmark::State& state) { } BENCHMARK(BM_Delete); +void BM_Create(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + std::string document_store_dir = directory + "/store"; + + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + // Create an initial document store and put some data in. + { + DestructibleDirectory ddir(filesystem, directory); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + ICING_ASSERT_OK(document_store->Put(document)); + ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::FULL)); + } + + // Recreating it with some content to checksum over. + DestructibleDirectory ddir(filesystem, directory); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + + for (auto s : state) { + benchmark::DoNotOptimize(DocumentStore::Create( + &filesystem, document_store_dir, &clock, schema_store.get())); + } +} +BENCHMARK(BM_Create); + +void BM_ComputeChecksum(benchmark::State& state) { + Filesystem filesystem; + Clock clock; + + std::string directory = GetTestTempDir() + "/icing"; + DestructibleDirectory ddir(filesystem, directory); + + std::string document_store_dir = directory + "/store"; + std::unique_ptr<SchemaStore> schema_store = + CreateSchemaStore(filesystem, directory, &clock); + + filesystem.CreateDirectoryRecursively(document_store_dir.data()); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document = CreateDocument("namespace", "uri"); + ICING_ASSERT_OK(document_store->Put(document)); + ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::LITE)); + + for (auto s : state) { + benchmark::DoNotOptimize(document_store->ComputeChecksum()); + } +} +BENCHMARK(BM_ComputeChecksum); + } // namespace } // namespace lib diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index b1a6357..16d0ca0 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -3556,7 +3556,6 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); - DocumentId docid = kInvalidDocumentId; DocumentProto docWithBody = DocumentBuilder() .SetKey("icing", "email/1") @@ -3589,8 +3588,12 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); + DocumentId docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); + ASSERT_NE(docid, kInvalidDocumentId); + docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); + ASSERT_NE(docid, kInvalidDocumentId); ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), IsOkAndHolds(EqualsProto(docWithBody))); @@ -3658,7 +3661,6 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); ASSERT_THAT(schema_store->SetSchema(schema), IsOk()); - DocumentId docid = kInvalidDocumentId; DocumentProto docWithBody = DocumentBuilder() .SetKey("icing", "email/1") @@ -3691,8 +3693,12 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); + DocumentId docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody)); + ASSERT_NE(docid, kInvalidDocumentId); + docid = kInvalidDocumentId; ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody)); + ASSERT_NE(docid, kInvalidDocumentId); ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()), IsOkAndHolds(EqualsProto(docWithBody))); diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index b5282cf..69dfc00 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=384818601) +set(synced_AOSP_CL_number=385604495) |