author     Cassie Wang <cassiewang@google.com>  2021-07-19 12:26:54 -0700
committer  Cassie Wang <cassiewang@google.com>  2021-07-19 12:29:00 -0700
commit     ef3e85a90bf0e16b8e34802b7a3912484d185c84 (patch)
tree       5129f3a74eb616eb2856c6db3392011c1ce384f8
parent     1b85c23555eab498907ef7527575c4fb1adb4f2b (diff)
download   icing-ef3e85a90bf0e16b8e34802b7a3912484d185c84.tar.gz
Sync from upstream.
Descriptions:
==================
Switch to PRId64 string formatting.
==================
Conditionally recompute the checksum for PortableFileBackedProtoLog.
==================
Fix spammy error log encountered in hit iterator.
==================
Fix -Wunused-but-set-variable warnings

Bug: 193141896
Change-Id: I46ca2daa90efe442e9ebeac389e91df21ddc81d6
-rw-r--r--  icing/file/file-backed-vector.h                          |  5
-rw-r--r--  icing/file/portable-file-backed-proto-log.h              | 41
-rw-r--r--  icing/file/portable-file-backed-proto-log_benchmark.cc   | 92
-rw-r--r--  icing/index/iterator/doc-hit-info-iterator-and.cc        |  1
-rw-r--r--  icing/index/lite/doc-hit-info-iterator-term-lite.cc      |  9
-rw-r--r--  icing/index/lite/doc-hit-info-iterator-term-lite.h       |  5
-rw-r--r--  icing/index/main/doc-hit-info-iterator-term-main.cc      |  5
-rw-r--r--  icing/store/document-store_benchmark.cc                  | 69
-rw-r--r--  icing/store/document-store_test.cc                       | 10
-rw-r--r--  synced_AOSP_CL_number.txt                                |  2
10 files changed, 211 insertions, 28 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 61f6923..0989935 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -56,6 +56,7 @@
#ifndef ICING_FILE_FILE_BACKED_VECTOR_H_
#define ICING_FILE_FILE_BACKED_VECTOR_H_
+#include <inttypes.h>
#include <stdint.h>
#include <sys/mman.h>
@@ -438,8 +439,8 @@ FileBackedVector<T>::InitializeExistingFile(
int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header);
if (min_file_size > file_size) {
return absl_ports::InternalError(IcingStringUtil::StringPrintf(
- "Inconsistent file size, expected %zd, actual %d", min_file_size,
- file_size));
+ "Inconsistent file size, expected %" PRId64 ", actual %" PRId64,
+ min_file_size, file_size));
}
// Mmap the content of the vector, excluding the header so its easier to
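
For reference, a minimal standalone sketch of the PRId64 idiom this hunk switches to; it uses plain std::printf instead of IcingStringUtil::StringPrintf, which is a simplification for illustration only:

#include <cinttypes>  // PRId64
#include <cstdint>
#include <cstdio>

int main() {
  // %zd expects ssize_t and %d expects int, so passing int64_t arguments to
  // either is undefined behavior wherever the widths differ. PRId64 expands
  // to the correct conversion specifier for int64_t on every platform.
  int64_t min_file_size = 4096;
  int64_t file_size = 1024;
  std::printf("Inconsistent file size, expected %" PRId64 ", actual %" PRId64 "\n",
              min_file_size, file_size);
  return 0;
}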
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index 825b763..5284b6e 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -183,7 +183,7 @@ class PortableFileBackedProtoLog {
void SetCompressFlag(bool compress) { SetFlag(kCompressBit, compress); }
- bool GetDirtyFlag() { return GetFlag(kDirtyBit); }
+ bool GetDirtyFlag() const { return GetFlag(kDirtyBit); }
void SetDirtyFlag(bool dirty) { SetFlag(kDirtyBit, dirty); }
@@ -1186,21 +1186,7 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() {
return libtextclassifier3::Status::OK;
}
- int64_t new_content_size = file_size - header_->GetRewindOffset();
- Crc32 crc;
- if (new_content_size < 0) {
- // File shrunk, recalculate the entire checksum.
- ICING_ASSIGN_OR_RETURN(
- crc,
- ComputeChecksum(filesystem_, file_path_, Crc32(),
- /*start=*/kHeaderReservedBytes, /*end=*/file_size));
- } else {
- // Append new changes to the existing checksum.
- ICING_ASSIGN_OR_RETURN(
- crc, ComputeChecksum(filesystem_, file_path_,
- Crc32(header_->GetLogChecksum()),
- header_->GetRewindOffset(), file_size));
- }
+ ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum());
header_->SetLogChecksum(crc.Get());
header_->SetRewindOffset(file_size);
@@ -1219,9 +1205,26 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() {
template <typename ProtoT>
libtextclassifier3::StatusOr<Crc32>
PortableFileBackedProtoLog<ProtoT>::ComputeChecksum() {
- return PortableFileBackedProtoLog<ProtoT>::ComputeChecksum(
- filesystem_, file_path_, Crc32(), /*start=*/kHeaderReservedBytes,
- /*end=*/filesystem_->GetFileSize(file_path_.c_str()));
+ int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
+ int64_t new_content_size = file_size - header_->GetRewindOffset();
+ Crc32 crc;
+ if (new_content_size == 0) {
+ // No new protos appended, return cached checksum
+ return Crc32(header_->GetLogChecksum());
+ } else if (new_content_size < 0) {
+ // File shrunk, recalculate the entire checksum.
+ ICING_ASSIGN_OR_RETURN(
+ crc,
+ ComputeChecksum(filesystem_, file_path_, Crc32(),
+ /*start=*/kHeaderReservedBytes, /*end=*/file_size));
+ } else {
+ // Append new changes to the existing checksum.
+ ICING_ASSIGN_OR_RETURN(
+ crc, ComputeChecksum(
+ filesystem_, file_path_, Crc32(header_->GetLogChecksum()),
+ /*start=*/header_->GetRewindOffset(), /*end=*/file_size));
+ }
+ return crc;
}
} // namespace lib
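
To make the caching scheme behind the new ComputeChecksum easier to follow, here is a self-contained sketch over an in-memory buffer. It uses zlib's crc32 directly and invented names (CachedChecksumLog, Append, Crc); the real class wraps the same idea around a file, a rewind offset stored in the header, and Icing's Crc32 type.

#include <zlib.h>

#include <cstddef>
#include <cstdint>
#include <string>

// Sketch of a log that caches its checksum up to a "rewind offset" so that a
// later ComputeChecksum only has to walk the bytes appended since the last
// PersistToDisk. Names here are illustrative, not Icing's API.
class CachedChecksumLog {
 public:
  void Append(const std::string& bytes) { content_ += bytes; }

  // Folds everything up to the current end into the cached checksum.
  void PersistToDisk() {
    cached_crc_ = ComputeChecksum();
    rewind_offset_ = static_cast<int64_t>(content_.size());
  }

  uint32_t ComputeChecksum() const {
    int64_t new_content_size =
        static_cast<int64_t>(content_.size()) - rewind_offset_;
    if (new_content_size == 0) {
      // Nothing appended since the last persist: reuse the cached value.
      return cached_crc_;
    }
    if (new_content_size < 0) {
      // Content shrank: the cached value is stale, recompute from scratch.
      return Crc(/*seed=*/0, content_.data(), content_.size());
    }
    // Content grew: extend the cached checksum over the new tail only.
    return Crc(cached_crc_, content_.data() + rewind_offset_,
               static_cast<size_t>(new_content_size));
  }

 private:
  static uint32_t Crc(uint32_t seed, const char* data, size_t len) {
    return static_cast<uint32_t>(crc32(
        seed, reinterpret_cast<const Bytef*>(data), static_cast<uInt>(len)));
  }

  std::string content_;
  int64_t rewind_offset_ = 0;
  uint32_t cached_crc_ = 0;
};

The payoff is that PersistToDisk and ComputeChecksum share one incremental path: checksumming a large, already-persisted log costs work proportional to the unpersisted tail rather than to the whole file.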
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc
index 04ccab0..f83ccd6 100644
--- a/icing/file/portable-file-backed-proto-log_benchmark.cc
+++ b/icing/file/portable-file-backed-proto-log_benchmark.cc
@@ -246,6 +246,98 @@ static void BM_ComputeChecksum(benchmark::State& state) {
}
BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20);
+static void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = GetTestTempDir() + "/proto.log";
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ // Make the document 1KiB
+ int string_length = 1024;
+ std::default_random_engine random;
+ const std::string rand_str =
+ RandomString(kAlNumAlphabet, string_length, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ // Write some content and persist. This should update our cached checksum to
+ // include the document.
+ ICING_ASSERT_OK(proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // This ComputeChecksum call shouldn't need to do any computation since we can
+ // reuse our cached checksum.
+ for (auto _ : state) {
+ testing::DoNotOptimize(proto_log->ComputeChecksum());
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksumWithCachedChecksum);
+
+static void BM_ComputeChecksumOnlyForTail(benchmark::State& state) {
+ const Filesystem filesystem;
+ const std::string file_path = GetTestTempDir() + "/proto.log";
+ int max_proto_size = (1 << 24) - 1; // 16 MiB
+ bool compress = true;
+
+ // Make sure it doesn't already exist.
+ filesystem.DeleteFile(file_path.c_str());
+
+ auto proto_log = PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem, file_path,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress, max_proto_size))
+ .ValueOrDie()
+ .proto_log;
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ // Make the document 1KiB
+ int string_length = 1024;
+ std::default_random_engine random;
+ const std::string rand_str =
+ RandomString(kAlNumAlphabet, string_length, &random);
+
+ auto document_properties = document.add_properties();
+ document_properties->set_name("string property");
+ document_properties->add_string_values(rand_str);
+
+ // Write some content and persist. This should update our cached checksum to
+ // include the document.
+ ICING_ASSERT_OK(proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // Write another proto into the tail, but it's not included in our cached
+ // checksum since we didn't call persist.
+ ICING_ASSERT_OK(proto_log->WriteProto(document));
+
+ // ComputeChecksum should be calculating the checksum of the tail and adding
+ // it to the cached checksum we have.
+ for (auto _ : state) {
+ testing::DoNotOptimize(proto_log->ComputeChecksum());
+ }
+
+ // Cleanup after ourselves
+ filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksumOnlyForTail);
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
index 66f87bd..39aa969 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -162,6 +162,7 @@ libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
DocumentId unused;
ICING_ASSIGN_OR_RETURN(
unused, AdvanceTo(iterator.get(), potential_document_id));
+ (void)unused; // Silence unused warning.
}
if (iterator->doc_hit_info().document_id() == potential_document_id) {
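
The (void) cast added above is the standard way to tell the compiler a value is deliberately left unread; a tiny sketch of the pattern, with a made-up helper for illustration:

int Compute() { return 42; }  // hypothetical helper, for illustration only

void Demo() {
  // Assigned but never read afterwards, which is exactly what
  // -Wunused-but-set-variable flags. The void cast marks the read as
  // intentional; [[maybe_unused]] on the declaration works as well.
  int unused = Compute();
  (void)unused;
}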
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index d535d7f..08df4fc 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -45,8 +45,13 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() {
if (cached_hits_idx_ == -1) {
libtextclassifier3::Status status = RetrieveMoreHits();
if (!status.ok()) {
- ICING_LOG(ERROR) << "Failed to retrieve more hits "
- << status.error_message();
+ if (!absl_ports::IsNotFound(status)) {
+ // NOT_FOUND is expected to happen (not every term will be in the main
+ // index!). Other errors are worth logging.
+ ICING_LOG(ERROR)
+ << "Encountered unexpected failure while retrieving hits "
+ << status.error_message();
+ }
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index 8dbe043..179fc93 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -82,6 +82,11 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
protected:
// Add DocHitInfos corresponding to term_ to cached_hits_.
+ //
+ // Returns:
+ // - OK, on success
+ // - NOT_FOUND if no term matching term_ was found in the lexicon.
+ // - INVALID_ARGUMENT if unable to properly encode the termid
virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
const std::string term_;
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index 5553c1e..98bc18e 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -57,8 +57,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
if (!absl_ports::IsNotFound(status)) {
// NOT_FOUND is expected to happen (not every term will be in the main
// index!). Other errors are worth logging.
- ICING_LOG(ERROR) << "Failed to retrieve more hits "
- << status.error_message();
+ ICING_LOG(ERROR)
+ << "Encountered unexpected failure while retrieving hits "
+ << status.error_message();
}
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index ce608fc..77da928 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -32,6 +32,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/persist.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
@@ -255,6 +256,74 @@ void BM_Delete(benchmark::State& state) {
}
BENCHMARK(BM_Delete);
+void BM_Create(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ std::string document_store_dir = directory + "/store";
+
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ // Create an initial document store and put some data in.
+ {
+ DestructibleDirectory ddir(filesystem, directory);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+ ICING_ASSERT_OK(document_store->Put(document));
+ ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::FULL));
+ }
+
+ // Recreating it with some content to checksum over.
+ DestructibleDirectory ddir(filesystem, directory);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(DocumentStore::Create(
+ &filesystem, document_store_dir, &clock, schema_store.get()));
+ }
+}
+BENCHMARK(BM_Create);
+
+void BM_ComputeChecksum(benchmark::State& state) {
+ Filesystem filesystem;
+ Clock clock;
+
+ std::string directory = GetTestTempDir() + "/icing";
+ DestructibleDirectory ddir(filesystem, directory);
+
+ std::string document_store_dir = directory + "/store";
+ std::unique_ptr<SchemaStore> schema_store =
+ CreateSchemaStore(filesystem, directory, &clock);
+
+ filesystem.CreateDirectoryRecursively(document_store_dir.data());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem, document_store_dir, &clock,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document = CreateDocument("namespace", "uri");
+ ICING_ASSERT_OK(document_store->Put(document));
+ ICING_ASSERT_OK(document_store->PersistToDisk(PersistType::LITE));
+
+ for (auto s : state) {
+ benchmark::DoNotOptimize(document_store->ComputeChecksum());
+ }
+}
+BENCHMARK(BM_ComputeChecksum);
+
} // namespace
} // namespace lib
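
The new benchmarks use the standard Google Benchmark idioms: a state loop for the timed region and DoNotOptimize to keep the measured call from being elided. A minimal self-contained sketch of the same shape, with a toy workload standing in for DocumentStore:

#include <benchmark/benchmark.h>

#include <cstdint>

// Toy stand-in for the expensive call being measured.
static uint64_t ChecksumOf(uint64_t n) {
  uint64_t crc = 0;
  for (uint64_t i = 0; i < n; ++i) crc = crc * 31 + i;
  return crc;
}

static void BM_Checksum(benchmark::State& state) {
  // Setup outside the loop is not timed.
  const uint64_t size = static_cast<uint64_t>(state.range(0));
  for (auto _ : state) {
    // DoNotOptimize keeps the compiler from discarding the call whose
    // result is otherwise unused.
    benchmark::DoNotOptimize(ChecksumOf(size));
  }
}
BENCHMARK(BM_Checksum)->Range(1024, 1 << 20);

BENCHMARK_MAIN();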
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index b1a6357..16d0ca0 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -3556,7 +3556,6 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
- DocumentId docid = kInvalidDocumentId;
DocumentProto docWithBody =
DocumentBuilder()
.SetKey("icing", "email/1")
@@ -3589,8 +3588,12 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
+ DocumentId docid = kInvalidDocumentId;
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
+ docid = kInvalidDocumentId;
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
IsOkAndHolds(EqualsProto(docWithBody)));
@@ -3658,7 +3661,6 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
- DocumentId docid = kInvalidDocumentId;
DocumentProto docWithBody =
DocumentBuilder()
.SetKey("icing", "email/1")
@@ -3691,8 +3693,12 @@ TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
+ DocumentId docid = kInvalidDocumentId;
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
+ docid = kInvalidDocumentId;
ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+ ASSERT_NE(docid, kInvalidDocumentId);
ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
IsOkAndHolds(EqualsProto(docWithBody)));
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index b5282cf..69dfc00 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=384818601)
+set(synced_AOSP_CL_number=385604495)