aboutsummaryrefslogtreecommitdiff
path: root/icing/store
diff options
context:
space:
mode:
author Terry Wang <tytytyww@google.com> 2020-10-28 01:36:03 -0700
committer Terry Wang <tytytyww@google.com> 2020-10-28 01:36:03 -0700
commit 71b8eddc99c6337ff304a2f3cd0588c42239202f (patch)
tree 7c54063a7f10e35a282cdfd4cafd369696672fbe /icing/store
parent 5abfe5bcac00f4f188d3d8041fa97bf77206b577 (diff)
download icing-71b8eddc99c6337ff304a2f3cd0588c42239202f.tar.gz
Pull upstream changes.
Change-Id: I73ea5f80ccf16a02519f6f7ccfc993e9b0f39f86
Diffstat (limited to 'icing/store')
-rw-r--r-- icing/store/document-store.cc 87
-rw-r--r-- icing/store/document-store.h 30
-rw-r--r-- icing/store/document-store_test.cc 390
-rw-r--r-- icing/store/usage-store.cc 84
-rw-r--r-- icing/store/usage-store.h 30
-rw-r--r-- icing/store/usage-store_test.cc 347
6 files changed, 851 insertions, 117 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 7577f6b..1e47d59 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -59,6 +59,7 @@ constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
constexpr char kScoreCacheFilename[] = "score_cache";
constexpr char kFilterCacheFilename[] = "filter_cache";
constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
+constexpr char kUsageStoreDirectoryName[] = "usage_store";
constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB
@@ -123,6 +124,10 @@ std::string MakeNamespaceMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kNamespaceMapperFilename);
}
+std::string MakeUsageStoreDirectoryName(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName);
+}
+
// TODO(adorokhine): This class internally uses an 8-byte fingerprint of the
// Key and stores the key/value in a file-backed-trie that adds an ~80 byte
// overhead per key. As we know that these fingerprints are always 8-bytes in
@@ -309,6 +314,14 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
MakeNamespaceMapperFilename(base_dir_),
kNamespaceMapperMaxSize));
+ ICING_ASSIGN_OR_RETURN(
+ usage_store_,
+ UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
+
+ // Ensure the usage store is the correct size.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->TruncateTo(document_id_mapper_->num_elements()));
+
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
if (checksum.Get() != header.checksum) {
return absl_ports::InternalError(
@@ -325,6 +338,12 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
ICING_RETURN_IF_ERROR(ResetFilterCache());
ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
+ // Creates a new UsageStore instance. Note that we don't reset the data in
+ // usage store here because we're not able to regenerate the usage scores.
+ ICING_ASSIGN_OR_RETURN(
+ usage_store_,
+ UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
+
// Iterates through document log
auto iterator = document_log_->GetIterator();
auto iterator_status = iterator.Advance();
@@ -478,6 +497,10 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
"Failed to iterate through proto log.");
}
+ // Shrink usage_store_ to the correct size.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->TruncateTo(document_id_mapper_->num_elements()));
+
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
@@ -727,9 +750,19 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
expiration_timestamp_ms)));
if (old_document_id_or.ok()) {
- // Mark the old document id as deleted.
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
- old_document_id_or.ValueOrDie(), kDocDeletedFlag));
+ DocumentId old_document_id = old_document_id_or.ValueOrDie();
+ auto offset_or = DoesDocumentExistAndGetFileOffset(old_document_id);
+
+ if (offset_or.ok()) {
+ // The old document exists, copy over the usage scores.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id,
+ /*to_document_id=*/new_document_id));
+
+ // Hard delete the old document.
+ ICING_RETURN_IF_ERROR(
+ HardDelete(old_document_id, offset_or.ValueOrDie()));
+ }
}
return new_document_id;
@@ -887,8 +920,7 @@ libtextclassifier3::Status DocumentStore::Delete(
if (soft_delete) {
return SoftDelete(name_space, uri, document_id);
} else {
- uint64_t document_log_offset = file_offset_or.ValueOrDie();
- return HardDelete(document_id, document_log_offset);
+ return HardDelete(document_id, file_offset_or.ValueOrDie());
}
}
@@ -915,6 +947,7 @@ libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id,
}
}
+// TODO(b/169969469): Consider removing SoftDelete().
libtextclassifier3::Status DocumentStore::SoftDelete(
std::string_view name_space, std::string_view uri, DocumentId document_id) {
// Update ground truth first.
@@ -935,7 +968,7 @@ libtextclassifier3::Status DocumentStore::SoftDelete(
}
libtextclassifier3::Status DocumentStore::HardDelete(
- DocumentId document_id, uint64_t document_log_offset) {
+ DocumentId document_id, int64_t document_log_offset) {
// Erases document proto.
ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset));
return ClearDerivedData(document_id);
@@ -981,6 +1014,19 @@ DocumentStore::GetDocumentFilterData(DocumentId document_id) const {
return document_filter_data;
}
+libtextclassifier3::StatusOr<UsageStore::UsageScores>
+DocumentStore::GetUsageScores(DocumentId document_id) const {
+ return usage_store_->GetUsageScores(document_id);
+}
+
+libtextclassifier3::Status DocumentStore::ReportUsage(
+ const UsageReport& usage_report) {
+ ICING_ASSIGN_OR_RETURN(DocumentId document_id,
+ GetDocumentId(usage_report.document_namespace(),
+ usage_report.document_uri()));
+ return usage_store_->AddUsageReport(usage_report, document_id);
+}
+
libtextclassifier3::Status DocumentStore::DeleteByNamespace(
std::string_view name_space, bool soft_delete) {
auto namespace_id_or = namespace_mapper_->Get(name_space);
@@ -1132,6 +1178,7 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() {
ICING_RETURN_IF_ERROR(score_cache_->PersistToDisk());
ICING_RETURN_IF_ERROR(filter_cache_->PersistToDisk());
ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk());
// Update the combined checksum and write to header file.
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
@@ -1334,15 +1381,21 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
// Guaranteed to have a document now.
DocumentProto document_to_keep = document_or.ValueOrDie();
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- new_doc_store->Put(std::move(document_to_keep)).status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // that can support error logging.
+ auto new_document_id_or = new_doc_store->Put(std::move(document_to_keep));
+ if (!new_document_id_or.ok()) {
+ ICING_LOG(ERROR) << new_document_id_or.status().error_message()
<< "Failed to write into new document store";
- return status;
+ return new_document_id_or.status();
}
+
+ // Copy over usage scores.
+ ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
+ usage_store_->GetUsageScores(document_id));
+ DocumentId new_document_id = new_document_id_or.ValueOrDie();
+ ICING_RETURN_IF_ERROR(
+ new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk());
@@ -1430,7 +1483,13 @@ libtextclassifier3::Status DocumentStore::ClearDerivedData(
document_id, DocumentFilterData(kInvalidNamespaceId, kInvalidSchemaTypeId,
/*expiration_timestamp_ms=*/-1)));
- return libtextclassifier3::Status::OK;
+ // Clears the usage scores.
+ return usage_store_->DeleteUsageScores(document_id);
+}
+
+libtextclassifier3::Status DocumentStore::SetUsageScores(
+ DocumentId document_id, const UsageStore::UsageScores& usage_scores) {
+ return usage_store_->SetUsageScores(document_id, usage_scores);
}
} // namespace lib
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 2ac1c71..5c1b902 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -34,6 +34,7 @@
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
#include "icing/store/namespace-id.h"
+#include "icing/store/usage-store.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
#include "icing/util/document-validator.h"
@@ -223,6 +224,24 @@ class DocumentStore {
libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData(
DocumentId document_id) const;
+ // Gets the usage scores of a document.
+ //
+ // Returns:
+ // UsageScores on success
+ // INVALID_ARGUMENT if document_id is invalid
+ // INTERNAL_ERROR on I/O errors
+ libtextclassifier3::StatusOr<UsageStore::UsageScores> GetUsageScores(
+ DocumentId document_id) const;
+
+ // Reports usage. The corresponding usage scores of the specified document in
+ // the report will be updated.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND if the [namespace + uri] key in the report doesn't exist
+ // INTERNAL_ERROR on I/O errors.
+ libtextclassifier3::Status ReportUsage(const UsageReport& usage_report);
+
// Deletes all documents belonging to the given namespace. The documents will
// be marked as deleted if 'soft_delete' is true, otherwise they will be
// erased immediately.
@@ -391,6 +410,11 @@ class DocumentStore {
// DocumentStore. Namespaces may be removed from the mapper during compaction.
std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_;
+ // A storage class that caches all usage scores. Usage scores are not
+ // considered as ground truth. Usage scores are associated with document ids
+ // so they need to be updated when document ids change.
+ std::unique_ptr<UsageStore> usage_store_;
+
// Used internally to indicate whether the class has been initialized. This is
// to guard against cases where the object has been created, but Initialize
// fails in the constructor. If we have successfully exited the constructor,
@@ -497,7 +521,7 @@ class DocumentStore {
// OK on success
// INTERNAL_ERROR on IO error
libtextclassifier3::Status HardDelete(DocumentId document_id,
- uint64_t document_log_offset);
+ int64_t document_log_offset);
// Helper method to find a DocumentId that is associated with the given
// namespace and uri.
@@ -539,6 +563,10 @@ class DocumentStore {
// Helper method to clear the derived data of a document
libtextclassifier3::Status ClearDerivedData(DocumentId document_id);
+
+ // Sets usage scores for the given document.
+ libtextclassifier3::Status SetUsageScores(
+ DocumentId document_id, const UsageStore::UsageScores& usage_scores);
};
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index f857481..301dbdd 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -42,6 +42,8 @@
namespace icing {
namespace lib {
+namespace {
+
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
using ::testing::Eq;
@@ -54,6 +56,17 @@ using ::testing::Not;
using ::testing::Return;
using ::testing::UnorderedElementsAre;
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64 timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
class DocumentStoreTest : public ::testing::Test {
protected:
DocumentStoreTest()
@@ -1297,7 +1310,7 @@ TEST_F(DocumentStoreTest, GetDiskUsage) {
// Bad file system
MockFilesystem mock_filesystem;
- ON_CALL(mock_filesystem, GetDiskUsage(A<const char *>()))
+ ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>()))
.WillByDefault(Return(Filesystem::kBadFileSize));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem,
@@ -1465,6 +1478,63 @@ TEST_F(DocumentStoreTest, HardDeleteClearsScoreCache) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
+TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Soft delete the document.
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
+
+ // The scores should be the same.
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Hard delete the document.
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+
+ // The scores should be cleared.
+ expected_scores.usage_type1_count = 0;
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
TEST_F(DocumentStoreTest,
ExpirationTimestampIsSumOfNonZeroTtlAndCreationTimestamp) {
DocumentProto document = DocumentBuilder()
@@ -1572,7 +1642,7 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
// With default doc score 0
.Build();
DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "email/1")
+ .SetKey("icing", "email/2")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetScore(5)
@@ -2346,5 +2416,321 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) {
UnorderedElementsAre("namespace1"));
}
+TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1 and time 1.
+ UsageReport usage_report_type1_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 1 and time 5, time should be updated.
+ UsageReport usage_report_type1_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time5));
+
+ expected_scores.usage_type1_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 2 and time 1.
+ UsageReport usage_report_type2_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time1));
+
+ expected_scores.usage_type2_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type2_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 2 and time 5.
+ UsageReport usage_report_type2_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time5));
+
+ expected_scores.usage_type2_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type2_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 3 and time 1.
+ UsageReport usage_report_type3_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time1));
+
+ expected_scores.usage_type3_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type3_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 3 and time 5.
+ UsageReport usage_report_type3_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time5));
+
+ expected_scores.usage_type3_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type3_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 2.
+ UsageReport usage_report_type2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2));
+
+ ++expected_scores.usage_type2_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 3.
+ UsageReport usage_report_type3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3));
+
+ ++expected_scores.usage_type3_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
+ UsageStore::UsageScores expected_scores;
+ DocumentId document_id;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+ }
+
+ // Change the DocStore's header combined checksum so that it won't match the
+ // recalculated checksum on initialization. This will force a regeneration of
+ // derived files from ground truth.
+ const std::string header_file =
+ absl_ports::StrCat(document_store_dir_, "/document_store_header");
+ DocumentStore::Header header;
+ header.magic = DocumentStore::Header::kMagic;
+ header.checksum = 10; // Arbitrary garbage checksum
+ filesystem_.DeleteFile(header_file.c_str());
+ filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+ // Successfully recover from a corrupt derived file issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Usage scores should be the same.
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
+ UsageStore::UsageScores expected_scores;
+ DocumentId document_id;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id, document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+ }
+
+ // "Corrupt" the content written in the log by adding non-checksummed data to
+ // it. This will mess up the checksum of the proto log, forcing it to rewind
+ // to the last saved point.
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+ const std::string serialized_document = document.SerializeAsString();
+
+ const std::string document_log_file =
+ absl_ports::StrCat(document_store_dir_, "/document_log");
+ int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
+ filesystem_.PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(), serialized_document.size());
+
+ // Successfully recover from a data loss issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Usage scores should still be available.
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Update the document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId updated_document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+ // We should get a different document id.
+ ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
+
+ // Usage scores should be the same.
+ EXPECT_THAT(document_store->GetUsageScores(updated_document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest,
+ UsageScoresShouldNotBeCopiedOverFromOldSoftDeletedDocs) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Soft delete the doc.
+ ICING_ASSERT_OK(document_store->Delete(document_id, /*soft_delete=*/true));
+
+ // Put the same document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId updated_document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+ // We should get a different document id.
+ ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
+
+ // Usage scores should be cleared.
+ EXPECT_THAT(document_store->GetUsageScores(updated_document_id),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store->Put(DocumentProto(test_document1_)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store->Put(DocumentProto(test_document2_)));
+ ICING_ASSERT_OK(document_store->Delete(document_id1));
+
+ // Report usage of document 2.
+ UsageReport usage_report = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id2),
+ IsOkAndHolds(expected_scores));
+
+ // Run optimize
+ std::string optimized_dir = document_store_dir_ + "/optimize_test";
+ filesystem_.CreateDirectoryRecursively(optimized_dir.c_str());
+ ICING_ASSERT_OK(document_store->OptimizeInto(optimized_dir));
+
+ // Get optimized document store
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> optimized_document_store,
+ DocumentStore::Create(&filesystem_, optimized_dir, &fake_clock_,
+ schema_store_.get()));
+
+ // Usage scores should be the same.
+ // The original document_id2 should have become document_id2 - 1.
+ ASSERT_THAT(optimized_document_store->GetUsageScores(document_id2 - 1),
+ IsOkAndHolds(expected_scores));
+}
+
+} // namespace
+
} // namespace lib
} // namespace icing
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
index 911c45a..7a0af9c 100644
--- a/icing/store/usage-store.cc
+++ b/icing/store/usage-store.cc
@@ -31,10 +31,32 @@ libtextclassifier3::StatusOr<std::unique_ptr<UsageStore>> UsageStore::Create(
const Filesystem* filesystem, const std::string& base_dir) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
+ if (!filesystem->CreateDirectoryRecursively(base_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create UsageStore directory: ", base_dir));
+ }
+
+ const std::string score_cache_filename =
+ MakeUsageScoreCacheFilename(base_dir);
+
auto usage_score_cache_or = FileBackedVector<UsageScores>::Create(
- *filesystem, MakeUsageScoreCacheFilename(base_dir),
+ *filesystem, score_cache_filename,
MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+ if (absl_ports::IsFailedPrecondition(usage_score_cache_or.status())) {
+ // File checksum doesn't match the stored checksum. Delete and recreate the
+ // file.
+ ICING_RETURN_IF_ERROR(
+ FileBackedVector<int64_t>::Delete(*filesystem, score_cache_filename));
+
+ ICING_VLOG(1) << "The score cache file in UsageStore is corrupted, all "
+ "scores have been reset.";
+
+ usage_score_cache_or = FileBackedVector<UsageScores>::Create(
+ *filesystem, score_cache_filename,
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+ }
+
if (!usage_score_cache_or.ok()) {
ICING_LOG(ERROR) << usage_score_cache_or.status().error_message()
<< "Failed to initialize usage_score_cache";
@@ -111,9 +133,7 @@ libtextclassifier3::Status UsageStore::AddUsageReport(const UsageReport& report,
}
// Write updated usage scores to file.
- ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, usage_scores));
-
- return libtextclassifier3::Status::OK;
+ return usage_score_cache_->Set(document_id, usage_scores);
}
libtextclassifier3::Status UsageStore::DeleteUsageScores(
@@ -123,10 +143,13 @@ libtextclassifier3::Status UsageStore::DeleteUsageScores(
"Document id %d is invalid.", document_id));
}
- // Clear all the scores of the document.
- ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, UsageScores()));
+ if (document_id >= usage_score_cache_->num_elements()) {
+ // Nothing to delete.
+ return libtextclassifier3::Status::OK;
+ }
- return libtextclassifier3::Status::OK;
+ // Clear all the scores of the document.
+ return usage_score_cache_->Set(document_id, UsageScores());
}
libtextclassifier3::StatusOr<UsageStore::UsageScores>
@@ -149,20 +172,55 @@ UsageStore::GetUsageScores(DocumentId document_id) {
}
libtextclassifier3::Status UsageStore::SetUsageScores(
- DocumentId document_id, UsageScores usage_scores) {
+ DocumentId document_id, const UsageScores& usage_scores) {
if (!IsDocumentIdValid(document_id)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Document id %d is invalid.", document_id));
}
- ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, usage_scores));
+ return usage_score_cache_->Set(document_id, usage_scores);
+}
+
+libtextclassifier3::Status UsageStore::CloneUsageScores(
+ DocumentId from_document_id, DocumentId to_document_id) {
+ if (!IsDocumentIdValid(from_document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "from_document_id %d is invalid.", from_document_id));
+ }
+
+ if (!IsDocumentIdValid(to_document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "to_document_id %d is invalid.", to_document_id));
+ }
- return libtextclassifier3::Status::OK;
+ auto usage_scores_or = usage_score_cache_->Get(from_document_id);
+ if (usage_scores_or.ok()) {
+ return usage_score_cache_->Set(to_document_id,
+ *std::move(usage_scores_or).ValueOrDie());
+ } else if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
+ // No usage scores found. Set default scores to to_document_id.
+ return usage_score_cache_->Set(to_document_id, UsageScores());
+ }
+
+ // Real error
+ return usage_scores_or.status();
}
libtextclassifier3::Status UsageStore::PersistToDisk() {
- ICING_RETURN_IF_ERROR(usage_score_cache_->PersistToDisk());
- return libtextclassifier3::Status::OK;
+ return usage_score_cache_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> UsageStore::ComputeChecksum() {
+ return usage_score_cache_->ComputeChecksum();
+}
+
+libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) {
+ if (num_documents >= usage_score_cache_->num_elements()) {
+ // No need to truncate
+ return libtextclassifier3::Status::OK;
+ }
+ // num_documents is an element count, not a document id, so no "+1" is needed.
+ return usage_score_cache_->TruncateTo(num_documents);
}
libtextclassifier3::Status UsageStore::Reset() {
@@ -186,7 +244,7 @@ libtextclassifier3::Status UsageStore::Reset() {
}
usage_score_cache_ = std::move(usage_score_cache_or).ValueOrDie();
- return libtextclassifier3::Status::OK;
+ return PersistToDisk();
}
} // namespace lib
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
index 9a8c286..0a622a0 100644
--- a/icing/store/usage-store.h
+++ b/icing/store/usage-store.h
@@ -104,7 +104,6 @@ class UsageStore {
// Returns:
// UsageScores on success
// INVALID_ARGUMENT if document_id is invalid
- // NOT_FOUND if no scores are found for the document
// INTERNAL_ERROR on I/O errors
//
// TODO(b/169433395): return a pointer instead of an object.
@@ -121,7 +120,19 @@ class UsageStore {
// INVALID_ARGUMENT if document_id is invalid
// INTERNAL_ERROR on I/O errors
libtextclassifier3::Status SetUsageScores(DocumentId document_id,
- UsageScores usage_scores);
+ const UsageScores& usage_scores);
+
+ // Clones the usage scores from one document to another.
+ //
+ // Returns:
+ // OK on success
+ // INVALID_ARGUMENT if any of the document ids is invalid
+ // INTERNAL_ERROR on I/O errors
+ //
+ // TODO(b/169433395): We can remove this method once GetUsageScores() returns
+ // a pointer.
+ libtextclassifier3::Status CloneUsageScores(DocumentId from_document_id,
+ DocumentId to_document_id);
// Syncs data to disk.
//
@@ -130,6 +141,21 @@ class UsageStore {
// INTERNAL on I/O error
libtextclassifier3::Status PersistToDisk();
+ // Updates checksum of the usage scores and returns it.
+ //
+ // Returns:
+ // A Crc32 on success
+ // INTERNAL_ERROR if the internal state is inconsistent
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
+ // Resizes the storage so that only the usage scores of the first
+ // num_documents documents (ids 0 to num_documents - 1) are stored.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if num_documents is negative
+ libtextclassifier3::Status TruncateTo(DocumentId num_documents);
+
// Deletes all usage data and re-initialize the storage.
//
// Returns:
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index 39985f0..f7fa778 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -53,6 +53,22 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri,
return usage_report;
}
+UsageStore::UsageScores CreateUsageScores(uint32_t type1_timestamp,
+ uint32_t type2_timestamp,
+ uint32_t type3_timestamp,
+ int type1_count, int type2_count,
+ int type3_count) {
+ UsageStore::UsageScores scores;
+ scores.usage_type1_last_used_timestamp_s = type1_timestamp;
+ scores.usage_type2_last_used_timestamp_s = type2_timestamp;
+ scores.usage_type3_last_used_timestamp_s = type3_timestamp;
+ scores.usage_type1_count = type1_count;
+ scores.usage_type2_count = type2_count;
+ scores.usage_type3_count = type3_count;
+
+ return scores;
+}
+
TEST_F(UsageStoreTest, CreationShouldSucceed) {
EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_), IsOk());
}
@@ -138,11 +154,9 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) {
// Report a usage with timestamp 5.
usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1);
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 5;
- expected_scores.usage_type1_count = 1;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/5, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
@@ -174,10 +188,10 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) {
// Report a usage with type 1.
usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1);
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_count = 1;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
+ ;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 1.
@@ -222,13 +236,9 @@ TEST_F(UsageStoreTest, SetAndGetUsageScores) {
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
// Verify that set and get results are consistent.
ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
@@ -246,15 +256,8 @@ TEST_F(UsageStoreTest, ImplicitlyInitializedScoresShouldBeZero) {
// Now the scores of document 1 have been implicitly initialized. The scores
// should all be 0.
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 0;
- expected_scores.usage_type2_last_used_timestamp_s = 0;
- expected_scores.usage_type3_last_used_timestamp_s = 0;
- expected_scores.usage_type1_count = 0;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(UsageStore::UsageScores()));
}
TEST_F(UsageStoreTest, DeleteUsageScores) {
@@ -262,27 +265,59 @@ TEST_F(UsageStoreTest, DeleteUsageScores) {
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ;
ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
// Delete the usage scores of document 1, all the scores of document 1 should
// be 0.
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 0;
- expected_scores.usage_type2_last_used_timestamp_s = 0;
- expected_scores.usage_type3_last_used_timestamp_s = 0;
- expected_scores.usage_type1_count = 0;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
ICING_EXPECT_OK(usage_store->DeleteUsageScores(/*document_id=*/1));
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, CloneUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and assign them to document 1.
+ UsageStore::UsageScores scores_a = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ;
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores_a));
+
+ // Create another set of usage scores with some random numbers and assign them
+ // to document 2.
+ UsageStore::UsageScores scores_b = CreateUsageScores(
+ /*type1_timestamp=*/111, /*type2_timestamp=*/666, /*type3_timestamp=*/333,
+ /*type1_count=*/50, /*type2_count=*/30, /*type3_count=*/100);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores_b));
+
+ // Clone scores from document 1 to document 3.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/1,
+ /*to_document_id=*/3),
+ IsOk());
+
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(scores_a));
+
+ // Clone scores from document 2 to document 3.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/2,
+ /*to_document_id=*/3),
+ IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(scores_b));
+
+ // Clone scores from document 4 to document 3, scores should be set to
+ // default.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/4,
+ /*to_document_id=*/3),
+ IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(UsageStore::UsageScores()));
}
TEST_F(UsageStoreTest, PersistToDisk) {
@@ -290,49 +325,125 @@ TEST_F(UsageStoreTest, PersistToDisk) {
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
- ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
EXPECT_THAT(usage_store->PersistToDisk(), IsOk());
}
-TEST_F(UsageStoreTest, Reset) {
+TEST_F(UsageStoreTest, ComputeChecksum) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
UsageStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum1, usage_store->ComputeChecksum());
// Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
-
- // Set scores for document 1 and document 2.
- ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
- ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
-
- EXPECT_THAT(usage_store->Reset(), IsOk());
-
- // After resetting, all the scores are cleared.
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 0;
- expected_scores.usage_type2_last_used_timestamp_s = 0;
- expected_scores.usage_type3_last_used_timestamp_s = 0;
- expected_scores.usage_type1_count = 0;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum2, usage_store->ComputeChecksum());
+
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum3, usage_store->ComputeChecksum());
+
+ EXPECT_THAT(checksum1, Not(Eq(checksum2)));
+ EXPECT_THAT(checksum1, Not(Eq(checksum3)));
+ EXPECT_THAT(checksum2, Not(Eq(checksum3)));
+
+ // Without changing the store, checksum should be the same.
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum4, usage_store->ComputeChecksum());
+ EXPECT_THAT(checksum3, Eq(checksum4));
+}
+
+TEST_F(UsageStoreTest, TruncateTo) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and set scores for document 0,
+ // 1, 2.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+
+ // Truncate number of documents to 2, scores of document 2 should be gone.
+ EXPECT_THAT(usage_store->TruncateTo(/*num_documents=*/2), IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(scores));
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, TruncateToALargeNumberShouldDoNothing) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and set scores for document
+ // 0, 1.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+
+ // Truncate to a number that is greater than the number of documents. Scores
+ // should be the same.
+ EXPECT_THAT(usage_store->TruncateTo(1000), IsOk());
+
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, TruncateToNegativeNumberShouldReturnError) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ EXPECT_THAT(usage_store->TruncateTo(-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(UsageStoreTest, Reset) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+
+ // Set scores for document 1 and document 2.
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+
+ EXPECT_THAT(usage_store->Reset(), IsOk());
+
+ // After resetting, all the scores are cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(UsageStore::UsageScores()));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+ }
+
+ // New instances should be created successfully after Reset().
+ EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_).status(), IsOk());
}
TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) {
@@ -346,12 +457,10 @@ TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) {
// The stored timestamp in seconds should be the max value of uint32.
usage_store->AddUsageReport(usage_report, /*document_id=*/1);
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s =
- std::numeric_limits<uint32_t>::max();
- expected_scores.usage_type1_count = 1;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/std::numeric_limits<uint32_t>::max(),
+ /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
}
@@ -361,13 +470,10 @@ TEST_F(UsageStoreTest, CountsShouldNotOverflow) {
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with the max value of int.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 0;
- scores.usage_type2_last_used_timestamp_s = 0;
- scores.usage_type3_last_used_timestamp_s = 0;
- scores.usage_type1_count = std::numeric_limits<int>::max();
- scores.usage_type2_count = 0;
- scores.usage_type3_count = 0;
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/std::numeric_limits<int>::max(), /*type2_count=*/0,
+ /*type3_count=*/0);
ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
@@ -383,6 +489,77 @@ TEST_F(UsageStoreTest, CountsShouldNotOverflow) {
IsOkAndHolds(scores));
}
+TEST_F(UsageStoreTest, StoreShouldBeResetOnVectorChecksumMismatch) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ }
+
+ // Modify the header to trigger a vector checksum mismatch.
+ const std::string score_cache_file_path =
+ absl_ports::StrCat(test_dir_, "/usage-scores");
+ FileBackedVector<UsageStore::UsageScores>::Header header{};
+ filesystem_.PRead(
+ score_cache_file_path.c_str(), /*buf=*/&header,
+ /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header),
+ /*offset=*/0);
+ header.vector_checksum = 10; // Arbitrary garbage checksum
+ header.header_checksum = header.CalculateHeaderChecksum();
+ filesystem_.PWrite(
+ score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header,
+ /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header));
+
+ // Recover from checksum mismatch.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ // Previous data should be cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, StoreShouldBeResetOnHeaderChecksumMismatch) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ }
+
+ // Modify the header to trigger a header checksum mismatch.
+ const std::string score_cache_file_path =
+ absl_ports::StrCat(test_dir_, "/usage-scores");
+ FileBackedVector<UsageStore::UsageScores>::Header header{};
+ filesystem_.PRead(
+ score_cache_file_path.c_str(), /*buf=*/&header,
+ /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header),
+ /*offset=*/0);
+ header.header_checksum = 10; // Arbitrary garbage checksum
+ filesystem_.PWrite(
+ score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header,
+ /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header));
+
+ // Recover from checksum mismatch.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ // Previous data should be cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
} // namespace
} // namespace lib