diff options
author | Terry Wang <tytytyww@google.com> | 2020-10-28 01:36:03 -0700 |
---|---|---|
committer | Terry Wang <tytytyww@google.com> | 2020-10-28 01:36:03 -0700 |
commit | 71b8eddc99c6337ff304a2f3cd0588c42239202f (patch) | |
tree | 7c54063a7f10e35a282cdfd4cafd369696672fbe /icing/store | |
parent | 5abfe5bcac00f4f188d3d8041fa97bf77206b577 (diff) | |
download | icing-71b8eddc99c6337ff304a2f3cd0588c42239202f.tar.gz |
Pull upstream changes.
Change-Id: I73ea5f80ccf16a02519f6f7ccfc993e9b0f39f86
Diffstat (limited to 'icing/store')
-rw-r--r-- | icing/store/document-store.cc | 87 | ||||
-rw-r--r-- | icing/store/document-store.h | 30 | ||||
-rw-r--r-- | icing/store/document-store_test.cc | 390 | ||||
-rw-r--r-- | icing/store/usage-store.cc | 84 | ||||
-rw-r--r-- | icing/store/usage-store.h | 30 | ||||
-rw-r--r-- | icing/store/usage-store_test.cc | 347 |
6 files changed, 851 insertions, 117 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 7577f6b..1e47d59 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -59,6 +59,7 @@ constexpr char kDocumentStoreHeaderFilename[] = "document_store_header"; constexpr char kScoreCacheFilename[] = "score_cache"; constexpr char kFilterCacheFilename[] = "filter_cache"; constexpr char kNamespaceMapperFilename[] = "namespace_mapper"; +constexpr char kUsageStoreDirectoryName[] = "usage_store"; constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB @@ -123,6 +124,10 @@ std::string MakeNamespaceMapperFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kNamespaceMapperFilename); } +std::string MakeUsageStoreDirectoryName(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName); +} + // TODO(adorokhine): This class internally uses an 8-byte fingerprint of the // Key and stores the key/value in a file-backed-trie that adds an ~80 byte // overhead per key. As we know that these fingerprints are always 8-bytes in @@ -309,6 +314,14 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() { MakeNamespaceMapperFilename(base_dir_), kNamespaceMapperMaxSize)); + ICING_ASSIGN_OR_RETURN( + usage_store_, + UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_))); + + // Ensure the usage store is the correct size. + ICING_RETURN_IF_ERROR( + usage_store_->TruncateTo(document_id_mapper_->num_elements())); + ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); if (checksum.Get() != header.checksum) { return absl_ports::InternalError( @@ -325,6 +338,12 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() { ICING_RETURN_IF_ERROR(ResetFilterCache()); ICING_RETURN_IF_ERROR(ResetNamespaceMapper()); + // Creates a new UsageStore instance. Note that we don't reset the data in + // usage store here because we're not able to regenerate the usage scores. 
+ ICING_ASSIGN_OR_RETURN( + usage_store_, + UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_))); + // Iterates through document log auto iterator = document_log_->GetIterator(); auto iterator_status = iterator.Advance(); @@ -478,6 +497,10 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() { "Failed to iterate through proto log."); } + // Shrink usage_store_ to the correct size. + ICING_RETURN_IF_ERROR( + usage_store_->TruncateTo(document_id_mapper_->num_elements())); + // Write the header ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); ICING_RETURN_IF_ERROR(UpdateHeader(checksum)); @@ -727,9 +750,19 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put( expiration_timestamp_ms))); if (old_document_id_or.ok()) { - // Mark the old document id as deleted. - ICING_RETURN_IF_ERROR(document_id_mapper_->Set( - old_document_id_or.ValueOrDie(), kDocDeletedFlag)); + DocumentId old_document_id = old_document_id_or.ValueOrDie(); + auto offset_or = DoesDocumentExistAndGetFileOffset(old_document_id); + + if (offset_or.ok()) { + // The old document exists, copy over the usage scores. + ICING_RETURN_IF_ERROR( + usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id, + /*to_document_id=*/new_document_id)); + + // Hard delete the old document. + ICING_RETURN_IF_ERROR( + HardDelete(old_document_id, offset_or.ValueOrDie())); + } } return new_document_id; @@ -887,8 +920,7 @@ libtextclassifier3::Status DocumentStore::Delete( if (soft_delete) { return SoftDelete(name_space, uri, document_id); } else { - uint64_t document_log_offset = file_offset_or.ValueOrDie(); - return HardDelete(document_id, document_log_offset); + return HardDelete(document_id, file_offset_or.ValueOrDie()); } } @@ -915,6 +947,7 @@ libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id, } } +// TODO(b/169969469): Consider removing SoftDelete(). 
libtextclassifier3::Status DocumentStore::SoftDelete( std::string_view name_space, std::string_view uri, DocumentId document_id) { // Update ground truth first. @@ -935,7 +968,7 @@ libtextclassifier3::Status DocumentStore::SoftDelete( } libtextclassifier3::Status DocumentStore::HardDelete( - DocumentId document_id, uint64_t document_log_offset) { + DocumentId document_id, int64_t document_log_offset) { // Erases document proto. ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset)); return ClearDerivedData(document_id); @@ -981,6 +1014,19 @@ DocumentStore::GetDocumentFilterData(DocumentId document_id) const { return document_filter_data; } +libtextclassifier3::StatusOr<UsageStore::UsageScores> +DocumentStore::GetUsageScores(DocumentId document_id) const { + return usage_store_->GetUsageScores(document_id); +} + +libtextclassifier3::Status DocumentStore::ReportUsage( + const UsageReport& usage_report) { + ICING_ASSIGN_OR_RETURN(DocumentId document_id, + GetDocumentId(usage_report.document_namespace(), + usage_report.document_uri())); + return usage_store_->AddUsageReport(usage_report, document_id); +} + libtextclassifier3::Status DocumentStore::DeleteByNamespace( std::string_view name_space, bool soft_delete) { auto namespace_id_or = namespace_mapper_->Get(name_space); @@ -1132,6 +1178,7 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() { ICING_RETURN_IF_ERROR(score_cache_->PersistToDisk()); ICING_RETURN_IF_ERROR(filter_cache_->PersistToDisk()); ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk()); + ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk()); // Update the combined checksum and write to header file. ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); @@ -1334,15 +1381,21 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( // Guaranteed to have a document now. 
DocumentProto document_to_keep = document_or.ValueOrDie(); - // TODO(b/144458732): Implement a more robust version of - // ICING_RETURN_IF_ERROR that can support error logging. - libtextclassifier3::Status status = - new_doc_store->Put(std::move(document_to_keep)).status(); - if (!status.ok()) { - ICING_LOG(ERROR) << status.error_message() + // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN + // that can support error logging. + auto new_document_id_or = new_doc_store->Put(std::move(document_to_keep)); + if (!new_document_id_or.ok()) { + ICING_LOG(ERROR) << new_document_id_or.status().error_message() << "Failed to write into new document store"; - return status; + return new_document_id_or.status(); } + + // Copy over usage scores. + ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores, + usage_store_->GetUsageScores(document_id)); + DocumentId new_document_id = new_document_id_or.ValueOrDie(); + ICING_RETURN_IF_ERROR( + new_doc_store->SetUsageScores(new_document_id, usage_scores)); } ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk()); @@ -1430,7 +1483,13 @@ libtextclassifier3::Status DocumentStore::ClearDerivedData( document_id, DocumentFilterData(kInvalidNamespaceId, kInvalidSchemaTypeId, /*expiration_timestamp_ms=*/-1))); - return libtextclassifier3::Status::OK; + // Clears the usage scores. 
+ return usage_store_->DeleteUsageScores(document_id); +} + +libtextclassifier3::Status DocumentStore::SetUsageScores( + DocumentId document_id, const UsageStore::UsageScores& usage_scores) { + return usage_store_->SetUsageScores(document_id, usage_scores); } } // namespace lib diff --git a/icing/store/document-store.h b/icing/store/document-store.h index 2ac1c71..5c1b902 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -34,6 +34,7 @@ #include "icing/store/document-id.h" #include "icing/store/key-mapper.h" #include "icing/store/namespace-id.h" +#include "icing/store/usage-store.h" #include "icing/util/clock.h" #include "icing/util/crc32.h" #include "icing/util/document-validator.h" @@ -223,6 +224,24 @@ class DocumentStore { libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData( DocumentId document_id) const; + // Gets the usage scores of a document. + // + // Returns: + // UsageScores on success + // INVALID_ARGUMENT if document_id is invalid + // INTERNAL_ERROR on I/O errors + libtextclassifier3::StatusOr<UsageStore::UsageScores> GetUsageScores( + DocumentId document_id) const; + + // Reports usage. The corresponding usage scores of the specified document in + // the report will be updated. + // + // Returns: + // OK on success + // NOT_FOUND if the [namespace + uri] key in the report doesn't exist + // INTERNAL_ERROR on I/O errors. + libtextclassifier3::Status ReportUsage(const UsageReport& usage_report); + // Deletes all documents belonging to the given namespace. The documents will // be marked as deleted if 'soft_delete' is true, otherwise they will be // erased immediately. @@ -391,6 +410,11 @@ class DocumentStore { // DocumentStore. Namespaces may be removed from the mapper during compaction. std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_; + // A storage class that caches all usage scores. Usage scores are not + // considered as ground truth. 
Usage scores are associated with document ids + // so they need to be updated when document ids change. + std::unique_ptr<UsageStore> usage_store_; + // Used internally to indicate whether the class has been initialized. This is // to guard against cases where the object has been created, but Initialize // fails in the constructor. If we have successfully exited the constructor, @@ -497,7 +521,7 @@ class DocumentStore { // OK on success // INTERNAL_ERROR on IO error libtextclassifier3::Status HardDelete(DocumentId document_id, - uint64_t document_log_offset); + int64_t document_log_offset); // Helper method to find a DocumentId that is associated with the given // namespace and uri. @@ -539,6 +563,10 @@ class DocumentStore { // Helper method to clear the derived data of a document libtextclassifier3::Status ClearDerivedData(DocumentId document_id); + + // Sets usage scores for the given document. + libtextclassifier3::Status SetUsageScores( + DocumentId document_id, const UsageStore::UsageScores& usage_scores); }; } // namespace lib diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index f857481..301dbdd 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -42,6 +42,8 @@ namespace icing { namespace lib { +namespace { + using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::_; using ::testing::Eq; @@ -54,6 +56,17 @@ using ::testing::Not; using ::testing::Return; using ::testing::UnorderedElementsAre; +UsageReport CreateUsageReport(std::string name_space, std::string uri, + int64 timestamp_ms, + UsageReport::UsageType usage_type) { + UsageReport usage_report; + usage_report.set_document_namespace(name_space); + usage_report.set_document_uri(uri); + usage_report.set_usage_timestamp_ms(timestamp_ms); + usage_report.set_usage_type(usage_type); + return usage_report; +} + class DocumentStoreTest : public ::testing::Test { protected: DocumentStoreTest() @@ -1297,7 +1310,7 @@ 
TEST_F(DocumentStoreTest, GetDiskUsage) { // Bad file system MockFilesystem mock_filesystem; - ON_CALL(mock_filesystem, GetDiskUsage(A<const char *>())) + ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>())) .WillByDefault(Return(Filesystem::kBadFileSize)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem, @@ -1465,6 +1478,63 @@ TEST_F(DocumentStoreTest, HardDeleteClearsScoreCache) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } +TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> doc_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + doc_store->Put(test_document1_)); + + // Report usage with type 1. + UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1)); + + UsageStore::UsageScores expected_scores; + expected_scores.usage_type1_count = 1; + ASSERT_THAT(doc_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Soft delete the document. + ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true)); + + // The scores should be the same. + ASSERT_THAT(doc_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); +} + +TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> doc_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + doc_store->Put(test_document1_)); + + // Report usage with type 1. 
+ UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1)); + + UsageStore::UsageScores expected_scores; + expected_scores.usage_type1_count = 1; + ASSERT_THAT(doc_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Hard delete the document. + ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false)); + + // The scores should be cleared. + expected_scores.usage_type1_count = 0; + ASSERT_THAT(doc_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); +} + TEST_F(DocumentStoreTest, ExpirationTimestampIsSumOfNonZeroTtlAndCreationTimestamp) { DocumentProto document = DocumentBuilder() @@ -1572,7 +1642,7 @@ TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) { // With default doc score 0 .Build(); DocumentProto document2 = DocumentBuilder() - .SetKey("icing", "email/1") + .SetKey("icing", "email/2") .SetSchema("email") .AddStringProperty("subject", "subject foo") .SetScore(5) @@ -2346,5 +2416,321 @@ TEST_F(DocumentStoreTest, GetAllNamespaces) { UnorderedElementsAre("namespace1")); } +TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store->Put(test_document1_)); + + // Report usage with type 1 and time 1. 
+ UsageReport usage_report_type1_time1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time1)); + + UsageStore::UsageScores expected_scores; + expected_scores.usage_type1_last_used_timestamp_s = 1; + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Report usage with type 1 and time 5, time should be updated. + UsageReport usage_report_type1_time5 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time5)); + + expected_scores.usage_type1_last_used_timestamp_s = 5; + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Report usage with type 2 and time 1. + UsageReport usage_report_type2_time1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE2); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time1)); + + expected_scores.usage_type2_last_used_timestamp_s = 1; + ++expected_scores.usage_type2_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Report usage with type 2 and time 5. + UsageReport usage_report_type2_time5 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000, + UsageReport::USAGE_TYPE2); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time5)); + + expected_scores.usage_type2_last_used_timestamp_s = 5; + ++expected_scores.usage_type2_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Report usage with type 3 and time 1. 
+ UsageReport usage_report_type3_time1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000, + UsageReport::USAGE_TYPE3); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time1)); + + expected_scores.usage_type3_last_used_timestamp_s = 1; + ++expected_scores.usage_type3_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Report usage with type 3 and time 5. + UsageReport usage_report_type3_time5 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000, + UsageReport::USAGE_TYPE3); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time5)); + + expected_scores.usage_type3_last_used_timestamp_s = 5; + ++expected_scores.usage_type3_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); +} + +TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store->Put(test_document1_)); + + // Report usage with type 1. + UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1)); + + UsageStore::UsageScores expected_scores; + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Report usage with type 2. 
+ UsageReport usage_report_type2 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE2); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2)); + + ++expected_scores.usage_type2_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Report usage with type 3. + UsageReport usage_report_type3 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE3); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3)); + + ++expected_scores.usage_type3_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); +} + +TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) { + UsageStore::UsageScores expected_scores; + DocumentId document_id; + { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + ICING_ASSERT_OK_AND_ASSIGN(document_id, + document_store->Put(test_document1_)); + + // Report usage with type 1. + UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1)); + + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + } + + // Change the DocStore's header combined checksum so that it won't match the + // recalculated checksum on initialization. This will force a regeneration of + // derived files from ground truth. 
+ const std::string header_file = + absl_ports::StrCat(document_store_dir_, "/document_store_header"); + DocumentStore::Header header; + header.magic = DocumentStore::Header::kMagic; + header.checksum = 10; // Arbitrary garbage checksum + filesystem_.DeleteFile(header_file.c_str()); + filesystem_.Write(header_file.c_str(), &header, sizeof(header)); + + // Successfully recover from a corrupt derived file issue. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + + // Usage scores should be the same. + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); +} + +TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) { + UsageStore::UsageScores expected_scores; + DocumentId document_id; + { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + ICING_ASSERT_OK_AND_ASSIGN( + document_id, document_store->Put(DocumentProto(test_document1_))); + + // Report usage with type 1. + UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1)); + + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + } + + // "Corrupt" the content written in the log by adding non-checksummed data to + // it. This will mess up the checksum of the proto log, forcing it to rewind + // to the last saved point. 
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); + const std::string serialized_document = document.SerializeAsString(); + + const std::string document_log_file = + absl_ports::StrCat(document_store_dir_, "/document_log"); + int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str()); + filesystem_.PWrite(document_log_file.c_str(), file_size, + serialized_document.data(), serialized_document.size()); + + // Successfully recover from a data loss issue. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + + // Usage scores should still be available. + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); +} + +TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store->Put(DocumentProto(test_document1_))); + + // Report usage with type 1. + UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1)); + + UsageStore::UsageScores expected_scores; + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Update the document. + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId updated_document_id, + document_store->Put(DocumentProto(test_document1_))); + // We should get a different document id. + ASSERT_THAT(updated_document_id, Not(Eq(document_id))); + + // Usage scores should be the same. 
+ EXPECT_THAT(document_store->GetUsageScores(updated_document_id), + IsOkAndHolds(expected_scores)); +} + +TEST_F(DocumentStoreTest, + UsageScoresShouldNotBeCopiedOverFromOldSoftDeletedDocs) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store->Put(DocumentProto(test_document1_))); + + // Report usage with type 1. + UsageReport usage_report_type1 = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1)); + + UsageStore::UsageScores expected_scores; + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id), + IsOkAndHolds(expected_scores)); + + // Soft delete the doc. + ICING_ASSERT_OK(document_store->Delete(document_id, /*soft_delete=*/true)); + + // Put the same document. + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId updated_document_id, + document_store->Put(DocumentProto(test_document1_))); + // We should get a different document id. + ASSERT_THAT(updated_document_id, Not(Eq(document_id))); + + // Usage scores should be cleared. + EXPECT_THAT(document_store->GetUsageScores(updated_document_id), + IsOkAndHolds(UsageStore::UsageScores())); +} + +TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> document_store, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store->Put(DocumentProto(test_document1_))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store->Put(DocumentProto(test_document2_))); + ICING_ASSERT_OK(document_store->Delete(document_id1)); + + // Report usage of document 2. 
+ UsageReport usage_report = CreateUsageReport( + /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0, + UsageReport::USAGE_TYPE1); + ICING_ASSERT_OK(document_store->ReportUsage(usage_report)); + + UsageStore::UsageScores expected_scores; + ++expected_scores.usage_type1_count; + ASSERT_THAT(document_store->GetUsageScores(document_id2), + IsOkAndHolds(expected_scores)); + + // Run optimize + std::string optimized_dir = document_store_dir_ + "/optimize_test"; + filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()); + ICING_ASSERT_OK(document_store->OptimizeInto(optimized_dir)); + + // Get optimized document store + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocumentStore> optimized_document_store, + DocumentStore::Create(&filesystem_, optimized_dir, &fake_clock_, + schema_store_.get())); + + // Usage scores should be the same. + // The original document_id2 should have become document_id2 - 1. + ASSERT_THAT(optimized_document_store->GetUsageScores(document_id2 - 1), + IsOkAndHolds(expected_scores)); +} + +} // namespace + } // namespace lib } // namespace icing diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc index 911c45a..7a0af9c 100644 --- a/icing/store/usage-store.cc +++ b/icing/store/usage-store.cc @@ -31,10 +31,32 @@ libtextclassifier3::StatusOr<std::unique_ptr<UsageStore>> UsageStore::Create( const Filesystem* filesystem, const std::string& base_dir) { ICING_RETURN_ERROR_IF_NULL(filesystem); + if (!filesystem->CreateDirectoryRecursively(base_dir.c_str())) { + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to create UsageStore directory: ", base_dir)); + } + + const std::string score_cache_filename = + MakeUsageScoreCacheFilename(base_dir); + auto usage_score_cache_or = FileBackedVector<UsageScores>::Create( - *filesystem, MakeUsageScoreCacheFilename(base_dir), + *filesystem, score_cache_filename, MemoryMappedFile::READ_WRITE_AUTO_SYNC); + if 
(absl_ports::IsFailedPrecondition(usage_score_cache_or.status())) { + // File checksum doesn't match the stored checksum. Delete and recreate the + // file. + ICING_RETURN_IF_ERROR( + FileBackedVector<int64_t>::Delete(*filesystem, score_cache_filename)); + + ICING_VLOG(1) << "The score cache file in UsageStore is corrupted, all " + "scores have been reset."; + + usage_score_cache_or = FileBackedVector<UsageScores>::Create( + *filesystem, score_cache_filename, + MemoryMappedFile::READ_WRITE_AUTO_SYNC); + } + if (!usage_score_cache_or.ok()) { ICING_LOG(ERROR) << usage_score_cache_or.status().error_message() << "Failed to initialize usage_score_cache"; @@ -111,9 +133,7 @@ libtextclassifier3::Status UsageStore::AddUsageReport(const UsageReport& report, } // Write updated usage scores to file. - ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, usage_scores)); - - return libtextclassifier3::Status::OK; + return usage_score_cache_->Set(document_id, usage_scores); } libtextclassifier3::Status UsageStore::DeleteUsageScores( @@ -123,10 +143,13 @@ libtextclassifier3::Status UsageStore::DeleteUsageScores( "Document id %d is invalid.", document_id)); } - // Clear all the scores of the document. - ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, UsageScores())); + if (document_id >= usage_score_cache_->num_elements()) { + // Nothing to delete. + return libtextclassifier3::Status::OK; + } - return libtextclassifier3::Status::OK; + // Clear all the scores of the document. 
+ return usage_score_cache_->Set(document_id, UsageScores()); } libtextclassifier3::StatusOr<UsageStore::UsageScores> @@ -149,20 +172,55 @@ UsageStore::GetUsageScores(DocumentId document_id) { } libtextclassifier3::Status UsageStore::SetUsageScores( - DocumentId document_id, UsageScores usage_scores) { + DocumentId document_id, const UsageScores& usage_scores) { if (!IsDocumentIdValid(document_id)) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( "Document id %d is invalid.", document_id)); } - ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, usage_scores)); + return usage_score_cache_->Set(document_id, usage_scores); +} + +libtextclassifier3::Status UsageStore::CloneUsageScores( + DocumentId from_document_id, DocumentId to_document_id) { + if (!IsDocumentIdValid(from_document_id)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "from_document_id %d is invalid.", from_document_id)); + } + + if (!IsDocumentIdValid(to_document_id)) { + return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( + "to_document_id %d is invalid.", to_document_id)); + } - return libtextclassifier3::Status::OK; + auto usage_scores_or = usage_score_cache_->Get(from_document_id); + if (usage_scores_or.ok()) { + return usage_score_cache_->Set(to_document_id, + *std::move(usage_scores_or).ValueOrDie()); + } else if (absl_ports::IsOutOfRange(usage_scores_or.status())) { + // No usage scores found. Set default scores to to_document_id. 
+ return usage_score_cache_->Set(to_document_id, UsageScores()); + } + + // Real error + return usage_scores_or.status(); } libtextclassifier3::Status UsageStore::PersistToDisk() { - ICING_RETURN_IF_ERROR(usage_score_cache_->PersistToDisk()); - return libtextclassifier3::Status::OK; + return usage_score_cache_->PersistToDisk(); +} + +libtextclassifier3::StatusOr<Crc32> UsageStore::ComputeChecksum() { + return usage_score_cache_->ComputeChecksum(); +} + +libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) { + if (num_documents >= usage_score_cache_->num_elements()) { + // No need to truncate + return libtextclassifier3::Status::OK; + } + // num_documents is passed through unchanged; no "+1" adjustment is applied. + return usage_score_cache_->TruncateTo(num_documents); } libtextclassifier3::Status UsageStore::Reset() { @@ -186,7 +244,7 @@ libtextclassifier3::Status UsageStore::Reset() { } usage_score_cache_ = std::move(usage_score_cache_or).ValueOrDie(); - return libtextclassifier3::Status::OK; + return PersistToDisk(); } } // namespace lib diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h index 9a8c286..0a622a0 100644 --- a/icing/store/usage-store.h +++ b/icing/store/usage-store.h @@ -104,7 +104,6 @@ class UsageStore { // Returns: // UsageScores on success // INVALID_ARGUMENT if document_id is invalid - // NOT_FOUND if no scores are found for the document // INTERNAL_ERROR on I/O errors // // TODO(b/169433395): return a pointer instead of an object. @@ -121,7 +120,19 @@ class UsageStore { // INVALID_ARGUMENT if document_id is invalid // INTERNAL_ERROR on I/O errors libtextclassifier3::Status SetUsageScores(DocumentId document_id, - UsageScores usage_scores); + const UsageScores& usage_scores); + + // Clones the usage scores from one document to another. 
+ // + // Returns: + // OK on success + // INVALID_ARGUMENT if any of the document ids is invalid + // INTERNAL_ERROR on I/O errors + // + // TODO(b/169433395): We can remove this method once GetUsageScores() returns + // a pointer. + libtextclassifier3::Status CloneUsageScores(DocumentId from_document_id, + DocumentId to_document_id); // Syncs data to disk. // @@ -130,6 +141,21 @@ class UsageStore { // INTERNAL on I/O error libtextclassifier3::Status PersistToDisk(); + // Updates checksum of the usage scores and returns it. + // + // Returns: + // A Crc32 on success + // INTERNAL_ERROR if the internal state is inconsistent + libtextclassifier3::StatusOr<Crc32> ComputeChecksum(); + + // Resizes the storage so that only the usage scores of the first + // num_documents documents are stored. + // + // Returns: + // OK on success + // OUT_OF_RANGE_ERROR if num_documents is negative + libtextclassifier3::Status TruncateTo(DocumentId num_documents); + // Deletes all usage data and re-initialize the storage. 
// // Returns: diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc index 39985f0..f7fa778 100644 --- a/icing/store/usage-store_test.cc +++ b/icing/store/usage-store_test.cc @@ -53,6 +53,22 @@ UsageReport CreateUsageReport(std::string name_space, std::string uri, return usage_report; } +UsageStore::UsageScores CreateUsageScores(uint32_t type1_timestamp, + uint32_t type2_timestamp, + uint32_t type3_timestamp, + int type1_count, int type2_count, + int type3_count) { + UsageStore::UsageScores scores; + scores.usage_type1_last_used_timestamp_s = type1_timestamp; + scores.usage_type2_last_used_timestamp_s = type2_timestamp; + scores.usage_type3_last_used_timestamp_s = type3_timestamp; + scores.usage_type1_count = type1_count; + scores.usage_type2_count = type2_count; + scores.usage_type3_count = type3_count; + + return scores; +} + TEST_F(UsageStoreTest, CreationShouldSucceed) { EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_), IsOk()); } @@ -138,11 +154,9 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) { // Report a usage with timestamp 5. usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1); - UsageStore::UsageScores expected_scores; - expected_scores.usage_type1_last_used_timestamp_s = 5; - expected_scores.usage_type1_count = 1; - expected_scores.usage_type2_count = 0; - expected_scores.usage_type3_count = 0; + UsageStore::UsageScores expected_scores = CreateUsageScores( + /*type1_timestamp=*/5, /*type2_timestamp=*/0, /*type3_timestamp=*/0, + /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0); EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); @@ -174,10 +188,10 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) { // Report a usage with type 1. 
usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1); - UsageStore::UsageScores expected_scores; - expected_scores.usage_type1_count = 1; - expected_scores.usage_type2_count = 0; - expected_scores.usage_type3_count = 0; + UsageStore::UsageScores expected_scores = CreateUsageScores( + /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0, + /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0); + ; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); // Report another usage with type 1. @@ -222,13 +236,9 @@ TEST_F(UsageStoreTest, SetAndGetUsageScores) { UsageStore::Create(&filesystem_, test_dir_)); // Create usage scores with some random numbers. - UsageStore::UsageScores scores; - scores.usage_type1_last_used_timestamp_s = 7; - scores.usage_type2_last_used_timestamp_s = 9; - scores.usage_type3_last_used_timestamp_s = 11; - scores.usage_type1_count = 3; - scores.usage_type2_count = 4; - scores.usage_type3_count = 9; + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); // Verify that set and get results are consistent. ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); @@ -246,15 +256,8 @@ TEST_F(UsageStoreTest, ImplicitlyInitializedScoresShouldBeZero) { // Now the scores of document 1 have been implicitly initialized. The scores // should all be 0. 
- UsageStore::UsageScores expected_scores; - expected_scores.usage_type1_last_used_timestamp_s = 0; - expected_scores.usage_type2_last_used_timestamp_s = 0; - expected_scores.usage_type3_last_used_timestamp_s = 0; - expected_scores.usage_type1_count = 0; - expected_scores.usage_type2_count = 0; - expected_scores.usage_type3_count = 0; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), - IsOkAndHolds(expected_scores)); + IsOkAndHolds(UsageStore::UsageScores())); } TEST_F(UsageStoreTest, DeleteUsageScores) { @@ -262,27 +265,59 @@ TEST_F(UsageStoreTest, DeleteUsageScores) { UsageStore::Create(&filesystem_, test_dir_)); // Create usage scores with some random numbers. - UsageStore::UsageScores scores; - scores.usage_type1_last_used_timestamp_s = 7; - scores.usage_type2_last_used_timestamp_s = 9; - scores.usage_type3_last_used_timestamp_s = 11; - scores.usage_type1_count = 3; - scores.usage_type2_count = 4; - scores.usage_type3_count = 9; + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ; ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); // Delete the usage scores of document 1, all the scores of document 1 should // be 0. 
- UsageStore::UsageScores expected_scores; - expected_scores.usage_type1_last_used_timestamp_s = 0; - expected_scores.usage_type2_last_used_timestamp_s = 0; - expected_scores.usage_type3_last_used_timestamp_s = 0; - expected_scores.usage_type1_count = 0; - expected_scores.usage_type2_count = 0; - expected_scores.usage_type3_count = 0; ICING_EXPECT_OK(usage_store->DeleteUsageScores(/*document_id=*/1)); EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), - IsOkAndHolds(expected_scores)); + IsOkAndHolds(UsageStore::UsageScores())); +} + +TEST_F(UsageStoreTest, CloneUsageScores) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // Create usage scores with some random numbers and assign them to document 1. + UsageStore::UsageScores scores_a = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ; + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores_a)); + + // Create another set of usage scores with some random numbers and assign them + // to document 2. + UsageStore::UsageScores scores_b = CreateUsageScores( + /*type1_timestamp=*/111, /*type2_timestamp=*/666, /*type3_timestamp=*/333, + /*type1_count=*/50, /*type2_count=*/30, /*type3_count=*/100); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores_b)); + + // Clone scores from document 1 to document 3. + EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/1, + /*to_document_id=*/3), + IsOk()); + + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3), + IsOkAndHolds(scores_a)); + + // Clone scores from document 2 to document 3. 
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/2, + /*to_document_id=*/3), + IsOk()); + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3), + IsOkAndHolds(scores_b)); + + // Clone scores from document 4 to document 3, scores should be set to + // default. + EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/4, + /*to_document_id=*/3), + IsOk()); + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3), + IsOkAndHolds(UsageStore::UsageScores())); } TEST_F(UsageStoreTest, PersistToDisk) { @@ -290,49 +325,125 @@ TEST_F(UsageStoreTest, PersistToDisk) { UsageStore::Create(&filesystem_, test_dir_)); // Create usage scores with some random numbers. - UsageStore::UsageScores scores; - scores.usage_type1_last_used_timestamp_s = 7; - scores.usage_type2_last_used_timestamp_s = 9; - scores.usage_type3_last_used_timestamp_s = 11; - scores.usage_type1_count = 3; - scores.usage_type2_count = 4; - scores.usage_type3_count = 9; - ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); EXPECT_THAT(usage_store->PersistToDisk(), IsOk()); } -TEST_F(UsageStoreTest, Reset) { +TEST_F(UsageStoreTest, ComputeChecksum) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, UsageStore::Create(&filesystem_, test_dir_)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum1, usage_store->ComputeChecksum()); // Create usage scores with some random numbers. - UsageStore::UsageScores scores; - scores.usage_type1_last_used_timestamp_s = 7; - scores.usage_type2_last_used_timestamp_s = 9; - scores.usage_type3_last_used_timestamp_s = 11; - scores.usage_type1_count = 3; - scores.usage_type2_count = 4; - scores.usage_type3_count = 9; - - // Set scores for document 1 and document 2. 
- ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); - ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores)); - - EXPECT_THAT(usage_store->Reset(), IsOk()); - - // After resetting, all the scores are cleared. - UsageStore::UsageScores expected_scores; - expected_scores.usage_type1_last_used_timestamp_s = 0; - expected_scores.usage_type2_last_used_timestamp_s = 0; - expected_scores.usage_type3_last_used_timestamp_s = 0; - expected_scores.usage_type1_count = 0; - expected_scores.usage_type2_count = 0; - expected_scores.usage_type3_count = 0; + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum2, usage_store->ComputeChecksum()); + + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum3, usage_store->ComputeChecksum()); + + EXPECT_THAT(checksum1, Not(Eq(checksum2))); + EXPECT_THAT(checksum1, Not(Eq(checksum3))); + EXPECT_THAT(checksum2, Not(Eq(checksum3))); + + // Without changing the store, checksum should be the same. + ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum4, usage_store->ComputeChecksum()); + EXPECT_THAT(checksum3, Eq(checksum4)); +} + +TEST_F(UsageStoreTest, TruncateTo) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // Create usage scores with some random numbers and set scores for document 0, + // 1, 2. 
+ UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores)); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores)); + + // Truncate number of documents to 2, scores of document 2 should be gone. + EXPECT_THAT(usage_store->TruncateTo(/*num_documents=*/2), IsOk()); + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0), + IsOkAndHolds(scores)); EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), - IsOkAndHolds(expected_scores)); + IsOkAndHolds(scores)); EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2), - IsOkAndHolds(expected_scores)); + IsOkAndHolds(UsageStore::UsageScores())); +} + +TEST_F(UsageStoreTest, TruncateToALargeNumberShouldDoNothing) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // Create usage scores with some random numbers and set scores for document + // 0, 1. + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores)); + ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); + + ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0), + IsOkAndHolds(scores)); + ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1), + IsOkAndHolds(scores)); + ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/2), + IsOkAndHolds(UsageStore::UsageScores())); + + // Truncate to a number that is greater than the number of documents. Scores + // should be the same. 
+ EXPECT_THAT(usage_store->TruncateTo(1000), IsOk()); + + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0), + IsOkAndHolds(scores)); + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), + IsOkAndHolds(scores)); + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2), + IsOkAndHolds(UsageStore::UsageScores())); +} + +TEST_F(UsageStoreTest, TruncateToNegativeNumberShouldReturnError) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + EXPECT_THAT(usage_store->TruncateTo(-1), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + +TEST_F(UsageStoreTest, Reset) { + { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // Create usage scores with some random numbers. + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + + // Set scores for document 1 and document 2. + ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); + ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores)); + + EXPECT_THAT(usage_store->Reset(), IsOk()); + + // After resetting, all the scores are cleared. + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), + IsOkAndHolds(UsageStore::UsageScores())); + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2), + IsOkAndHolds(UsageStore::UsageScores())); + } + + // New instances should be created successfully after Reset(). + EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_).status(), IsOk()); } TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) { @@ -346,12 +457,10 @@ TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) { // The stored timestamp in seconds should be the max value of uint32. 
usage_store->AddUsageReport(usage_report, /*document_id=*/1); - UsageStore::UsageScores expected_scores; - expected_scores.usage_type1_last_used_timestamp_s = - std::numeric_limits<uint32_t>::max(); - expected_scores.usage_type1_count = 1; - expected_scores.usage_type2_count = 0; - expected_scores.usage_type3_count = 0; + UsageStore::UsageScores expected_scores = CreateUsageScores( + /*type1_timestamp=*/std::numeric_limits<uint32_t>::max(), + /*type2_timestamp=*/0, /*type3_timestamp=*/0, + /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0); EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); } @@ -361,13 +470,10 @@ TEST_F(UsageStoreTest, CountsShouldNotOverflow) { UsageStore::Create(&filesystem_, test_dir_)); // Create usage scores with the max value of int. - UsageStore::UsageScores scores; - scores.usage_type1_last_used_timestamp_s = 0; - scores.usage_type2_last_used_timestamp_s = 0; - scores.usage_type3_last_used_timestamp_s = 0; - scores.usage_type1_count = std::numeric_limits<int>::max(); - scores.usage_type2_count = 0; - scores.usage_type3_count = 0; + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0, + /*type1_count=*/std::numeric_limits<int>::max(), /*type2_count=*/0, + /*type3_count=*/0); ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores)); ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1), @@ -383,6 +489,77 @@ TEST_F(UsageStoreTest, CountsShouldNotOverflow) { IsOkAndHolds(scores)); } +TEST_F(UsageStoreTest, StoreShouldBeResetOnVectorChecksumMismatch) { + { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // Create usage scores with some random numbers. 
+ UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores)); + ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0), + IsOkAndHolds(scores)); + } + + // Modify the header to trigger a vector checksum mismatch. + const std::string score_cache_file_path = + absl_ports::StrCat(test_dir_, "/usage-scores"); + FileBackedVector<UsageStore::UsageScores>::Header header{}; + filesystem_.PRead( + score_cache_file_path.c_str(), /*buf=*/&header, + /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header), + /*offset=*/0); + header.vector_checksum = 10; // Arbitrary garbage checksum + header.header_checksum = header.CalculateHeaderChecksum(); + filesystem_.PWrite( + score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header, + /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header)); + + // Recover from checksum mismatch. + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + // Previous data should be cleared. + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0), + IsOkAndHolds(UsageStore::UsageScores())); +} + +TEST_F(UsageStoreTest, StoreShouldBeResetOnHeaderChecksumMismatch) { + { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // Create usage scores with some random numbers. + UsageStore::UsageScores scores = CreateUsageScores( + /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1, + /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9); + ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores)); + ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0), + IsOkAndHolds(scores)); + } + + // Modify the header to trigger a header checksum mismatch. 
+ const std::string score_cache_file_path = + absl_ports::StrCat(test_dir_, "/usage-scores"); + FileBackedVector<UsageStore::UsageScores>::Header header{}; + filesystem_.PRead( + score_cache_file_path.c_str(), /*buf=*/&header, + /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header), + /*offset=*/0); + header.header_checksum = 10; // Arbitrary garbage checksum + filesystem_.PWrite( + score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header, + /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header)); + + // Recover from checksum mismatch. + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + // Previous data should be cleared. + EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0), + IsOkAndHolds(UsageStore::UsageScores())); +} + } // namespace } // namespace lib |