diff options
author | Cassie Wang <cassiewang@google.com> | 2021-02-26 08:04:01 -0800 |
---|---|---|
committer | Cassie Wang <cassiewang@google.com> | 2021-03-02 15:29:44 -0800 |
commit | 85fd8c8521e338d2bab69f5482e3cc2cf312fd4e (patch) | |
tree | 929e118124b203997be393e4a1c5f5ee6da2de40 /icing/store | |
parent | a34db390d80f862bfaaa49dea3605c5fec3bca3d (diff) | |
download | icing-85fd8c8521e338d2bab69f5482e3cc2cf312fd4e.tar.gz |
Sync from upstream.
Descriptions:
==========
Add last optimized time to GetOptimizeInfo.
==========
Update the implementation of snippeting to return property paths with value indices and remove the values_index field.
==========
Create builders for SchemaProto, SchemaTypeConfigProto and PropertyConfigProto.
==========
Rename some protos with the rules:
- Remove "Native" prefix
- Add "Proto" suffix for consistency with other protos
==========
Upgrade your minimum iOS version to 11.4.
==========
Fix PersistToDisk definitions to ensure that they properly call datasync. This change is meant to address the first part of the PersistToDisk (ptd) doc - that certain functions that claim to persist data don't actually explicitly flush.
==========
Change function call from has_field() to field()
==========
Add IcingStorageInfo.
==========
Add IndexStorageStats.
==========
Add SchemaStoreStorageStats.
==========
Add DocumentStoreStorageStats.
==========
Implement OptimizeStats.
==========
Remove the max number of results per query limit (1000) and replace it with a more flexible way to limit memory use by the result-state-manager.
==========
Add a test case to ensure we don't add UsageStore's checksum in DocumentStore's ComputeChecksum.
==========
Account for UsageStore in GetDiskUsage.
==========
Ensure that SchemaStore properly handles function calls when the schema isn't set.
==========
Remove jlpl_strict_deps feature from package declarations.
==========
Qualify std::string in 3p directories
==========
Section restricts should influence the relevance score.
==========
Apply fixes upstream that were necessary to sync changes downstream. Also added a METADATA check to prevent any accidental adds of foo.proto.h includes.
==========
Remove the 'com.google.protobuf' to 'com.google.android.icing.protobuf' translation in the export_to_aosp script.
==========
Include usage store size in GetOptimizeInfo. This helps clients get a better idea of what savings they could get back if they called Optimize.
Change-Id: Ia2339c7987267a73c49dadf1ced4a0a8ef001d4c
Diffstat (limited to 'icing/store')
-rw-r--r-- | icing/store/document-store.cc | 134 | ||||
-rw-r--r-- | icing/store/document-store.h | 49 | ||||
-rw-r--r-- | icing/store/document-store_test.cc | 148 | ||||
-rw-r--r-- | icing/store/usage-store.cc | 4 | ||||
-rw-r--r-- | icing/store/usage-store.h | 8 | ||||
-rw-r--r-- | icing/store/usage-store_test.cc | 35 |
6 files changed, 284 insertions, 94 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 72bf736..59944fe 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -189,6 +189,17 @@ int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms, return expiration_timestamp_ms; } +void IncrementDeletedOrExpired(FileBackedVector<int64_t>* document_id_mapper, + DocumentId document_id, int* num_deleted_out, + int* num_expired_out) { + auto location_or = document_id_mapper->Get(document_id); + if (location_or.ok() && *location_or.ValueOrDie() == kDocDeletedFlag) { + ++(*num_deleted_out); + } else { + ++(*num_expired_out); + } +} + } // namespace DocumentStore::DocumentStore(const Filesystem* filesystem, @@ -203,13 +214,13 @@ DocumentStore::DocumentStore(const Filesystem* filesystem, libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put( const DocumentProto& document, int32_t num_tokens, - NativePutDocumentStats* put_document_stats) { + PutDocumentStatsProto* put_document_stats) { return Put(DocumentProto(document), num_tokens, put_document_stats); } libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put( DocumentProto&& document, int32_t num_tokens, - NativePutDocumentStats* put_document_stats) { + PutDocumentStatsProto* put_document_stats) { document.mutable_internal_fields()->set_length_in_tokens(num_tokens); return InternalPut(document, put_document_stats); } @@ -226,7 +237,7 @@ DocumentStore::~DocumentStore() { libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, const SchemaStore* schema_store, - NativeInitializeStats* initialize_stats) { + InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(filesystem); ICING_RETURN_ERROR_IF_NULL(clock); ICING_RETURN_ERROR_IF_NULL(schema_store); @@ -243,7 +254,7 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( } 
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( - NativeInitializeStats* initialize_stats) { + InitializeStatsProto* initialize_stats) { auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create( filesystem_, MakeDocumentLogFilename(base_dir_), FileBackedProtoLog<DocumentWrapper>::Options( @@ -264,16 +275,16 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( << "Data loss in document log, regenerating derived files."; if (initialize_stats != nullptr) { initialize_stats->set_document_store_recovery_cause( - NativeInitializeStats::DATA_LOSS); + InitializeStatsProto::DATA_LOSS); if (create_result.data_loss == DataLoss::PARTIAL) { // Ground truth is partially lost. initialize_stats->set_document_store_data_status( - NativeInitializeStats::PARTIAL_LOSS); + InitializeStatsProto::PARTIAL_LOSS); } else { // Ground truth is completely lost. initialize_stats->set_document_store_data_status( - NativeInitializeStats::COMPLETE_LOSS); + InitializeStatsProto::COMPLETE_LOSS); } } std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); @@ -294,7 +305,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( "regenerating derived files for DocumentStore."; if (initialize_stats != nullptr) { initialize_stats->set_document_store_recovery_cause( - NativeInitializeStats::IO_ERROR); + InitializeStatsProto::IO_ERROR); } std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); libtextclassifier3::Status status = RegenerateDerivedFiles(); @@ -788,6 +799,11 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const { } Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie(); + // NOTE: We purposely don't include usage_store checksum here because we can't + // regenerate it from ground truth documents. If it gets corrupted, we'll just + // clear all usage reports, but we shouldn't throw everything else in the + // document store out. 
+ total_checksum.Append(std::to_string(document_log_checksum.Get())); total_checksum.Append(std::to_string(document_key_mapper_checksum.Get())); total_checksum.Append(std::to_string(document_id_mapper_checksum.Get())); @@ -819,8 +835,11 @@ libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) { header.checksum = checksum.Get(); // This should overwrite the header. - if (!filesystem_->Write(MakeHeaderFilename(base_dir_).c_str(), &header, - sizeof(header))) { + ScopedFd sfd( + filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str())); + if (!sfd.is_valid() || + !filesystem_->Write(sfd.get(), &header, sizeof(header)) || + !filesystem_->DataSync(sfd.get())) { return absl_ports::InternalError(absl_ports::StrCat( "Failed to write DocStore header: ", MakeHeaderFilename(base_dir_))); } @@ -828,7 +847,7 @@ libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) { } libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut( - DocumentProto& document, NativePutDocumentStats* put_document_stats) { + DocumentProto& document, PutDocumentStatsProto* put_document_stats) { std::unique_ptr<Timer> put_timer = clock_.GetNewTimer(); ICING_RETURN_IF_ERROR(document_validator_.Validate(document)); @@ -1404,30 +1423,62 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() { return libtextclassifier3::Status::OK; } -libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const { - ICING_ASSIGN_OR_RETURN(const int64_t document_log_disk_usage, - document_log_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_disk_usage, - document_key_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t document_id_mapper_disk_usage, - document_id_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t score_cache_disk_usage, - score_cache_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_disk_usage, - filter_cache_->GetDiskUsage()); - 
ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage, - namespace_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage, - corpus_mapper_->GetDiskUsage()); - ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_disk_usage, - corpus_score_cache_->GetDiskUsage()); - - int64_t disk_usage = document_log_disk_usage + - document_key_mapper_disk_usage + - document_id_mapper_disk_usage + score_cache_disk_usage + - filter_cache_disk_usage + namespace_mapper_disk_usage + - corpus_mapper_disk_usage + corpus_score_cache_disk_usage; - return disk_usage; +int64_t GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t>& value_or, + int64_t default_value) { + return (value_or.ok()) ? value_or.ValueOrDie() : default_value; +} + +DocumentStorageInfoProto DocumentStore::GetMemberStorageInfo() const { + DocumentStorageInfoProto storage_info; + storage_info.set_document_log_size( + GetValueOrDefault(document_log_->GetDiskUsage(), -1)); + storage_info.set_key_mapper_size( + GetValueOrDefault(document_key_mapper_->GetDiskUsage(), -1)); + storage_info.set_document_id_mapper_size( + GetValueOrDefault(document_id_mapper_->GetDiskUsage(), -1)); + storage_info.set_score_cache_size( + GetValueOrDefault(score_cache_->GetDiskUsage(), -1)); + storage_info.set_filter_cache_size( + GetValueOrDefault(filter_cache_->GetDiskUsage(), -1)); + storage_info.set_namespace_id_mapper_size( + GetValueOrDefault(namespace_mapper_->GetDiskUsage(), -1)); + storage_info.set_corpus_mapper_size( + GetValueOrDefault(corpus_mapper_->GetDiskUsage(), -1)); + storage_info.set_corpus_score_cache_size( + GetValueOrDefault(corpus_score_cache_->GetDiskUsage(), -1)); + return storage_info; +} + +DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts( + DocumentStorageInfoProto storage_info) const { + int num_alive = 0; + int num_expired = 0; + int num_deleted = 0; + for (DocumentId document_id = 0; + document_id < document_id_mapper_->num_elements(); 
++document_id) { + if (DoesDocumentExist(document_id)) { + ++num_alive; + } else { + IncrementDeletedOrExpired(document_id_mapper_.get(), document_id, + &num_deleted, &num_expired); + } + } + storage_info.set_num_alive_documents(num_alive); + storage_info.set_num_deleted_documents(num_deleted); + storage_info.set_num_expired_documents(num_expired); + return storage_info; +} + +DocumentStorageInfoProto DocumentStore::GetStorageInfo() const { + DocumentStorageInfoProto storage_info = GetMemberStorageInfo(); + int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str()); + if (directory_size != Filesystem::kBadFileSize) { + storage_info.set_document_store_size(directory_size); + } else { + storage_info.set_document_store_size(-1); + } + storage_info.set_num_namespaces(namespace_mapper_->num_keys()); + return CalculateDocumentStatusCounts(std::move(storage_info)); } libtextclassifier3::Status DocumentStore::UpdateSchemaStore( @@ -1577,7 +1628,8 @@ libtextclassifier3::Status DocumentStore::Optimize() { } libtextclassifier3::Status DocumentStore::OptimizeInto( - const std::string& new_directory, const LanguageSegmenter* lang_segmenter) { + const std::string& new_directory, const LanguageSegmenter* lang_segmenter, + OptimizeStatsProto* stats) { // Validates directory if (new_directory == base_dir_) { return absl_ports::InvalidArgumentError( @@ -1592,10 +1644,14 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( // Writes all valid docs into new document store (new directory) int size = document_id_mapper_->num_elements(); + int num_deleted = 0; + int num_expired = 0; for (DocumentId document_id = 0; document_id < size; document_id++) { auto document_or = Get(document_id, /*clear_internal_fields=*/false); if (absl_ports::IsNotFound(document_or.status())) { // Skip nonexistent documents + IncrementDeletedOrExpired(document_id_mapper_.get(), document_id, + &num_deleted, &num_expired); continue; } else if (!document_or.ok()) { // Real error, pass up @@ 
-1640,7 +1696,11 @@ libtextclassifier3::Status DocumentStore::OptimizeInto( ICING_RETURN_IF_ERROR( new_doc_store->SetUsageScores(new_document_id, usage_scores)); } - + if (stats != nullptr) { + stats->set_num_original_documents(size); + stats->set_num_deleted_documents(num_deleted); + stats->set_num_expired_documents(num_expired); + } ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk()); return libtextclassifier3::Status::OK; } diff --git a/icing/store/document-store.h b/icing/store/document-store.h index b2908f0..3b8408d 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -29,6 +29,8 @@ #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" +#include "icing/proto/optimize.pb.h" +#include "icing/proto/storage.pb.h" #include "icing/schema/schema-store.h" #include "icing/store/corpus-associated-scoring-data.h" #include "icing/store/corpus-id.h" @@ -122,7 +124,7 @@ class DocumentStore { static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create( const Filesystem* filesystem, const std::string& base_dir, const Clock* clock, const SchemaStore* schema_store, - NativeInitializeStats* initialize_stats = nullptr); + InitializeStatsProto* initialize_stats = nullptr); // Returns the maximum DocumentId that the DocumentStore has assigned. If // there has not been any DocumentIds assigned, i.e. 
the DocumentStore is @@ -152,10 +154,10 @@ class DocumentStore { // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<DocumentId> Put( const DocumentProto& document, int32_t num_tokens = 0, - NativePutDocumentStats* put_document_stats = nullptr); + PutDocumentStatsProto* put_document_stats = nullptr); libtextclassifier3::StatusOr<DocumentId> Put( DocumentProto&& document, int32_t num_tokens = 0, - NativePutDocumentStats* put_document_stats = nullptr); + PutDocumentStatsProto* put_document_stats = nullptr); // Finds and returns the document identified by the given key (namespace + // uri). If 'clear_internal_fields' is true, document level data that's @@ -351,16 +353,11 @@ class DocumentStore { // INTERNAL on I/O error libtextclassifier3::Status PersistToDisk(); - // Calculates and returns the disk usage in bytes. Rounds up to the nearest - // block size. + // Calculates the StorageInfo for the Document Store. // - // Returns: - // Disk usage on success - // INTERNAL_ERROR on IO error - // - // TODO(tjbarron): consider returning a struct which has the breakdown of each - // component. - libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const; + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. + DocumentStorageInfoProto GetStorageInfo() const; // Update any derived data off of the SchemaStore with the new SchemaStore. // This may include pointers, SchemaTypeIds, etc. @@ -407,6 +404,8 @@ class DocumentStore { // reassigned so any files / classes that are based on old document ids may be // outdated. // + // stats will be set if non-null. + // // NOTE: The tasks in this method are too expensive to be executed in // real-time. The caller should decide how frequently and when to call this // method based on device usage. 
@@ -416,8 +415,8 @@ class DocumentStore { // INVALID_ARGUMENT if new_directory is same as current base directory // INTERNAL_ERROR on IO error libtextclassifier3::Status OptimizeInto( - const std::string& new_directory, - const LanguageSegmenter* lang_segmenter); + const std::string& new_directory, const LanguageSegmenter* lang_segmenter, + OptimizeStatsProto* stats = nullptr); // Calculates status for a potential Optimize call. Includes how many docs // there are vs how many would be optimized away. And also includes an @@ -508,7 +507,7 @@ class DocumentStore { bool initialized_ = false; libtextclassifier3::StatusOr<DataLoss> Initialize( - NativeInitializeStats* initialize_stats); + InitializeStatsProto* initialize_stats); // Creates sub-components and verifies the integrity of each sub-component. // @@ -576,8 +575,8 @@ class DocumentStore { // if it doesn't exist. bool HeaderExists(); - // Update and replace the header file. Creates the header file if it doesn't - // exist. + // Update, replace and persist the header file. Creates the header file if it + // doesn't exist. // // Returns: // OK on success @@ -586,7 +585,7 @@ class DocumentStore { libtextclassifier3::StatusOr<DocumentId> InternalPut( DocumentProto& document, - NativePutDocumentStats* put_document_stats = nullptr); + PutDocumentStatsProto* put_document_stats = nullptr); // Helper function to do batch deletes. Documents with the given // "namespace_id" and "schema_type_id" will be deleted. If callers don't need @@ -688,6 +687,20 @@ class DocumentStore { // Sets usage scores for the given document. libtextclassifier3::Status SetUsageScores( DocumentId document_id, const UsageStore::UsageScores& usage_scores); + + // Returns: + // - on success, a DocumentStorageInfoProto with the fields relating to the + // size of Document Store member variables populated. 
+ // - INTERNAL on failure to get file size + DocumentStorageInfoProto GetMemberStorageInfo() const; + + // Returns: + // - on success, the storage_info that was passed in but with the number of + // alive, deleted and expired documents also set. + // - OUT_OF_RANGE, this should never happen. This could only be returned if + // the document_id_mapper somehow became larger than the filter cache. + DocumentStorageInfoProto CalculateDocumentStatusCounts( + DocumentStorageInfoProto storage_info) const; }; } // namespace lib diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 7754373..440b48f 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -31,6 +31,7 @@ #include "icing/portable/equals-proto.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" +#include "icing/proto/storage.pb.h" #include "icing/schema/schema-store.h" #include "icing/store/corpus-associated-scoring-data.h" #include "icing/store/corpus-id.h" @@ -55,6 +56,7 @@ namespace { using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::_; using ::testing::Eq; +using ::testing::Ge; using ::testing::Gt; using ::testing::HasSubstr; using ::testing::IsEmpty; @@ -436,16 +438,16 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( + int64_t document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); EXPECT_THAT( document_store->Delete("nonexistent_namespace", "nonexistent_uri"), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, 
Eq(ground_truth_size_after)); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) { @@ -566,7 +568,7 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( + int64_t document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); EXPECT_THAT(doc_store @@ -575,9 +577,9 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) { .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) { @@ -590,7 +592,7 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( + int64_t document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); EXPECT_THAT(doc_store @@ -599,9 +601,9 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) { .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); + 
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) { @@ -665,7 +667,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { document4.set_namespace_("namespace.1"); document4.set_uri("uri2"); - int64_t ground_truth_size_before; + int64_t document_log_size_before; { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -686,7 +688,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { EXPECT_THAT(group_result.status, IsOk()); EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); - ground_truth_size_before = filesystem_.GetFileSize( + document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); } // Destructors should update checksum and persist all data to file. @@ -710,9 +712,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { std::move(create_result.document_store); // Make sure we didn't add anything to the ground truth after we recovered. 
- int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_EQ(ground_truth_size_before, ground_truth_size_after); + EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -908,7 +910,7 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( + int64_t document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); EXPECT_THAT(document_store @@ -917,10 +919,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { @@ -933,7 +935,7 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { // Validates that deleting something non-existing won't append anything to // ground truth - int64_t ground_truth_size_before = filesystem_.GetFileSize( + int64_t document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); EXPECT_THAT(document_store @@ -942,10 +944,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { .status, 
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); - int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after)); + EXPECT_THAT(document_log_size_before, Eq(document_log_size_after)); } TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) { @@ -1016,7 +1018,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { .SetSchema("message") .SetCreationTimestampMs(1) .Build(); - int64_t ground_truth_size_before; + int64_t document_log_size_before; { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -1036,7 +1038,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { EXPECT_THAT(group_result.status, IsOk()); EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); - ground_truth_size_before = filesystem_.GetFileSize( + document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); } // Destructors should update checksum and persist all data to file. @@ -1060,9 +1062,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { std::move(create_result.document_store); // Make sure we didn't add anything to the ground truth after we recovered. 
- int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_EQ(ground_truth_size_before, ground_truth_size_after); + EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(document_store->Get(email_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -1100,7 +1102,7 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { .SetSchema("message") .SetCreationTimestampMs(1) .Build(); - int64_t ground_truth_size_before; + int64_t document_log_size_before; { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -1125,7 +1127,7 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { EXPECT_THAT(document_store->Get(message_document_id), IsOkAndHolds(EqualsProto(message_document))); - ground_truth_size_before = filesystem_.GetFileSize( + document_log_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); } // Destructors should update checksum and persist all data to file. @@ -1156,9 +1158,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { std::move(create_result.document_store); // Make sure we didn't add anything to the ground truth after we recovered. 
- int64_t ground_truth_size_after = filesystem_.GetFileSize( + int64_t document_log_size_after = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_EQ(ground_truth_size_before, ground_truth_size_after); + EXPECT_EQ(document_log_size_before, document_log_size_after); EXPECT_THAT(document_store->Get(email_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -1507,7 +1509,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) { /*num_docs=*/1, /*sum_length_in_tokens=*/4))); } -TEST_F(DocumentStoreTest, GetDiskUsage) { +TEST_F(DocumentStoreTest, GetStorageInfo) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -1515,8 +1517,8 @@ TEST_F(DocumentStoreTest, GetDiskUsage) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_doc_store_size, - doc_store->GetDiskUsage()); + DocumentStorageInfoProto doc_store_storage_info = doc_store->GetStorageInfo(); + int64_t empty_doc_store_size = doc_store_storage_info.document_store_size(); EXPECT_THAT(empty_doc_store_size, Gt(0)); DocumentProto document = DocumentBuilder() @@ -1525,15 +1527,16 @@ TEST_F(DocumentStoreTest, GetDiskUsage) { .AddStringProperty("subject", "foo") .Build(); - // Since our GetDiskUsage can only get sizes in increments of block_size, we + // Since GetStorageInfo can only get sizes in increments of block_size, we // need to insert enough documents so the disk usage will increase by at least // 1 block size. The number 100 is a bit arbitrary, gotten from manually // testing. 
for (int i = 0; i < 100; ++i) { ICING_ASSERT_OK(doc_store->Put(document)); } - EXPECT_THAT(doc_store->GetDiskUsage(), - IsOkAndHolds(Gt(empty_doc_store_size))); + doc_store_storage_info = doc_store->GetStorageInfo(); + EXPECT_THAT(doc_store_storage_info.document_store_size(), + Gt(empty_doc_store_size)); // Bad file system MockFilesystem mock_filesystem; @@ -1546,8 +1549,8 @@ TEST_F(DocumentStoreTest, GetDiskUsage) { std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem = std::move(create_result.document_store); - EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + doc_store_storage_info = doc_store_with_mock_filesystem->GetStorageInfo(); + EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1)); } TEST_F(DocumentStoreTest, MaxDocumentId) { @@ -2231,7 +2234,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) { EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum)); } -TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) { +TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, @@ -2247,6 +2250,24 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) { IsOkAndHolds(Not(Eq(checksum)))); } +TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + ICING_EXPECT_OK(document_store->Put(test_document1_)); + ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum()); + + UsageReport usage_report = + CreateUsageReport(test_document1_.namespace_(), test_document1_.uri(), + /*timestamp_ms=*/1000, 
UsageReport::USAGE_TYPE1); + ICING_EXPECT_OK(document_store->ReportUsage(usage_report)); + EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); +} + TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) { const std::string schema_store_dir = schema_store_dir_ + "_custom"; @@ -3438,17 +3459,66 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true); } - NativeInitializeStats initializeStats; + InitializeStatsProto initialize_stats; ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, - schema_store_.get(), &initializeStats)); + schema_store_.get(), &initialize_stats)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); // The store_cache trigger regeneration because its element size is // inconsistent: expected 20 (current new size), actual 12 (as per the v0 // score_cache). - EXPECT_TRUE(initializeStats.has_document_store_recovery_cause()); + EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause()); +} + +TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // Add three documents. 
+ DocumentProto document1 = test_document1_; + document1.set_namespace_("namespace.1"); + document1.set_uri("uri1"); + ICING_ASSERT_OK(doc_store->Put(document1)); + + DocumentProto document2 = test_document1_; + document2.set_namespace_("namespace.1"); + document2.set_uri("uri2"); + document2.set_creation_timestamp_ms(fake_clock_.GetSystemTimeMilliseconds()); + document2.set_ttl_ms(100); + ICING_ASSERT_OK(doc_store->Put(document2)); + + DocumentProto document3 = test_document1_; + document3.set_namespace_("namespace.1"); + document3.set_uri("uri3"); + ICING_ASSERT_OK(doc_store->Put(document3)); + + // Delete the first doc. + ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri())); + + // Expire the second doc. + fake_clock_.SetSystemTimeMilliseconds(document2.creation_timestamp_ms() + + document2.ttl_ms() + 1); + + DocumentStorageInfoProto storage_info = doc_store->GetStorageInfo(); + EXPECT_THAT(storage_info.num_alive_documents(), Eq(1)); + EXPECT_THAT(storage_info.num_deleted_documents(), Eq(1)); + EXPECT_THAT(storage_info.num_expired_documents(), Eq(1)); + EXPECT_THAT(storage_info.document_store_size(), Ge(0)); + EXPECT_THAT(storage_info.document_log_size(), Ge(0)); + EXPECT_THAT(storage_info.key_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.document_id_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.score_cache_size(), Ge(0)); + EXPECT_THAT(storage_info.filter_cache_size(), Ge(0)); + EXPECT_THAT(storage_info.corpus_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.corpus_score_cache_size(), Ge(0)); + EXPECT_THAT(storage_info.namespace_id_mapper_size(), Ge(0)); + EXPECT_THAT(storage_info.num_namespaces(), Eq(1)); } } // namespace diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc index 54896dc..7e5cebf 100644 --- a/icing/store/usage-store.cc +++ b/icing/store/usage-store.cc @@ -218,6 +218,10 @@ libtextclassifier3::StatusOr<int64_t> UsageStore::GetElementsFileSize() const { return 
usage_score_cache_->GetElementsFileSize(); } +libtextclassifier3::StatusOr<int64_t> UsageStore::GetDiskUsage() const { + return usage_score_cache_->GetDiskUsage(); +} + libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) { if (num_documents >= usage_score_cache_->num_elements()) { // No need to truncate diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h index b7de970..fd77df4 100644 --- a/icing/store/usage-store.h +++ b/icing/store/usage-store.h @@ -157,6 +157,14 @@ class UsageStore { // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const; + // Calculates and returns the disk usage in bytes. Rounds up to the nearest + // block size. + // + // Returns: + // Disk usage on success + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const; + // Resizes the storage so that only the usage scores of and before // last_document_id are stored. // diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc index 220c226..b2dbe4b 100644 --- a/icing/store/usage-store_test.cc +++ b/icing/store/usage-store_test.cc @@ -577,6 +577,41 @@ TEST_F(UsageStoreTest, GetElementsFileSize) { IsOkAndHolds(Gt(empty_file_size))); } +TEST_F(UsageStoreTest, GetDiskUsageEmpty) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // There's some internal metadata, so our disk usage will round up to 1 block. + ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage, + usage_store->GetDiskUsage()); + EXPECT_THAT(empty_disk_usage, Gt(0)); +} + +TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, + UsageStore::Create(&filesystem_, test_dir_)); + + // There's some internal metadata, so our disk usage will round up to 1 block. 
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage, + usage_store->GetDiskUsage()); + + // Since our GetDiskUsage can only get sizes in increments of block_size, we + // need to insert enough usage reports so the disk usage will increase by at + // least 1 block size. The number 200 is a bit arbitrary, gotten from manually + // testing. + UsageReport usage_report = CreateUsageReport( + "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1); + for (int i = 0; i < 200; ++i) { + usage_store->AddUsageReport(usage_report, /*document_id=*/i); + } + + // We need to persist since iOS won't see the new disk allocations until after + // everything gets written. + usage_store->PersistToDisk(); + + EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage))); +} + } // namespace } // namespace lib |