author     Cassie Wang <cassiewang@google.com>   2021-02-26 08:04:01 -0800
committer  Cassie Wang <cassiewang@google.com>   2021-03-02 15:29:44 -0800
commit     85fd8c8521e338d2bab69f5482e3cc2cf312fd4e (patch)
tree       929e118124b203997be393e4a1c5f5ee6da2de40 /icing/store
parent     a34db390d80f862bfaaa49dea3605c5fec3bca3d (diff)
Sync from upstream.
Descriptions:
==========
Add last optimized time to GetOptimizeInfo.
==========
Update the implementation of snippeting to return property paths with value indices and remove the values_index field.
==========
Create builders for SchemaProto, SchemaTypeConfigProto and PropertyConfigProto.
==========
Rename some protos with the rules:
- Remove "Native" prefix
- Add "Proto" suffix for consistency with other protos
==========
Upgrade your minimum iOS version to 11.4.
==========
Fix PersistToDisk definitions to ensure that they properly call datasync. This change is meant to address the first part of the ptd doc - that certain functions that claim to persist data don't actually explicitly flush.
==========
Change function call from has_field() to field().
==========
Add IcingStorageInfo.
==========
Add IndexStorageStats.
==========
Add SchemaStoreStorageStats.
==========
Add DocumentStoreStorageStats.
==========
Implement OptimizeStats.
==========
Remove the max number of results per query limit (1000) and replace it with a more flexible way to limit memory use by the result-state-manager.
==========
Add a test case to ensure we don't add UsageStore's checksum in DocumentStore's ComputeChecksum.
==========
Account for UsageStore in GetDiskUsage.
==========
Ensure that SchemaStore properly handles function calls when the schema isn't set.
==========
Remove the jlpl_strict_deps feature from package declarations.
==========
Qualify std::string in 3p directories.
==========
Section restricts should influence the relevance score.
==========
Apply fixes upstream that were necessary to sync changes downstream. Also add a METADATA check to prevent any accidental adds of foo.proto.h includes.
==========
Remove the 'com.google.protobuf' to 'com.google.android.icing.protobuf' translation in the export_to_aosp script.
==========
Include usage store size in GetOptimizeInfo. This helps clients get a better idea of what savings they could get back if they called Optimize.

Change-Id: Ia2339c7987267a73c49dadf1ced4a0a8ef001d4c
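
Of the descriptions above, the PersistToDisk fix is the one whose mechanics are easiest to miss in the diff: the header write in DocumentStore::UpdateHeader now goes through an explicit file descriptor so that DataSync can be called after the write, rather than assuming a bare Write reaches disk. The following is a minimal sketch of that pattern, using the icing Filesystem, ScopedFd, and absl_ports helpers that appear in the diff; the HeaderBlob struct, the WriteHeaderDurably name, and the include paths are illustrative assumptions rather than upstream code.

// Minimal sketch of the write-then-DataSync pattern applied by this commit
// (compare DocumentStore::UpdateHeader in the document-store.cc hunk below).
// HeaderBlob, WriteHeaderDurably, and the include paths are assumptions made
// for illustration, not upstream API.
#include <cstdint>
#include <string>

#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/text_classifier/lib3/utils/base/status.h"

namespace icing {
namespace lib {

// Stand-in for the private DocumentStore header layout.
struct HeaderBlob {
  uint32_t magic;
  uint32_t checksum;
};

libtextclassifier3::Status WriteHeaderDurably(const Filesystem* filesystem,
                                              const std::string& header_file,
                                              const HeaderBlob& header) {
  // Open (or create) the header file and keep the descriptor alive for the
  // whole write + sync sequence.
  ScopedFd sfd(filesystem->OpenForWrite(header_file.c_str()));
  if (!sfd.is_valid() ||
      // Overwrite the header bytes.
      !filesystem->Write(sfd.get(), &header, sizeof(header)) ||
      // Explicitly flush file data to disk; a successful Write alone does not
      // guarantee the bytes are persisted.
      !filesystem->DataSync(sfd.get())) {
    return absl_ports::InternalError(
        absl_ports::StrCat("Failed to durably write header: ", header_file));
  }
  return libtextclassifier3::Status::OK;
}

}  // namespace lib
}  // namespace icing

The same open/write/DataSync sequence is what the document-store.cc hunk applies to UpdateHeader, and the matching header comment in document-store.h is updated to "Update, replace and persist the header file".
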
Diffstat (limited to 'icing/store')
-rw-r--r--  icing/store/document-store.cc       134
-rw-r--r--  icing/store/document-store.h         49
-rw-r--r--  icing/store/document-store_test.cc  148
-rw-r--r--  icing/store/usage-store.cc            4
-rw-r--r--  icing/store/usage-store.h             8
-rw-r--r--  icing/store/usage-store_test.cc      35
6 files changed, 284 insertions, 94 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 72bf736..59944fe 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -189,6 +189,17 @@ int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
return expiration_timestamp_ms;
}
+void IncrementDeletedOrExpired(FileBackedVector<int64_t>* document_id_mapper,
+ DocumentId document_id, int* num_deleted_out,
+ int* num_expired_out) {
+ auto location_or = document_id_mapper->Get(document_id);
+ if (location_or.ok() && *location_or.ValueOrDie() == kDocDeletedFlag) {
+ ++(*num_deleted_out);
+ } else {
+ ++(*num_expired_out);
+ }
+}
+
} // namespace
DocumentStore::DocumentStore(const Filesystem* filesystem,
@@ -203,13 +214,13 @@ DocumentStore::DocumentStore(const Filesystem* filesystem,
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
const DocumentProto& document, int32_t num_tokens,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
return Put(DocumentProto(document), num_tokens, put_document_stats);
}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
DocumentProto&& document, int32_t num_tokens,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
document.mutable_internal_fields()->set_length_in_tokens(num_tokens);
return InternalPut(document, put_document_stats);
}
@@ -226,7 +237,7 @@ DocumentStore::~DocumentStore() {
libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -243,7 +254,7 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
}
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem_, MakeDocumentLogFilename(base_dir_),
FileBackedProtoLog<DocumentWrapper>::Options(
@@ -264,16 +275,16 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
<< "Data loss in document log, regenerating derived files.";
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::DATA_LOSS);
+ InitializeStatsProto::DATA_LOSS);
if (create_result.data_loss == DataLoss::PARTIAL) {
// Ground truth is partially lost.
initialize_stats->set_document_store_data_status(
- NativeInitializeStats::PARTIAL_LOSS);
+ InitializeStatsProto::PARTIAL_LOSS);
} else {
// Ground truth is completely lost.
initialize_stats->set_document_store_data_status(
- NativeInitializeStats::COMPLETE_LOSS);
+ InitializeStatsProto::COMPLETE_LOSS);
}
}
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
@@ -294,7 +305,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
"regenerating derived files for DocumentStore.";
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::IO_ERROR);
+ InitializeStatsProto::IO_ERROR);
}
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status = RegenerateDerivedFiles();
@@ -788,6 +799,11 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
}
Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie();
+ // NOTE: We purposely don't include usage_store checksum here because we can't
+ // regenerate it from ground truth documents. If it gets corrupted, we'll just
+ // clear all usage reports, but we shouldn't throw everything else in the
+ // document store out.
+
total_checksum.Append(std::to_string(document_log_checksum.Get()));
total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
total_checksum.Append(std::to_string(document_id_mapper_checksum.Get()));
@@ -819,8 +835,11 @@ libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
header.checksum = checksum.Get();
// This should overwrite the header.
- if (!filesystem_->Write(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
+ ScopedFd sfd(
+ filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
+ if (!sfd.is_valid() ||
+ !filesystem_->Write(sfd.get(), &header, sizeof(header)) ||
+ !filesystem_->DataSync(sfd.get())) {
return absl_ports::InternalError(absl_ports::StrCat(
"Failed to write DocStore header: ", MakeHeaderFilename(base_dir_)));
}
@@ -828,7 +847,7 @@ libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
- DocumentProto& document, NativePutDocumentStats* put_document_stats) {
+ DocumentProto& document, PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> put_timer = clock_.GetNewTimer();
ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
@@ -1404,30 +1423,62 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const {
- ICING_ASSIGN_OR_RETURN(const int64_t document_log_disk_usage,
- document_log_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_disk_usage,
- document_key_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_id_mapper_disk_usage,
- document_id_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t score_cache_disk_usage,
- score_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_disk_usage,
- filter_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage,
- namespace_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage,
- corpus_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_disk_usage,
- corpus_score_cache_->GetDiskUsage());
-
- int64_t disk_usage = document_log_disk_usage +
- document_key_mapper_disk_usage +
- document_id_mapper_disk_usage + score_cache_disk_usage +
- filter_cache_disk_usage + namespace_mapper_disk_usage +
- corpus_mapper_disk_usage + corpus_score_cache_disk_usage;
- return disk_usage;
+int64_t GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t>& value_or,
+ int64_t default_value) {
+ return (value_or.ok()) ? value_or.ValueOrDie() : default_value;
+}
+
+DocumentStorageInfoProto DocumentStore::GetMemberStorageInfo() const {
+ DocumentStorageInfoProto storage_info;
+ storage_info.set_document_log_size(
+ GetValueOrDefault(document_log_->GetDiskUsage(), -1));
+ storage_info.set_key_mapper_size(
+ GetValueOrDefault(document_key_mapper_->GetDiskUsage(), -1));
+ storage_info.set_document_id_mapper_size(
+ GetValueOrDefault(document_id_mapper_->GetDiskUsage(), -1));
+ storage_info.set_score_cache_size(
+ GetValueOrDefault(score_cache_->GetDiskUsage(), -1));
+ storage_info.set_filter_cache_size(
+ GetValueOrDefault(filter_cache_->GetDiskUsage(), -1));
+ storage_info.set_namespace_id_mapper_size(
+ GetValueOrDefault(namespace_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_mapper_size(
+ GetValueOrDefault(corpus_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_score_cache_size(
+ GetValueOrDefault(corpus_score_cache_->GetDiskUsage(), -1));
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const {
+ int num_alive = 0;
+ int num_expired = 0;
+ int num_deleted = 0;
+ for (DocumentId document_id = 0;
+ document_id < document_id_mapper_->num_elements(); ++document_id) {
+ if (DoesDocumentExist(document_id)) {
+ ++num_alive;
+ } else {
+ IncrementDeletedOrExpired(document_id_mapper_.get(), document_id,
+ &num_deleted, &num_expired);
+ }
+ }
+ storage_info.set_num_alive_documents(num_alive);
+ storage_info.set_num_deleted_documents(num_deleted);
+ storage_info.set_num_expired_documents(num_expired);
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::GetStorageInfo() const {
+ DocumentStorageInfoProto storage_info = GetMemberStorageInfo();
+ int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_document_store_size(directory_size);
+ } else {
+ storage_info.set_document_store_size(-1);
+ }
+ storage_info.set_num_namespaces(namespace_mapper_->num_keys());
+ return CalculateDocumentStatusCounts(std::move(storage_info));
}
libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
@@ -1577,7 +1628,8 @@ libtextclassifier3::Status DocumentStore::Optimize() {
}
libtextclassifier3::Status DocumentStore::OptimizeInto(
- const std::string& new_directory, const LanguageSegmenter* lang_segmenter) {
+ const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats) {
// Validates directory
if (new_directory == base_dir_) {
return absl_ports::InvalidArgumentError(
@@ -1592,10 +1644,14 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
// Writes all valid docs into new document store (new directory)
int size = document_id_mapper_->num_elements();
+ int num_deleted = 0;
+ int num_expired = 0;
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
// Skip nonexistent documents
+ IncrementDeletedOrExpired(document_id_mapper_.get(), document_id,
+ &num_deleted, &num_expired);
continue;
} else if (!document_or.ok()) {
// Real error, pass up
@@ -1640,7 +1696,11 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
ICING_RETURN_IF_ERROR(
new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
-
+ if (stats != nullptr) {
+ stats->set_num_original_documents(size);
+ stats->set_num_deleted_documents(num_deleted);
+ stats->set_num_expired_documents(num_expired);
+ }
ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk());
return libtextclassifier3::Status::OK;
}
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index b2908f0..3b8408d 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -29,6 +29,8 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/corpus-associated-scoring-data.h"
#include "icing/store/corpus-id.h"
@@ -122,7 +124,7 @@ class DocumentStore {
static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- NativeInitializeStats* initialize_stats = nullptr);
+ InitializeStatsProto* initialize_stats = nullptr);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
@@ -152,10 +154,10 @@ class DocumentStore {
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<DocumentId> Put(
const DocumentProto& document, int32_t num_tokens = 0,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
libtextclassifier3::StatusOr<DocumentId> Put(
DocumentProto&& document, int32_t num_tokens = 0,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
// Finds and returns the document identified by the given key (namespace +
// uri). If 'clear_internal_fields' is true, document level data that's
@@ -351,16 +353,11 @@ class DocumentStore {
// INTERNAL on I/O error
libtextclassifier3::Status PersistToDisk();
- // Calculates and returns the disk usage in bytes. Rounds up to the nearest
- // block size.
+ // Calculates the StorageInfo for the Document Store.
//
- // Returns:
- // Disk usage on success
- // INTERNAL_ERROR on IO error
- //
- // TODO(tjbarron): consider returning a struct which has the breakdown of each
- // component.
- libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ DocumentStorageInfoProto GetStorageInfo() const;
// Update any derived data off of the SchemaStore with the new SchemaStore.
// This may include pointers, SchemaTypeIds, etc.
@@ -407,6 +404,8 @@ class DocumentStore {
// reassigned so any files / classes that are based on old document ids may be
// outdated.
//
+ // stats will be set if non-null.
+ //
// NOTE: The tasks in this method are too expensive to be executed in
// real-time. The caller should decide how frequently and when to call this
// method based on device usage.
@@ -416,8 +415,8 @@ class DocumentStore {
// INVALID_ARGUMENT if new_directory is same as current base directory
// INTERNAL_ERROR on IO error
libtextclassifier3::Status OptimizeInto(
- const std::string& new_directory,
- const LanguageSegmenter* lang_segmenter);
+ const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats = nullptr);
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -508,7 +507,7 @@ class DocumentStore {
bool initialized_ = false;
libtextclassifier3::StatusOr<DataLoss> Initialize(
- NativeInitializeStats* initialize_stats);
+ InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -576,8 +575,8 @@ class DocumentStore {
// if it doesn't exist.
bool HeaderExists();
- // Update and replace the header file. Creates the header file if it doesn't
- // exist.
+ // Update, replace and persist the header file. Creates the header file if it
+ // doesn't exist.
//
// Returns:
// OK on success
@@ -586,7 +585,7 @@ class DocumentStore {
libtextclassifier3::StatusOr<DocumentId> InternalPut(
DocumentProto& document,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
// Helper function to do batch deletes. Documents with the given
// "namespace_id" and "schema_type_id" will be deleted. If callers don't need
@@ -688,6 +687,20 @@ class DocumentStore {
// Sets usage scores for the given document.
libtextclassifier3::Status SetUsageScores(
DocumentId document_id, const UsageStore::UsageScores& usage_scores);
+
+ // Returns:
+ // - on success, a DocumentStorageInfoProto with the fields relating to the
+ // size of Document Store member variables populated.
+ // - INTERNAL on failure to get file size
+ DocumentStorageInfoProto GetMemberStorageInfo() const;
+
+ // Returns:
+ // - on success, the storage_info that was passed in but with the number of
+ // alive, deleted and expired documents also set.
+ // - OUT_OF_RANGE, this should never happen. This could only be returned if
+ // the document_id_mapper somehow became larger than the filter cache.
+ DocumentStorageInfoProto CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const;
};
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 7754373..440b48f 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -31,6 +31,7 @@
#include "icing/portable/equals-proto.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/corpus-associated-scoring-data.h"
#include "icing/store/corpus-id.h"
@@ -55,6 +56,7 @@ namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
@@ -436,16 +438,16 @@ TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(
document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
@@ -566,7 +568,7 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(doc_store
@@ -575,9 +577,9 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) {
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
@@ -590,7 +592,7 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(doc_store
@@ -599,9 +601,9 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) {
@@ -665,7 +667,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
document4.set_namespace_("namespace.1");
document4.set_uri("uri2");
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -686,7 +688,7 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
@@ -710,9 +712,9 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -908,7 +910,7 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(document_store
@@ -917,10 +919,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
@@ -933,7 +935,7 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(document_store
@@ -942,10 +944,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) {
@@ -1016,7 +1018,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1036,7 +1038,7 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
@@ -1060,9 +1062,9 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1100,7 +1102,7 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1125,7 +1127,7 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
@@ -1156,9 +1158,9 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1507,7 +1509,7 @@ TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
/*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, GetDiskUsage) {
+TEST_F(DocumentStoreTest, GetStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1515,8 +1517,8 @@ TEST_F(DocumentStoreTest, GetDiskUsage) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_doc_store_size,
- doc_store->GetDiskUsage());
+ DocumentStorageInfoProto doc_store_storage_info = doc_store->GetStorageInfo();
+ int64_t empty_doc_store_size = doc_store_storage_info.document_store_size();
EXPECT_THAT(empty_doc_store_size, Gt(0));
DocumentProto document = DocumentBuilder()
@@ -1525,15 +1527,16 @@ TEST_F(DocumentStoreTest, GetDiskUsage) {
.AddStringProperty("subject", "foo")
.Build();
- // Since our GetDiskUsage can only get sizes in increments of block_size, we
+ // Since GetStorageInfo can only get sizes in increments of block_size, we
// need to insert enough documents so the disk usage will increase by at least
// 1 block size. The number 100 is a bit arbitrary, gotten from manually
// testing.
for (int i = 0; i < 100; ++i) {
ICING_ASSERT_OK(doc_store->Put(document));
}
- EXPECT_THAT(doc_store->GetDiskUsage(),
- IsOkAndHolds(Gt(empty_doc_store_size)));
+ doc_store_storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(),
+ Gt(empty_doc_store_size));
// Bad file system
MockFilesystem mock_filesystem;
@@ -1546,8 +1549,8 @@ TEST_F(DocumentStoreTest, GetDiskUsage) {
std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem =
std::move(create_result.document_store);
- EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ doc_store_storage_info = doc_store_with_mock_filesystem->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1));
}
TEST_F(DocumentStoreTest, MaxDocumentId) {
@@ -2231,7 +2234,7 @@ TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
+TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2247,6 +2250,24 @@ TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
IsOkAndHolds(Not(Eq(checksum))));
}
+TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_EXPECT_OK(document_store->Put(test_document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
+
+ UsageReport usage_report =
+ CreateUsageReport(test_document1_.namespace_(), test_document1_.uri(),
+ /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ ICING_EXPECT_OK(document_store->ReportUsage(usage_report));
+ EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+}
+
TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
const std::string schema_store_dir = schema_store_dir_ + "_custom";
@@ -3438,17 +3459,66 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true);
}
- NativeInitializeStats initializeStats;
+ InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get(), &initializeStats));
+ schema_store_.get(), &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The store_cache trigger regeneration because its element size is
// inconsistent: expected 20 (current new size), actual 12 (as per the v0
// score_cache).
- EXPECT_TRUE(initializeStats.has_document_store_recovery_cause());
+ EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause());
+}
+
+TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Add three documents.
+ DocumentProto document1 = test_document1_;
+ document1.set_namespace_("namespace.1");
+ document1.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document1));
+
+ DocumentProto document2 = test_document1_;
+ document2.set_namespace_("namespace.1");
+ document2.set_uri("uri2");
+ document2.set_creation_timestamp_ms(fake_clock_.GetSystemTimeMilliseconds());
+ document2.set_ttl_ms(100);
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ DocumentProto document3 = test_document1_;
+ document3.set_namespace_("namespace.1");
+ document3.set_uri("uri3");
+ ICING_ASSERT_OK(doc_store->Put(document3));
+
+ // Delete the first doc.
+ ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri()));
+
+ // Expire the second doc.
+ fake_clock_.SetSystemTimeMilliseconds(document2.creation_timestamp_ms() +
+ document2.ttl_ms() + 1);
+
+ DocumentStorageInfoProto storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(storage_info.num_alive_documents(), Eq(1));
+ EXPECT_THAT(storage_info.num_deleted_documents(), Eq(1));
+ EXPECT_THAT(storage_info.num_expired_documents(), Eq(1));
+ EXPECT_THAT(storage_info.document_store_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_log_size(), Ge(0));
+ EXPECT_THAT(storage_info.key_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.filter_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.namespace_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.num_namespaces(), Eq(1));
}
} // namespace
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
index 54896dc..7e5cebf 100644
--- a/icing/store/usage-store.cc
+++ b/icing/store/usage-store.cc
@@ -218,6 +218,10 @@ libtextclassifier3::StatusOr<int64_t> UsageStore::GetElementsFileSize() const {
return usage_score_cache_->GetElementsFileSize();
}
+libtextclassifier3::StatusOr<int64_t> UsageStore::GetDiskUsage() const {
+ return usage_score_cache_->GetDiskUsage();
+}
+
libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) {
if (num_documents >= usage_score_cache_->num_elements()) {
// No need to truncate
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
index b7de970..fd77df4 100644
--- a/icing/store/usage-store.h
+++ b/icing/store/usage-store.h
@@ -157,6 +157,14 @@ class UsageStore {
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+ // Calculates and returns the disk usage in bytes. Rounds up to the nearest
+ // block size.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
// Resizes the storage so that only the usage scores of and before
// last_document_id are stored.
//
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index 220c226..b2dbe4b 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -577,6 +577,41 @@ TEST_F(UsageStoreTest, GetElementsFileSize) {
IsOkAndHolds(Gt(empty_file_size)));
}
+TEST_F(UsageStoreTest, GetDiskUsageEmpty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // There's some internal metadata, so our disk usage will round up to 1 block.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+ usage_store->GetDiskUsage());
+ EXPECT_THAT(empty_disk_usage, Gt(0));
+}
+
+TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // There's some internal metadata, so our disk usage will round up to 1 block.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+ usage_store->GetDiskUsage());
+
+ // Since our GetDiskUsage can only get sizes in increments of block_size, we
+ // need to insert enough usage reports so the disk usage will increase by at
+ // least 1 block size. The number 200 is a bit arbitrary, gotten from manually
+ // testing.
+ UsageReport usage_report = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ for (int i = 0; i < 200; ++i) {
+ usage_store->AddUsageReport(usage_report, /*document_id=*/i);
+ }
+
+ // We need to persist since iOS won't see the new disk allocations until after
+ // everything gets written.
+ usage_store->PersistToDisk();
+
+ EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage)));
+}
+
} // namespace
} // namespace lib