about summary refs log tree commit diff
path: root/icing/store
diff options
context:
space:
mode:
author Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-12-07 00:06:15 +0000
committer Android Build Coastguard Worker <android-build-coastguard-worker@google.com> 2023-12-07 00:06:15 +0000
commit c2be20616650e2f8ccb1d592654c08d6fda315ca (patch)
tree 9892cbbd0fb247ce252a38f258e33741ca025a45 /icing/store
parent 6d51031f6fac465f09e95982b19c4f86c88ee2fa (diff)
parent ad9e08ff59020f5b52410dc093061d8ef715a781 (diff)
download icing-android-14.0.0_r36.tar.gz
Change-Id: Ia93a92877933208d4b5ed5f6473564134928dc04
Diffstat (limited to 'icing/store')
-rw-r--r-- icing/store/document-store.cc 158
-rw-r--r-- icing/store/document-store.h 58
-rw-r--r-- icing/store/document-store_benchmark.cc 5
-rw-r--r-- icing/store/document-store_test.cc 326
-rw-r--r-- icing/store/namespace-fingerprint-identifier.cc 73
-rw-r--r-- icing/store/namespace-fingerprint-identifier.h 72
-rw-r--r-- icing/store/namespace-fingerprint-identifier_test.cc 148
-rw-r--r-- icing/store/usage-store_test.cc 38
8 files changed, 784 insertions, 94 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 30de410..094eea1 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -53,6 +53,7 @@
#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
#include "icing/store/dynamic-trie-key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
#include "icing/store/persistent-hash-map-key-mapper.h"
#include "icing/store/usage-store.h"
@@ -142,25 +143,6 @@ std::string MakeCorpusMapperFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename);
}
-// This function will encode a namespace id into a fixed 3 bytes string.
-std::string EncodeNamespaceId(NamespaceId namespace_id) {
- // encoding should be 1 to 3 bytes based on the value of namespace_id.
- std::string encoding = encode_util::EncodeIntToCString(namespace_id);
- // Make encoding to fixed 3 bytes.
- while (encoding.size() < 3) {
- // DynamicTrie cannot handle keys with 0 as bytes, so we append it using 1,
- // just like what we do in encode_util::EncodeIntToCString.
- //
- // The reason that this works is because DecodeIntToString decodes a byte
- // value of 0x01 as 0x00. When EncodeIntToCString returns a namespaceid
- // encoding that is less than 3 bytes, it means that the id contains
- // unencoded leading 0x00. So here we're explicitly encoding those bytes as
- // 0x01.
- encoding.push_back(1);
- }
- return encoding;
-}
-
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
int64_t ttl_ms) {
if (ttl_ms == 0) {
@@ -269,9 +251,8 @@ std::string DocumentStore::MakeFingerprint(
absl_ports::StrCat(namespace_, uri_or_schema));
return fingerprint_util::GetFingerprintString(fprint);
} else {
- return absl_ports::StrCat(EncodeNamespaceId(namespace_id),
- encode_util::EncodeIntToCString(
- tc3farmhash::Fingerprint64(uri_or_schema)));
+ return NamespaceFingerprintIdentifier(namespace_id, uri_or_schema)
+ .EncodeToCString();
}
}
@@ -328,13 +309,15 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
filesystem, base_dir, clock, schema_store, namespace_id_fingerprint,
pre_mapping_fbv, use_persistent_hash_map, compression_level));
ICING_ASSIGN_OR_RETURN(
- DataLoss data_loss,
+ InitializeResult initialize_result,
document_store->Initialize(force_recovery_and_revalidate_documents,
initialize_stats));
CreateResult create_result;
create_result.document_store = std::move(document_store);
- create_result.data_loss = data_loss;
+ create_result.data_loss = initialize_result.data_loss;
+ create_result.derived_files_regenerated =
+ initialize_result.derived_files_regenerated;
return create_result;
}
@@ -380,9 +363,9 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
- bool force_recovery_and_revalidate_documents,
- InitializeStatsProto* initialize_stats) {
+libtextclassifier3::StatusOr<DocumentStore::InitializeResult>
+DocumentStore::Initialize(bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats) {
auto create_result_or =
DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_);
@@ -400,6 +383,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
InitializeStatsProto::RecoveryCause recovery_cause =
GetRecoveryCause(create_result, force_recovery_and_revalidate_documents);
+ bool derived_files_regenerated = false;
if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) {
ICING_LOG(INFO) << "Starting Document Store Recovery with cause="
<< recovery_cause << ", and create result { new_file="
@@ -416,16 +400,18 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status =
RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
- if (initialize_stats != nullptr &&
- recovery_cause != InitializeStatsProto::NONE) {
+ if (recovery_cause != InitializeStatsProto::NONE) {
// Only consider it a recovery if the client forced a recovery or there
// was data loss. Otherwise, this could just be the first time we're
// initializing and generating derived files.
- initialize_stats->set_document_store_recovery_latency_ms(
- document_recovery_timer->GetElapsedMilliseconds());
- initialize_stats->set_document_store_recovery_cause(recovery_cause);
- initialize_stats->set_document_store_data_status(
- GetDataStatus(create_result.log_create_result.data_loss));
+ derived_files_regenerated = true;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_latency_ms(
+ document_recovery_timer->GetElapsedMilliseconds());
+ initialize_stats->set_document_store_recovery_cause(recovery_cause);
+ initialize_stats->set_document_store_data_status(
+ GetDataStatus(create_result.log_create_result.data_loss));
+ }
}
if (!status.ok()) {
ICING_LOG(ERROR)
@@ -438,6 +424,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
+ derived_files_regenerated = true;
libtextclassifier3::Status status = RegenerateDerivedFiles(
/*force_recovery_and_revalidate_documents=*/false);
if (initialize_stats != nullptr) {
@@ -459,7 +446,10 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
initialize_stats->set_num_documents(document_id_mapper_->num_elements());
}
- return create_result.log_create_result.data_loss;
+ InitializeResult initialize_result = {
+ .data_loss = create_result.log_create_result.data_loss,
+ .derived_files_regenerated = derived_files_regenerated};
+ return initialize_result;
}
libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
@@ -1177,6 +1167,25 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
"Failed to find DocumentId by key: ", name_space, ", ", uri));
}
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
+ const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier)
+ const {
+ if (!namespace_id_fingerprint_) {
+ return absl_ports::FailedPreconditionError(
+ "Cannot lookup document id by namespace id + fingerprint without "
+ "enabling it on uri_mapper");
+ }
+
+ auto document_id_or = document_key_mapper_->Get(
+ namespace_fingerprint_identifier.EncodeToCString());
+ if (document_id_or.ok()) {
+ return document_id_or.ValueOrDie();
+ }
+ return absl_ports::Annotate(
+ std::move(document_id_or).status(),
+ "Failed to find DocumentId by namespace id + fingerprint");
+}
+
std::vector<std::string> DocumentStore::GetAllNamespaces() const {
std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
GetNamespaceIdsToNamespaces(namespace_mapper_.get());
@@ -1829,10 +1838,10 @@ libtextclassifier3::Status DocumentStore::Optimize() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<std::vector<DocumentId>>
+libtextclassifier3::StatusOr<DocumentStore::OptimizeResult>
DocumentStore::OptimizeInto(const std::string& new_directory,
const LanguageSegmenter* lang_segmenter,
- OptimizeStatsProto* stats) {
+ OptimizeStatsProto* stats) const {
// Validates directory
if (new_directory == base_dir_) {
return absl_ports::InvalidArgumentError(
@@ -1850,20 +1859,22 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
std::move(doc_store_create_result.document_store);
// Writes all valid docs into new document store (new directory)
- int size = document_id_mapper_->num_elements();
- int num_deleted = 0;
- int num_expired = 0;
+ int document_cnt = document_id_mapper_->num_elements();
+ int num_deleted_documents = 0;
+ int num_expired_documents = 0;
UsageStore::UsageScores default_usage;
- std::vector<DocumentId> document_id_old_to_new(size, kInvalidDocumentId);
+
+ OptimizeResult result;
+ result.document_id_old_to_new.resize(document_cnt, kInvalidDocumentId);
int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
- for (DocumentId document_id = 0; document_id < size; document_id++) {
+ for (DocumentId document_id = 0; document_id < document_cnt; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
if (IsDeleted(document_id)) {
- ++num_deleted;
+ ++num_deleted_documents;
} else if (!GetNonExpiredDocumentFilterData(document_id,
current_time_ms)) {
- ++num_expired;
+ ++num_expired_documents;
}
continue;
} else if (!document_or.ok()) {
@@ -1903,7 +1914,8 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
return new_document_id_or.status();
}
- document_id_old_to_new[document_id] = new_document_id_or.ValueOrDie();
+ result.document_id_old_to_new[document_id] =
+ new_document_id_or.ValueOrDie();
// Copy over usage scores.
ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
@@ -1917,13 +1929,61 @@ DocumentStore::OptimizeInto(const std::string& new_directory,
new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
}
+
+ // Construct namespace_id_old_to_new
+ int namespace_cnt = namespace_mapper_->num_keys();
+ std::unordered_map<NamespaceId, std::string> old_namespaces =
+ GetNamespaceIdsToNamespaces(namespace_mapper_.get());
+ if (namespace_cnt != old_namespaces.size()) {
+ // This really shouldn't happen. If it really happens, then:
+ // - It won't block DocumentStore optimization, so don't return error here.
+ // - Instead, write a warning log here and hint the caller to rebuild index.
+ ICING_LOG(WARNING) << "Unexpected old namespace count " << namespace_cnt
+ << " vs " << old_namespaces.size();
+ result.should_rebuild_index = true;
+ } else {
+ result.namespace_id_old_to_new.resize(namespace_cnt, kInvalidNamespaceId);
+ for (const auto& [old_namespace_id, ns] : old_namespaces) {
+ if (old_namespace_id >= result.namespace_id_old_to_new.size()) {
+ // This really shouldn't happen. If it really happens, then:
+ // - It won't block DocumentStore optimization, so don't return error
+ // here.
+ // - Instead, write a warning log here and hint the caller to rebuild
+ // index.
+ ICING_LOG(WARNING) << "Found unexpected namespace id "
+ << old_namespace_id << ". Should be in range 0 to "
+ << result.namespace_id_old_to_new.size()
+ << " (exclusive).";
+ result.namespace_id_old_to_new.clear();
+ result.should_rebuild_index = true;
+ break;
+ }
+
+ auto new_namespace_id_or = new_doc_store->namespace_mapper_->Get(ns);
+ if (!new_namespace_id_or.ok()) {
+ if (absl_ports::IsNotFound(new_namespace_id_or.status())) {
+ continue;
+ }
+ // Real error, return it.
+ return std::move(new_namespace_id_or).status();
+ }
+
+ NamespaceId new_namespace_id = new_namespace_id_or.ValueOrDie();
+ // Safe to use bracket to assign given that we've checked the range above.
+ result.namespace_id_old_to_new[old_namespace_id] = new_namespace_id;
+ }
+ }
+
if (stats != nullptr) {
- stats->set_num_original_documents(size);
- stats->set_num_deleted_documents(num_deleted);
- stats->set_num_expired_documents(num_expired);
+ stats->set_num_original_documents(document_cnt);
+ stats->set_num_deleted_documents(num_deleted_documents);
+ stats->set_num_expired_documents(num_expired_documents);
+ stats->set_num_original_namespaces(namespace_cnt);
+ stats->set_num_deleted_namespaces(
+ namespace_cnt - new_doc_store->namespace_mapper_->num_keys());
}
ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk(PersistType::FULL));
- return document_id_old_to_new;
+ return result;
}
libtextclassifier3::StatusOr<DocumentStore::OptimizeInfo>
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 92d4286..c228e8b 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -43,6 +43,7 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
#include "icing/store/usage-store.h"
#include "icing/tokenization/language-segmenter.h"
@@ -106,6 +107,11 @@ class DocumentStore {
// unpersisted. This may be used to signal that any derived data off of the
// document store may need to be regenerated.
DataLoss data_loss;
+
+ // A boolean flag indicating if derived files of the document store have
+ // been regenerated or not. This is usually a signal for callers to detect
+ // if any id assignment has changed (e.g. NamespaceId).
+ bool derived_files_regenerated;
};
// Not copyable
@@ -270,6 +276,21 @@ class DocumentStore {
libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
std::string_view name_space, std::string_view uri) const;
+ // Helper method to find a DocumentId that is associated with the given
+ // NamespaceFingerprintIdentifier.
+ //
+ // NOTE: The DocumentId may refer to an invalid document (deleted
+ // or expired). Callers can call DoesDocumentExist(document_id) to ensure it
+ // refers to a valid Document.
+ //
+ // Returns:
+ // A DocumentId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
+ const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier)
+ const;
+
// Returns the CorpusId associated with the given namespace and schema.
//
// Returns:
@@ -439,10 +460,23 @@ class DocumentStore {
// INTERNAL_ERROR on IO error
libtextclassifier3::Status Optimize();
+ struct OptimizeResult {
+ // A vector that maps old document id to new document id.
+ std::vector<DocumentId> document_id_old_to_new;
+
+ // A vector that maps old namespace id to new namespace id. Will be empty if
+ // should_rebuild_index is set to true.
+ std::vector<NamespaceId> namespace_id_old_to_new;
+
+ // A boolean flag that hints the caller (usually IcingSearchEngine) if it
+ // should rebuild index instead of adopting the id changes via the 2 vectors
+ // above. It will be set to true if any id inconsistency is found.
+ bool should_rebuild_index = false;
+ };
// Copy data from current base directory into a new directory. Any outdated or
- // deleted data won't be copied. During the process, document ids will be
- // reassigned so any files / classes that are based on old document ids may be
- // outdated.
+ // deleted data won't be copied. During the process, document/namespace ids
+ // will be reassigned so any files / classes that are based on old
+ // document/namespace ids may be outdated.
//
// stats will be set if non-null.
//
@@ -451,12 +485,14 @@ class DocumentStore {
// method based on device usage.
//
// Returns:
- // A vector that maps from old document id to new document id on success
+ // OptimizeResult which contains a vector mapping from old document id to
+ // new document id and another vector mapping from old namespace id to new
+ // namespace id, on success
// INVALID_ARGUMENT if new_directory is same as current base directory
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeInto(
+ libtextclassifier3::StatusOr<OptimizeResult> OptimizeInto(
const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
- OptimizeStatsProto* stats = nullptr);
+ OptimizeStatsProto* stats = nullptr) const;
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -580,7 +616,15 @@ class DocumentStore {
// worry about this field.
bool initialized_ = false;
- libtextclassifier3::StatusOr<DataLoss> Initialize(
+ struct InitializeResult {
+ DataLoss data_loss;
+
+ // A boolean flag indicating if derived files of the document store have
+ // been regenerated or not. This is usually a signal for callers to detect
+ // if any id assignment has changed (e.g. NamespaceId).
+ bool derived_files_regenerated;
+ };
+ libtextclassifier3::StatusOr<InitializeResult> Initialize(
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats);
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
index 5b9c568..46d76d8 100644
--- a/icing/store/document-store_benchmark.cc
+++ b/icing/store/document-store_benchmark.cc
@@ -163,8 +163,9 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
// stuff.
ICING_ASSERT_OK(document_store->Put(
CreateDocument("namespace", /*uri=*/std::to_string(i))));
- document_store->Delete("namespace", /*uri=*/std::to_string(i),
- clock.GetSystemTimeMilliseconds());
+ ICING_ASSERT_OK(document_store->Delete("namespace",
+ /*uri=*/std::to_string(i),
+ clock.GetSystemTimeMilliseconds()));
}
std::default_random_engine random;
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index a9c47f0..2d4cd99 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -47,6 +47,7 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-log-creator.h"
+#include "icing/store/namespace-fingerprint-identifier.h"
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -1050,7 +1051,7 @@ TEST_P(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
IsOkAndHolds(EqualsProto(message_document)));
}
-TEST_P(DocumentStoreTest, OptimizeInto) {
+TEST_P(DocumentStoreTest, OptimizeIntoSingleNamespace) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1103,24 +1104,33 @@ TEST_P(DocumentStoreTest, OptimizeInto) {
optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
// Validates that the optimized document log has the same size if nothing is
- // deleted
+ // deleted. Also namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(0, 1, 2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result1,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result1.document_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse());
int64_t optimized_size1 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_EQ(original_size, optimized_size1);
// Validates that the optimized document log has a smaller size if something
- // is deleted
+ // is deleted. Namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1",
fake_clock_.GetSystemTimeMilliseconds()));
// DocumentId 0 is removed.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, 1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result2,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result2.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, 1));
+ EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse());
int64_t optimized_size2 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(original_size, Gt(optimized_size2));
@@ -1130,13 +1140,17 @@ TEST_P(DocumentStoreTest, OptimizeInto) {
fake_clock_.SetSystemTimeMilliseconds(300);
// Validates that the optimized document log has a smaller size if something
- // expired
+ // expired. Namespace ids remain the same.
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
// DocumentId 0 is removed, and DocumentId 2 is expired.
- EXPECT_THAT(
- doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result3,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result3.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(0));
+ EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse());
int64_t optimized_size3 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(optimized_size2, Gt(optimized_size3));
@@ -1146,15 +1160,228 @@ TEST_P(DocumentStoreTest, OptimizeInto) {
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2",
fake_clock_.GetSystemTimeMilliseconds()));
- // DocumentId 0 and 1 is removed, and DocumentId 2 is expired.
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(ElementsAre(kInvalidDocumentId, kInvalidDocumentId,
- kInvalidDocumentId)));
+ // DocumentIds 0 and 1 are removed, and DocumentId 2 is expired. Since no
+ // document with the namespace is added into new document store, the namespace
+ // id will be invalid.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result4,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(
+ optimize_result4.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result4.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse());
int64_t optimized_size4 =
filesystem_.GetFileSize(optimized_document_log.c_str());
EXPECT_THAT(optimized_size3, Gt(optimized_size4));
}
+TEST_P(DocumentStoreTest, OptimizeIntoMultipleNamespaces) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document0 = DocumentBuilder()
+ .SetKey("namespace1", "uri0")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace1", "uri3")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace3", "uri4")
+ .SetSchema("email")
+ .SetCreationTimestampMs(100)
+ .SetTtlMs(1000)
+ .Build();
+
+ // Nothing should have expired yet.
+ fake_clock_.SetSystemTimeMilliseconds(100);
+
+ ICING_ASSERT_OK(doc_store->Put(document0));
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+ ICING_ASSERT_OK(doc_store->Put(document3));
+ ICING_ASSERT_OK(doc_store->Put(document4));
+
+ std::string original_document_log = absl_ports::StrCat(
+ document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename());
+
+ int64_t original_size =
+ filesystem_.GetFileSize(original_document_log.c_str());
+
+ std::string optimized_dir = document_store_dir_ + "_optimize";
+ std::string optimized_document_log =
+ optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename();
+
+ // Validates that the optimized document log has the same size if nothing is
+ // deleted. Also namespace ids remain the same.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result1,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result1.document_id_old_to_new,
+ ElementsAre(0, 1, 2, 3, 4));
+ EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse());
+ int64_t optimized_size1 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_EQ(original_size, optimized_size1);
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 0 with namespace1.
+ // - Before: ["namespace1#uri0", "namespace1#uri1", "namespace2#uri2",
+ // "namespace1#uri3", "namespace3#uri4"]
+ // - After: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace1",
+ // "namespace2", "namespace3"] order. Since new_doc_store has the same order
+ // of namespace id assignment, namespace ids remain the same.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri0",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result2,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result2.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, 0, 1, 2, 3));
+ EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0, 1, 2));
+ EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse());
+ int64_t optimized_size2 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(original_size, Gt(optimized_size2));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 1 with namespace1.
+ // - Before: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2",
+ // "namespace1", "namespace3"] order, so namespace_id_old_to_new should
+ // reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri1",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result3,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result3.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0, 1, 2));
+ EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(1, 0, 2));
+ EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse());
+ int64_t optimized_size3 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size2, Gt(optimized_size3));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 3 with namespace1.
+ // - Before: [nil, nil, "namespace2#uri2", "namespace1#uri3",
+ // "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2",
+ // "namespace3"] order and "namespace1" will never be assigned, so
+ // namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri3",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result4,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result4.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0,
+ kInvalidDocumentId, 1));
+ EXPECT_THAT(optimize_result4.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, 0, 1));
+ EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse());
+ int64_t optimized_size4 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size3, Gt(optimized_size4));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 4 with namespace3.
+ // - Before: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"]
+ // - After: [nil, nil, "namespace2#uri2", nil, nil]
+ // In this case, new_doc_store will assign namespace ids in ["namespace2"]
+ // order and "namespace1", "namespace3" will never be assigned, so
+ // namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace3", "uri4",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result5,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result5.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0,
+ kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result5.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, 0, kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result5.should_rebuild_index, IsFalse());
+ int64_t optimized_size5 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size4, Gt(optimized_size5));
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted.
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ // Delete DocumentId 2 with namespace2.
+ // - Before: [nil, nil, "namespace2#uri2", nil, nil]
+ // - After: [nil, nil, nil, nil, nil]
+ // In this case, all documents were deleted, so there will be no namespace ids
+ // either. namespace_id_old_to_new should reflect the change.
+ ICING_ASSERT_OK(doc_store->Delete("namespace2", "uri2",
+ fake_clock_.GetSystemTimeMilliseconds()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result6,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(
+ optimize_result6.document_id_old_to_new,
+ ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId,
+ kInvalidDocumentId, kInvalidDocumentId));
+ EXPECT_THAT(optimize_result6.namespace_id_old_to_new,
+ ElementsAre(kInvalidNamespaceId, kInvalidNamespaceId,
+ kInvalidNamespaceId));
+ EXPECT_THAT(optimize_result6.should_rebuild_index, IsFalse());
+ int64_t optimized_size6 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size5, Gt(optimized_size6));
+}
+
TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1165,8 +1392,13 @@ TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) {
std::string optimized_dir = document_store_dir_ + "_optimize";
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()),
- IsOkAndHolds(IsEmpty()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::OptimizeResult optimize_result,
+ doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
+ EXPECT_THAT(optimize_result.document_id_old_to_new, IsEmpty());
+ EXPECT_THAT(optimize_result.namespace_id_old_to_new, IsEmpty());
+ EXPECT_THAT(optimize_result.should_rebuild_index, IsFalse());
}
TEST_P(DocumentStoreTest, ShouldRecoverFromDataLoss) {
@@ -3427,6 +3659,7 @@ TEST_P(DocumentStoreTest, DetectPartialDataLoss) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsFalse());
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(DocumentProto(test_document1_)));
@@ -3455,7 +3688,8 @@ TEST_P(DocumentStoreTest, DetectPartialDataLoss) {
schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
}
TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
@@ -3471,6 +3705,7 @@ TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsFalse());
// There's some space at the beginning of the file (e.g. header, kmagic,
// etc) that is necessary to initialize the FileBackedProtoLog. We can't
@@ -3520,7 +3755,8 @@ TEST_P(DocumentStoreTest, DetectCompleteDataLoss) {
schema_store_.get()));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
}
TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
@@ -3573,8 +3809,12 @@ TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
std::move(create_result.document_store);
// The document log is using the legacy v0 format so that a migration is
// needed, which will also trigger regeneration.
- EXPECT_EQ(initialize_stats.document_store_recovery_cause(),
- InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT);
+ EXPECT_THAT(initialize_stats.document_store_recovery_cause(),
+ Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT));
+ // There should be no data loss, but we still need to regenerate derived files
+ // since we migrated document log from v0 to v1.
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
}
TEST_P(DocumentStoreTest, DocumentStoreStorageInfo) {
@@ -4227,8 +4467,10 @@ TEST_P(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
.Build();
// Check that we didn't lose anything. A migration also doesn't technically
- // count as a recovery.
+ // count as data loss, but we still have to regenerate derived files after
+ // migration.
EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+ EXPECT_THAT(create_result.derived_files_regenerated, IsTrue());
EXPECT_EQ(initialize_stats.document_store_recovery_cause(),
InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT);
@@ -4582,6 +4824,46 @@ TEST_P(DocumentStoreTest, SameKeyMapperTypeShouldNotRegenerateDerivedFiles) {
}
}
+TEST_P(DocumentStoreTest, GetDocumentIdByNamespaceFingerprintIdentifier) {
+ std::string dynamic_trie_uri_mapper_dir =
+ document_store_dir_ + "/key_mapper_dir";  // NOTE(review): not referenced again in this test.
+ std::string persistent_hash_map_uri_mapper_dir =
+ document_store_dir_ + "/uri_mapper";  // NOTE(review): not referenced again in this test.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv,
+ GetParam().use_persistent_hash_map,
+ PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
+ /*initialize_stats=*/nullptr));
+ // Store one document so there is a known (namespace, uri) -> DocumentId entry.
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+ // Build the identifier from the stored document's namespace id and its uri.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ NamespaceId namespace_id,
+ doc_store->GetNamespaceId(test_document1_.namespace_()));
+ NamespaceFingerprintIdentifier ns_fingerprint(
+ namespace_id,
+ /*target_str=*/test_document1_.uri());
+ if (GetParam().namespace_id_fingerprint) {  // Lookup by identifier is expected to work only with this flag.
+ EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint),
+ IsOkAndHolds(document_id));
+ // The same uri under a different namespace id must not resolve to the document.
+ NamespaceFingerprintIdentifier non_existing_ns_fingerprint(
+ namespace_id + 1, /*target_str=*/test_document1_.uri());
+ EXPECT_THAT(doc_store->GetDocumentId(non_existing_ns_fingerprint),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ } else {  // Flag off: the identifier-based lookup is expected to be rejected.
+ EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
INSTANTIATE_TEST_SUITE_P(
DocumentStoreTest, DocumentStoreTest,
testing::Values(
diff --git a/icing/store/namespace-fingerprint-identifier.cc b/icing/store/namespace-fingerprint-identifier.cc
new file mode 100644
index 0000000..3910105
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier.cc
@@ -0,0 +1,73 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/namespace-fingerprint-identifier.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/text_classifier/lib3/utils/hash/farmhash.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/store/namespace-id.h"
+#include "icing/util/encode-util.h"
+
+namespace icing {
+namespace lib {
+
+/* static */ libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier>
+NamespaceFingerprintIdentifier::DecodeFromCString(
+ std::string_view encoded_cstr) {  // Inverse of EncodeToCString().
+ if (encoded_cstr.size() < kMinEncodedLength) {  // Too short to hold the id prefix plus any fingerprint byte.
+ return absl_ports::InvalidArgumentError("Invalid length");
+ }
+ // Layout: the first kEncodedNamespaceIdLength bytes are the namespace id; the rest is the fingerprint.
+ NamespaceId namespace_id = encode_util::DecodeIntFromCString(
+ encoded_cstr.substr(0, kEncodedNamespaceIdLength));  // NOTE(review): stored into NamespaceId without a range check - confirm the decoded value always fits.
+ uint64_t fingerprint = encode_util::DecodeIntFromCString(
+ encoded_cstr.substr(kEncodedNamespaceIdLength));
+ return NamespaceFingerprintIdentifier(namespace_id, fingerprint);
+}
+
+NamespaceFingerprintIdentifier::NamespaceFingerprintIdentifier(
+ NamespaceId namespace_id, std::string_view target_str)
+ : namespace_id_(namespace_id),
+ fingerprint_(tc3farmhash::Fingerprint64(target_str)) {}  // Keeps only the hash of target_str, not the string itself.
+
+std::string NamespaceFingerprintIdentifier::EncodeToCString() const {
+ // encoded_namespace_id_str should be 1 to 3 bytes based on the value of
+ // namespace_id.
+ std::string encoded_namespace_id_str =
+ encode_util::EncodeIntToCString(namespace_id_);
+ // Pad encoded_namespace_id_str to a fixed kEncodedNamespaceIdLength bytes.
+ while (encoded_namespace_id_str.size() < kEncodedNamespaceIdLength) {
+ // A C string cannot contain 0 bytes, so we pad with 1, just like what we do
+ // in encode_util::EncodeIntToCString.
+ //
+ // This works because encode_util::DecodeIntFromCString decodes a byte
+ // value of 0x01 as 0x00. When EncodeIntToCString returns an encoded
+ // namespace id that is less than 3 bytes, it means that the id contains
+ // unencoded leading 0x00. So here we're explicitly encoding those bytes as
+ // 0x01.
+ encoded_namespace_id_str.push_back(1);
+ }
+ // Result: fixed-width namespace id prefix followed by the encoded fingerprint.
+ return absl_ports::StrCat(encoded_namespace_id_str,
+ encode_util::EncodeIntToCString(fingerprint_));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/namespace-fingerprint-identifier.h b/icing/store/namespace-fingerprint-identifier.h
new file mode 100644
index 0000000..d91ef94
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
+#define ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/store/namespace-id.h"
+
+namespace icing {
+namespace lib {
+
+class NamespaceFingerprintIdentifier {  // A (namespace id, 64-bit fingerprint) pair with a C-string encoding.
+ public:
+ static constexpr int kEncodedNamespaceIdLength = 3;  // Width in bytes of the encoded namespace id prefix.
+ static constexpr int kMinEncodedLength = kEncodedNamespaceIdLength + 1;  // Prefix plus at least one fingerprint byte.
+ // Parses a string produced by EncodeToCString(); returns an error if it is shorter than kMinEncodedLength.
+ static libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier>
+ DecodeFromCString(std::string_view encoded_cstr);
+ // Creates an identifier with namespace id 0 and fingerprint 0.
+ explicit NamespaceFingerprintIdentifier()
+ : namespace_id_(0), fingerprint_(0) {}
+ // Creates an identifier from an already computed fingerprint.
+ explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id,
+ uint64_t fingerprint)
+ : namespace_id_(namespace_id), fingerprint_(fingerprint) {}
+ // Creates an identifier whose fingerprint is derived from target_str (defined out of line).
+ explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id,
+ std::string_view target_str);
+ // Returns the encoded form that DecodeFromCString() accepts.
+ std::string EncodeToCString() const;
+ // Orders by namespace id first, then by fingerprint.
+ bool operator<(const NamespaceFingerprintIdentifier& other) const {
+ if (namespace_id_ != other.namespace_id_) {
+ return namespace_id_ < other.namespace_id_;
+ }
+ return fingerprint_ < other.fingerprint_;
+ }
+ // Equal only when both the namespace id and the fingerprint match.
+ bool operator==(const NamespaceFingerprintIdentifier& other) const {
+ return namespace_id_ == other.namespace_id_ &&
+ fingerprint_ == other.fingerprint_;
+ }
+ // Plain accessors for the two stored fields.
+ NamespaceId namespace_id() const { return namespace_id_; }
+ uint64_t fingerprint() const { return fingerprint_; }
+
+ private:
+ NamespaceId namespace_id_;
+ uint64_t fingerprint_;
+} __attribute__((packed));  // Packed so no padding is inserted between the two members.
+static_assert(sizeof(NamespaceFingerprintIdentifier) == 10, "");  // Guards the packed size of the two members.
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_
diff --git a/icing/store/namespace-fingerprint-identifier_test.cc b/icing/store/namespace-fingerprint-identifier_test.cc
new file mode 100644
index 0000000..5f86156
--- /dev/null
+++ b/icing/store/namespace-fingerprint-identifier_test.cc
@@ -0,0 +1,148 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/namespace-fingerprint-identifier.h"
+
+#include <cstdint>
+#include <limits>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/store/namespace-id.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(NamespaceFingerprintIdentifierTest, EncodeToCString) {  // Pins the exact encoded bytes for boundary ids and fingerprints.
+ NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0,
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier1.EncodeToCString(), Eq("\x01\x01\x01\x01"));
+
+ NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0,
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier2.EncodeToCString(), Eq("\x01\x01\x01\x02"));
+ // Largest fingerprint: only the part after the 3-byte namespace id prefix grows.
+ NamespaceFingerprintIdentifier identifier3(
+ /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier3.EncodeToCString(),
+ Eq("\x01\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+ // Same three fingerprints with namespace id 1: the prefix is still 3 bytes.
+ NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1,
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier4.EncodeToCString(), Eq("\x02\x01\x01\x01"));
+
+ NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1,
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier5.EncodeToCString(), Eq("\x02\x01\x01\x02"));
+
+ NamespaceFingerprintIdentifier identifier6(
+ /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier6.EncodeToCString(),
+ Eq("\x02\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+ // Same three fingerprints with the largest NamespaceId value.
+ NamespaceFingerprintIdentifier identifier7(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/0);
+ EXPECT_THAT(identifier7.EncodeToCString(), Eq("\x80\x80\x02\x01"));
+
+ NamespaceFingerprintIdentifier identifier8(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/1);
+ EXPECT_THAT(identifier8.EncodeToCString(), Eq("\x80\x80\x02\x02"));
+
+ NamespaceFingerprintIdentifier identifier9(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(identifier9.EncodeToCString(),
+ Eq("\x80\x80\x02\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02"));
+}
+
+TEST(NamespaceFingerprintIdentifierTest,
+ MultipleCStringConversionsAreReversible) {  // Encode then decode must give back an equal identifier.
+ NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0,
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier1.EncodeToCString()),
+ IsOkAndHolds(identifier1));
+
+ NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0,
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier2.EncodeToCString()),
+ IsOkAndHolds(identifier2));
+
+ NamespaceFingerprintIdentifier identifier3(
+ /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier3.EncodeToCString()),
+ IsOkAndHolds(identifier3));
+ // Repeat the three fingerprint values with namespace id 1.
+ NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1,
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier4.EncodeToCString()),
+ IsOkAndHolds(identifier4));
+
+ NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1,
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier5.EncodeToCString()),
+ IsOkAndHolds(identifier5));
+
+ NamespaceFingerprintIdentifier identifier6(
+ /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier6.EncodeToCString()),
+ IsOkAndHolds(identifier6));
+ // Repeat the three fingerprint values with the largest NamespaceId value.
+ NamespaceFingerprintIdentifier identifier7(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/0);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier7.EncodeToCString()),
+ IsOkAndHolds(identifier7));
+
+ NamespaceFingerprintIdentifier identifier8(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/1);
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier8.EncodeToCString()),
+ IsOkAndHolds(identifier8));
+
+ NamespaceFingerprintIdentifier identifier9(
+ /*namespace_id=*/std::numeric_limits<NamespaceId>::max(),
+ /*fingerprint=*/std::numeric_limits<uint64_t>::max());
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(
+ identifier9.EncodeToCString()),
+ IsOkAndHolds(identifier9));
+}
+
+TEST(NamespaceFingerprintIdentifierTest,
+ DecodeFromCStringInvalidLengthShouldReturnError) {
+ std::string invalid_str = "\x01\x01\x01";  // 3 bytes: a namespace id prefix with no fingerprint byte after it.
+ EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(invalid_str),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index 2b17f13..07fe2c5 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -154,7 +154,8 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) {
UsageStore::Create(&filesystem_, test_dir_));
// Report a usage with timestamp 5.
- usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1));
UsageStore::UsageScores expected_scores = CreateUsageScores(
/*type1_timestamp=*/5, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
/*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
@@ -162,13 +163,15 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) {
IsOkAndHolds(expected_scores));
// Report a usage with timestamp 1. The timestamp won't be updated.
- usage_store->AddUsageReport(usage_report_time1, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time1, /*document_id=*/1));
++expected_scores.usage_type1_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report a usage with timestamp 10. The timestamp should be updated.
- usage_store->AddUsageReport(usage_report_time10, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_time10, /*document_id=*/1));
expected_scores.usage_type1_last_used_timestamp_s = 10;
++expected_scores.usage_type1_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
@@ -188,7 +191,8 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) {
UsageStore::Create(&filesystem_, test_dir_));
// Report a usage with type 1.
- usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1));
UsageStore::UsageScores expected_scores = CreateUsageScores(
/*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
/*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
@@ -196,29 +200,34 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) {
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 1.
- usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1));
++expected_scores.usage_type1_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report a usage with type 2.
- usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1));
++expected_scores.usage_type2_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 2.
- usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1));
++expected_scores.usage_type2_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report a usage with type 3.
- usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1));
++expected_scores.usage_type3_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 3.
- usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1));
++expected_scores.usage_type3_count;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
@@ -457,7 +466,7 @@ TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) {
UsageStore::Create(&filesystem_, test_dir_));
// The stored timestamp in seconds should be the max value of uint32.
- usage_store->AddUsageReport(usage_report, /*document_id=*/1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
UsageStore::UsageScores expected_scores = CreateUsageScores(
/*type1_timestamp=*/std::numeric_limits<uint32_t>::max(),
/*type2_timestamp=*/0, /*type3_timestamp=*/0,
@@ -483,7 +492,7 @@ TEST_F(UsageStoreTest, CountsShouldNotOverflow) {
// Report another usage with type 1.
UsageReport usage_report = CreateUsageReport(
"namespace", "uri", /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1);
- usage_store->AddUsageReport(usage_report, /*document_id=*/1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
// usage_type1_count should not change because it's already the max value.
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
@@ -571,7 +580,7 @@ TEST_F(UsageStoreTest, GetElementsFileSize) {
UsageReport usage_report = CreateUsageReport(
"namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
- usage_store->AddUsageReport(usage_report, /*document_id=*/1);
+ ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1));
EXPECT_THAT(usage_store->GetElementsFileSize(),
IsOkAndHolds(Gt(empty_file_size)));
@@ -602,12 +611,13 @@ TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) {
UsageReport usage_report = CreateUsageReport(
"namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
for (int i = 0; i < 200; ++i) {
- usage_store->AddUsageReport(usage_report, /*document_id=*/i);
+ ICING_ASSERT_OK(
+ usage_store->AddUsageReport(usage_report, /*document_id=*/i));
}
// We need to persist since iOS won't see the new disk allocations until after
// everything gets written.
- usage_store->PersistToDisk();
+ ICING_ASSERT_OK(usage_store->PersistToDisk());
EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage)));
}