diff options
author | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-07 00:06:15 +0000 |
---|---|---|
committer | Android Build Coastguard Worker <android-build-coastguard-worker@google.com> | 2023-12-07 00:06:15 +0000 |
commit | c2be20616650e2f8ccb1d592654c08d6fda315ca (patch) | |
tree | 9892cbbd0fb247ce252a38f258e33741ca025a45 /icing/store | |
parent | 6d51031f6fac465f09e95982b19c4f86c88ee2fa (diff) | |
parent | ad9e08ff59020f5b52410dc093061d8ef715a781 (diff) | |
download | icing-android-14.0.0_r36.tar.gz |
Snap for 11186783 from ad9e08ff59020f5b52410dc093061d8ef715a781 to 24Q1-release
Refs: android-14.0.0_r37, android-14.0.0_r36, android-14.0.0_r35, android-14.0.0_r34, android-14.0.0_r33, android-14.0.0_r32, android-14.0.0_r31, android-14.0.0_r30, android-14.0.0_r29, android14-qpr2-s5-release, android14-qpr2-s4-release, android14-qpr2-s3-release, android14-qpr2-s2-release, android14-qpr2-s1-release, android14-qpr2-release
Change-Id: Ia93a92877933208d4b5ed5f6473564134928dc04
Diffstat (limited to 'icing/store')
-rw-r--r-- | icing/store/document-store.cc | 158 | ||||
-rw-r--r-- | icing/store/document-store.h | 58 | ||||
-rw-r--r-- | icing/store/document-store_benchmark.cc | 5 | ||||
-rw-r--r-- | icing/store/document-store_test.cc | 326 | ||||
-rw-r--r-- | icing/store/namespace-fingerprint-identifier.cc | 73 | ||||
-rw-r--r-- | icing/store/namespace-fingerprint-identifier.h | 72 | ||||
-rw-r--r-- | icing/store/namespace-fingerprint-identifier_test.cc | 148 | ||||
-rw-r--r-- | icing/store/usage-store_test.cc | 38 |
8 files changed, 784 insertions, 94 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 30de410..094eea1 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -53,6 +53,7 @@ #include "icing/store/document-id.h" #include "icing/store/document-log-creator.h" #include "icing/store/dynamic-trie-key-mapper.h" +#include "icing/store/namespace-fingerprint-identifier.h" #include "icing/store/namespace-id.h" #include "icing/store/persistent-hash-map-key-mapper.h" #include "icing/store/usage-store.h" @@ -142,25 +143,6 @@ std::string MakeCorpusMapperFilename(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename); } -// This function will encode a namespace id into a fixed 3 bytes string. -std::string EncodeNamespaceId(NamespaceId namespace_id) { - // encoding should be 1 to 3 bytes based on the value of namespace_id. - std::string encoding = encode_util::EncodeIntToCString(namespace_id); - // Make encoding to fixed 3 bytes. - while (encoding.size() < 3) { - // DynamicTrie cannot handle keys with 0 as bytes, so we append it using 1, - // just like what we do in encode_util::EncodeIntToCString. - // - // The reason that this works is because DecodeIntToString decodes a byte - // value of 0x01 as 0x00. When EncodeIntToCString returns a namespaceid - // encoding that is less than 3 bytes, it means that the id contains - // unencoded leading 0x00. So here we're explicitly encoding those bytes as - // 0x01. 
- encoding.push_back(1); - } - return encoding; -} - int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms, int64_t ttl_ms) { if (ttl_ms == 0) { @@ -269,9 +251,8 @@ std::string DocumentStore::MakeFingerprint( absl_ports::StrCat(namespace_, uri_or_schema)); return fingerprint_util::GetFingerprintString(fprint); } else { - return absl_ports::StrCat(EncodeNamespaceId(namespace_id), - encode_util::EncodeIntToCString( - tc3farmhash::Fingerprint64(uri_or_schema))); + return NamespaceFingerprintIdentifier(namespace_id, uri_or_schema) + .EncodeToCString(); } } @@ -328,13 +309,15 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( filesystem, base_dir, clock, schema_store, namespace_id_fingerprint, pre_mapping_fbv, use_persistent_hash_map, compression_level)); ICING_ASSIGN_OR_RETURN( - DataLoss data_loss, + InitializeResult initialize_result, document_store->Initialize(force_recovery_and_revalidate_documents, initialize_stats)); CreateResult create_result; create_result.document_store = std::move(document_store); - create_result.data_loss = data_loss; + create_result.data_loss = initialize_result.data_loss; + create_result.derived_files_regenerated = + initialize_result.derived_files_regenerated; return create_result; } @@ -380,9 +363,9 @@ libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create( return libtextclassifier3::Status::OK; } -libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( - bool force_recovery_and_revalidate_documents, - InitializeStatsProto* initialize_stats) { +libtextclassifier3::StatusOr<DocumentStore::InitializeResult> +DocumentStore::Initialize(bool force_recovery_and_revalidate_documents, + InitializeStatsProto* initialize_stats) { auto create_result_or = DocumentLogCreator::Create(filesystem_, base_dir_, compression_level_); @@ -400,6 +383,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( InitializeStatsProto::RecoveryCause recovery_cause = 
GetRecoveryCause(create_result, force_recovery_and_revalidate_documents); + bool derived_files_regenerated = false; if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) { ICING_LOG(INFO) << "Starting Document Store Recovery with cause=" << recovery_cause << ", and create result { new_file=" @@ -416,16 +400,18 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); libtextclassifier3::Status status = RegenerateDerivedFiles(force_recovery_and_revalidate_documents); - if (initialize_stats != nullptr && - recovery_cause != InitializeStatsProto::NONE) { + if (recovery_cause != InitializeStatsProto::NONE) { // Only consider it a recovery if the client forced a recovery or there // was data loss. Otherwise, this could just be the first time we're // initializing and generating derived files. - initialize_stats->set_document_store_recovery_latency_ms( - document_recovery_timer->GetElapsedMilliseconds()); - initialize_stats->set_document_store_recovery_cause(recovery_cause); - initialize_stats->set_document_store_data_status( - GetDataStatus(create_result.log_create_result.data_loss)); + derived_files_regenerated = true; + if (initialize_stats != nullptr) { + initialize_stats->set_document_store_recovery_latency_ms( + document_recovery_timer->GetElapsedMilliseconds()); + initialize_stats->set_document_store_recovery_cause(recovery_cause); + initialize_stats->set_document_store_data_status( + GetDataStatus(create_result.log_create_result.data_loss)); + } } if (!status.ok()) { ICING_LOG(ERROR) @@ -438,6 +424,7 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( << "Couldn't find derived files or failed to initialize them, " "regenerating derived files for DocumentStore."; std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); + derived_files_regenerated = true; libtextclassifier3::Status status = RegenerateDerivedFiles( 
/*force_recovery_and_revalidate_documents=*/false); if (initialize_stats != nullptr) { @@ -459,7 +446,10 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( initialize_stats->set_num_documents(document_id_mapper_->num_elements()); } - return create_result.log_create_result.data_loss; + InitializeResult initialize_result = { + .data_loss = create_result.log_create_result.data_loss, + .derived_files_regenerated = derived_files_regenerated}; + return initialize_result; } libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() { @@ -1177,6 +1167,25 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId( "Failed to find DocumentId by key: ", name_space, ", ", uri)); } +libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId( + const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier) + const { + if (!namespace_id_fingerprint_) { + return absl_ports::FailedPreconditionError( + "Cannot lookup document id by namespace id + fingerprint without " + "enabling it on uri_mapper"); + } + + auto document_id_or = document_key_mapper_->Get( + namespace_fingerprint_identifier.EncodeToCString()); + if (document_id_or.ok()) { + return document_id_or.ValueOrDie(); + } + return absl_ports::Annotate( + std::move(document_id_or).status(), + "Failed to find DocumentId by namespace id + fingerprint"); +} + std::vector<std::string> DocumentStore::GetAllNamespaces() const { std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace = GetNamespaceIdsToNamespaces(namespace_mapper_.get()); @@ -1829,10 +1838,10 @@ libtextclassifier3::Status DocumentStore::Optimize() { return libtextclassifier3::Status::OK; } -libtextclassifier3::StatusOr<std::vector<DocumentId>> +libtextclassifier3::StatusOr<DocumentStore::OptimizeResult> DocumentStore::OptimizeInto(const std::string& new_directory, const LanguageSegmenter* lang_segmenter, - OptimizeStatsProto* stats) { + OptimizeStatsProto* stats) const { // Validates 
directory if (new_directory == base_dir_) { return absl_ports::InvalidArgumentError( @@ -1850,20 +1859,22 @@ DocumentStore::OptimizeInto(const std::string& new_directory, std::move(doc_store_create_result.document_store); // Writes all valid docs into new document store (new directory) - int size = document_id_mapper_->num_elements(); - int num_deleted = 0; - int num_expired = 0; + int document_cnt = document_id_mapper_->num_elements(); + int num_deleted_documents = 0; + int num_expired_documents = 0; UsageStore::UsageScores default_usage; - std::vector<DocumentId> document_id_old_to_new(size, kInvalidDocumentId); + + OptimizeResult result; + result.document_id_old_to_new.resize(document_cnt, kInvalidDocumentId); int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); - for (DocumentId document_id = 0; document_id < size; document_id++) { + for (DocumentId document_id = 0; document_id < document_cnt; document_id++) { auto document_or = Get(document_id, /*clear_internal_fields=*/false); if (absl_ports::IsNotFound(document_or.status())) { if (IsDeleted(document_id)) { - ++num_deleted; + ++num_deleted_documents; } else if (!GetNonExpiredDocumentFilterData(document_id, current_time_ms)) { - ++num_expired; + ++num_expired_documents; } continue; } else if (!document_or.ok()) { @@ -1903,7 +1914,8 @@ DocumentStore::OptimizeInto(const std::string& new_directory, return new_document_id_or.status(); } - document_id_old_to_new[document_id] = new_document_id_or.ValueOrDie(); + result.document_id_old_to_new[document_id] = + new_document_id_or.ValueOrDie(); // Copy over usage scores. 
ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores, @@ -1917,13 +1929,61 @@ DocumentStore::OptimizeInto(const std::string& new_directory, new_doc_store->SetUsageScores(new_document_id, usage_scores)); } } + + // Construct namespace_id_old_to_new + int namespace_cnt = namespace_mapper_->num_keys(); + std::unordered_map<NamespaceId, std::string> old_namespaces = + GetNamespaceIdsToNamespaces(namespace_mapper_.get()); + if (namespace_cnt != old_namespaces.size()) { + // This really shouldn't happen. If it really happens, then: + // - It won't block DocumentStore optimization, so don't return error here. + // - Instead, write a warning log here and hint the caller to rebuild index. + ICING_LOG(WARNING) << "Unexpected old namespace count " << namespace_cnt + << " vs " << old_namespaces.size(); + result.should_rebuild_index = true; + } else { + result.namespace_id_old_to_new.resize(namespace_cnt, kInvalidNamespaceId); + for (const auto& [old_namespace_id, ns] : old_namespaces) { + if (old_namespace_id >= result.namespace_id_old_to_new.size()) { + // This really shouldn't happen. If it really happens, then: + // - It won't block DocumentStore optimization, so don't return error + // here. + // - Instead, write a warning log here and hint the caller to rebuild + // index. + ICING_LOG(WARNING) << "Found unexpected namespace id " + << old_namespace_id << ". Should be in range 0 to " + << result.namespace_id_old_to_new.size() + << " (exclusive)."; + result.namespace_id_old_to_new.clear(); + result.should_rebuild_index = true; + break; + } + + auto new_namespace_id_or = new_doc_store->namespace_mapper_->Get(ns); + if (!new_namespace_id_or.ok()) { + if (absl_ports::IsNotFound(new_namespace_id_or.status())) { + continue; + } + // Real error, return it. + return std::move(new_namespace_id_or).status(); + } + + NamespaceId new_namespace_id = new_namespace_id_or.ValueOrDie(); + // Safe to use bracket to assign given that we've checked the range above. 
+ result.namespace_id_old_to_new[old_namespace_id] = new_namespace_id; + } + } + if (stats != nullptr) { - stats->set_num_original_documents(size); - stats->set_num_deleted_documents(num_deleted); - stats->set_num_expired_documents(num_expired); + stats->set_num_original_documents(document_cnt); + stats->set_num_deleted_documents(num_deleted_documents); + stats->set_num_expired_documents(num_expired_documents); + stats->set_num_original_namespaces(namespace_cnt); + stats->set_num_deleted_namespaces( + namespace_cnt - new_doc_store->namespace_mapper_->num_keys()); } ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk(PersistType::FULL)); - return document_id_old_to_new; + return result; } libtextclassifier3::StatusOr<DocumentStore::OptimizeInfo> diff --git a/icing/store/document-store.h b/icing/store/document-store.h index 92d4286..c228e8b 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -43,6 +43,7 @@ #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/key-mapper.h" +#include "icing/store/namespace-fingerprint-identifier.h" #include "icing/store/namespace-id.h" #include "icing/store/usage-store.h" #include "icing/tokenization/language-segmenter.h" @@ -106,6 +107,11 @@ class DocumentStore { // unpersisted. This may be used to signal that any derived data off of the // document store may need to be regenerated. DataLoss data_loss; + + // A boolean flag indicating if derived files of the document store have + // been regenerated or not. This is usually a signal for callers to detect + // if any id assignment has changed (e.g. NamespaceId). + bool derived_files_regenerated; }; // Not copyable @@ -270,6 +276,21 @@ class DocumentStore { libtextclassifier3::StatusOr<DocumentId> GetDocumentId( std::string_view name_space, std::string_view uri) const; + // Helper method to find a DocumentId that is associated with the given + // NamespaceFingerprintIdentifier. 
+ // + // NOTE: The DocumentId may refer to a invalid document (deleted + // or expired). Callers can call DoesDocumentExist(document_id) to ensure it + // refers to a valid Document. + // + // Returns: + // A DocumentId on success + // NOT_FOUND if the key doesn't exist + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<DocumentId> GetDocumentId( + const NamespaceFingerprintIdentifier& namespace_fingerprint_identifier) + const; + // Returns the CorpusId associated with the given namespace and schema. // // Returns: @@ -439,10 +460,23 @@ class DocumentStore { // INTERNAL_ERROR on IO error libtextclassifier3::Status Optimize(); + struct OptimizeResult { + // A vector that maps old document id to new document id. + std::vector<DocumentId> document_id_old_to_new; + + // A vector that maps old namespace id to new namespace id. Will be empty if + // should_rebuild_index is set to true. + std::vector<NamespaceId> namespace_id_old_to_new; + + // A boolean flag that hints the caller (usually IcingSearchEngine) if it + // should rebuild index instead of adopting the id changes via the 2 vectors + // above. It will be set to true if finding any id inconsistency. + bool should_rebuild_index = false; + }; // Copy data from current base directory into a new directory. Any outdated or - // deleted data won't be copied. During the process, document ids will be - // reassigned so any files / classes that are based on old document ids may be - // outdated. + // deleted data won't be copied. During the process, document/namespace ids + // will be reassigned so any files / classes that are based on old + // document/namespace ids may be outdated. // // stats will be set if non-null. // @@ -451,12 +485,14 @@ class DocumentStore { // method based on device usage. 
// // Returns: - // A vector that maps from old document id to new document id on success + // OptimizeResult which contains a vector mapping from old document id to + // new document id and another vector mapping from old namespace id to new + // namespace id, on success // INVALID_ARGUMENT if new_directory is same as current base directory // INTERNAL_ERROR on IO error - libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeInto( + libtextclassifier3::StatusOr<OptimizeResult> OptimizeInto( const std::string& new_directory, const LanguageSegmenter* lang_segmenter, - OptimizeStatsProto* stats = nullptr); + OptimizeStatsProto* stats = nullptr) const; // Calculates status for a potential Optimize call. Includes how many docs // there are vs how many would be optimized away. And also includes an @@ -580,7 +616,15 @@ class DocumentStore { // worry about this field. bool initialized_ = false; - libtextclassifier3::StatusOr<DataLoss> Initialize( + struct InitializeResult { + DataLoss data_loss; + + // A boolean flag indicating if derived files of the document store have + // been regenerated or not. This is usually a signal for callers to detect + // if any id assignment has changed (e.g. NamespaceId). + bool derived_files_regenerated; + }; + libtextclassifier3::StatusOr<InitializeResult> Initialize( bool force_recovery_and_revalidate_documents, InitializeStatsProto* initialize_stats); diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index 5b9c568..46d76d8 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -163,8 +163,9 @@ void BM_DoesDocumentExistBenchmark(benchmark::State& state) { // stuff. 
ICING_ASSERT_OK(document_store->Put( CreateDocument("namespace", /*uri=*/std::to_string(i)))); - document_store->Delete("namespace", /*uri=*/std::to_string(i), - clock.GetSystemTimeMilliseconds()); + ICING_ASSERT_OK(document_store->Delete("namespace", + /*uri=*/std::to_string(i), + clock.GetSystemTimeMilliseconds())); } std::default_random_engine random; diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index a9c47f0..2d4cd99 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -47,6 +47,7 @@ #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" #include "icing/store/document-log-creator.h" +#include "icing/store/namespace-fingerprint-identifier.h" #include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" @@ -1050,7 +1051,7 @@ TEST_P(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { IsOkAndHolds(EqualsProto(message_document))); } -TEST_P(DocumentStoreTest, OptimizeInto) { +TEST_P(DocumentStoreTest, OptimizeIntoSingleNamespace) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, @@ -1103,24 +1104,33 @@ TEST_P(DocumentStoreTest, OptimizeInto) { optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename(); // Validates that the optimized document log has the same size if nothing is - // deleted + // deleted. Also namespace ids remain the same. 
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); - EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), - IsOkAndHolds(ElementsAre(0, 1, 2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result1, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result1.document_id_old_to_new, ElementsAre(0, 1, 2)); + EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0)); + EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse()); int64_t optimized_size1 = filesystem_.GetFileSize(optimized_document_log.c_str()); EXPECT_EQ(original_size, optimized_size1); // Validates that the optimized document log has a smaller size if something - // is deleted + // is deleted. Namespace ids remain the same. ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1", fake_clock_.GetSystemTimeMilliseconds())); // DocumentId 0 is removed. 
- EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), - IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, 1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result2, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result2.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, 0, 1)); + EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0)); + EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse()); int64_t optimized_size2 = filesystem_.GetFileSize(optimized_document_log.c_str()); EXPECT_THAT(original_size, Gt(optimized_size2)); @@ -1130,13 +1140,17 @@ TEST_P(DocumentStoreTest, OptimizeInto) { fake_clock_.SetSystemTimeMilliseconds(300); // Validates that the optimized document log has a smaller size if something - // expired + // expired. Namespace ids remain the same. ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); // DocumentId 0 is removed, and DocumentId 2 is expired. 
- EXPECT_THAT( - doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), - IsOkAndHolds(ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result3, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result3.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, 0, kInvalidDocumentId)); + EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(0)); + EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse()); int64_t optimized_size3 = filesystem_.GetFileSize(optimized_document_log.c_str()); EXPECT_THAT(optimized_size2, Gt(optimized_size3)); @@ -1146,15 +1160,228 @@ TEST_P(DocumentStoreTest, OptimizeInto) { ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); ICING_ASSERT_OK(doc_store->Delete("namespace", "uri2", fake_clock_.GetSystemTimeMilliseconds())); - // DocumentId 0 and 1 is removed, and DocumentId 2 is expired. - EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), - IsOkAndHolds(ElementsAre(kInvalidDocumentId, kInvalidDocumentId, - kInvalidDocumentId))); + // DocumentId 0 and 1 is removed, and DocumentId 2 is expired. Since no + // document with the namespace is added into new document store, the namespace + // id will be invalid. 
+ ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result4, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT( + optimize_result4.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId)); + EXPECT_THAT(optimize_result4.namespace_id_old_to_new, + ElementsAre(kInvalidNamespaceId)); + EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse()); int64_t optimized_size4 = filesystem_.GetFileSize(optimized_document_log.c_str()); EXPECT_THAT(optimized_size3, Gt(optimized_size4)); } +TEST_P(DocumentStoreTest, OptimizeIntoMultipleNamespaces) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + CreateDocumentStore(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentProto document0 = DocumentBuilder() + .SetKey("namespace1", "uri0") + .SetSchema("email") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .Build(); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("email") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .Build(); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("email") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .Build(); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace1", "uri3") + .SetSchema("email") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .Build(); + + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace3", "uri4") + .SetSchema("email") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .Build(); + + // Nothing should have expired yet. 
+ fake_clock_.SetSystemTimeMilliseconds(100); + + ICING_ASSERT_OK(doc_store->Put(document0)); + ICING_ASSERT_OK(doc_store->Put(document1)); + ICING_ASSERT_OK(doc_store->Put(document2)); + ICING_ASSERT_OK(doc_store->Put(document3)); + ICING_ASSERT_OK(doc_store->Put(document4)); + + std::string original_document_log = absl_ports::StrCat( + document_store_dir_, "/", DocumentLogCreator::GetDocumentLogFilename()); + + int64_t original_size = + filesystem_.GetFileSize(original_document_log.c_str()); + + std::string optimized_dir = document_store_dir_ + "_optimize"; + std::string optimized_document_log = + optimized_dir + "/" + DocumentLogCreator::GetDocumentLogFilename(); + + // Validates that the optimized document log has the same size if nothing is + // deleted. Also namespace ids remain the same. + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result1, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result1.document_id_old_to_new, + ElementsAre(0, 1, 2, 3, 4)); + EXPECT_THAT(optimize_result1.namespace_id_old_to_new, ElementsAre(0, 1, 2)); + EXPECT_THAT(optimize_result1.should_rebuild_index, IsFalse()); + int64_t optimized_size1 = + filesystem_.GetFileSize(optimized_document_log.c_str()); + EXPECT_EQ(original_size, optimized_size1); + + // Validates that the optimized document log has a smaller size if something + // is deleted. + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); + // Delete DocumentId 0 with namespace1. 
+ // - Before: ["namespace1#uri0", "namespace1#uri1", "namespace2#uri2", + // "namespace1#uri3", "namespace3#uri4"] + // - After: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3", + // "namespace3#uri4"] + // In this case, new_doc_store will assign namespace ids in ["namespace1", + // "namespace2", "namespace3"] order. Since new_doc_store has the same order + // of namespace id assignment, namespace ids remain the same. + ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri0", + fake_clock_.GetSystemTimeMilliseconds())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result2, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result2.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, 0, 1, 2, 3)); + EXPECT_THAT(optimize_result2.namespace_id_old_to_new, ElementsAre(0, 1, 2)); + EXPECT_THAT(optimize_result2.should_rebuild_index, IsFalse()); + int64_t optimized_size2 = + filesystem_.GetFileSize(optimized_document_log.c_str()); + EXPECT_THAT(original_size, Gt(optimized_size2)); + + // Validates that the optimized document log has a smaller size if something + // is deleted. + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); + // Delete DocumentId 1 with namespace1. + // - Before: [nil, "namespace1#uri1", "namespace2#uri2", "namespace1#uri3", + // "namespace3#uri4"] + // - After: [nil, nil, "namespace2#uri2", "namespace1#uri3", + // "namespace3#uri4"] + // In this case, new_doc_store will assign namespace ids in ["namespace2", + // "namespace1", "namespace3"] order, so namespace_id_old_to_new should + // reflect the change. 
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri1", + fake_clock_.GetSystemTimeMilliseconds())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result3, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result3.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0, 1, 2)); + EXPECT_THAT(optimize_result3.namespace_id_old_to_new, ElementsAre(1, 0, 2)); + EXPECT_THAT(optimize_result3.should_rebuild_index, IsFalse()); + int64_t optimized_size3 = + filesystem_.GetFileSize(optimized_document_log.c_str()); + EXPECT_THAT(optimized_size2, Gt(optimized_size3)); + + // Validates that the optimized document log has a smaller size if something + // is deleted. + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); + // Delete DocumentId 3 with namespace1. + // - Before: [nil, nil, "namespace2#uri2", "namespace1#uri3", + // "namespace3#uri4"] + // - After: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"] + // In this case, new_doc_store will assign namespace ids in ["namespace2", + // "namespace3"] order and "namespace1" will be never assigned, so + // namespace_id_old_to_new should reflect the change. 
+ ICING_ASSERT_OK(doc_store->Delete("namespace1", "uri3", + fake_clock_.GetSystemTimeMilliseconds())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result4, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result4.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0, + kInvalidDocumentId, 1)); + EXPECT_THAT(optimize_result4.namespace_id_old_to_new, + ElementsAre(kInvalidNamespaceId, 0, 1)); + EXPECT_THAT(optimize_result4.should_rebuild_index, IsFalse()); + int64_t optimized_size4 = + filesystem_.GetFileSize(optimized_document_log.c_str()); + EXPECT_THAT(optimized_size3, Gt(optimized_size4)); + + // Validates that the optimized document log has a smaller size if something + // is deleted. + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); + // Delete DocumentId 4 with namespace3. + // - Before: [nil, nil, "namespace2#uri2", nil, "namespace3#uri4"] + // - After: [nil, nil, "namespace2#uri2", nil, nil] + // In this case, new_doc_store will assign namespace ids in ["namespace2"] + // order and "namespace1", "namespace3" will be never assigned, so + // namespace_id_old_to_new should reflect the change. 
+ ICING_ASSERT_OK(doc_store->Delete("namespace3", "uri4", + fake_clock_.GetSystemTimeMilliseconds())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result5, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result5.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, kInvalidDocumentId, 0, + kInvalidDocumentId, kInvalidDocumentId)); + EXPECT_THAT(optimize_result5.namespace_id_old_to_new, + ElementsAre(kInvalidNamespaceId, 0, kInvalidNamespaceId)); + EXPECT_THAT(optimize_result5.should_rebuild_index, IsFalse()); + int64_t optimized_size5 = + filesystem_.GetFileSize(optimized_document_log.c_str()); + EXPECT_THAT(optimized_size4, Gt(optimized_size5)); + + // Validates that the optimized document log has a smaller size if something + // is deleted. + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); + // Delete DocumentId 2 with namespace2. + // - Before: [nil, nil, "namespace2#uri2", nil, nil] + // - After: [nil, nil, nil, nil, nil] + // In this case, all documents were deleted, so there will be no namespace ids + // either. namespace_id_old_to_new should reflect the change. 
+ ICING_ASSERT_OK(doc_store->Delete("namespace2", "uri2", + fake_clock_.GetSystemTimeMilliseconds())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result6, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT( + optimize_result6.document_id_old_to_new, + ElementsAre(kInvalidDocumentId, kInvalidDocumentId, kInvalidDocumentId, + kInvalidDocumentId, kInvalidDocumentId)); + EXPECT_THAT(optimize_result6.namespace_id_old_to_new, + ElementsAre(kInvalidNamespaceId, kInvalidNamespaceId, + kInvalidNamespaceId)); + EXPECT_THAT(optimize_result6.should_rebuild_index, IsFalse()); + int64_t optimized_size6 = + filesystem_.GetFileSize(optimized_document_log.c_str()); + EXPECT_THAT(optimized_size5, Gt(optimized_size6)); +} + TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -1165,8 +1392,13 @@ TEST_P(DocumentStoreTest, OptimizeIntoForEmptyDocumentStore) { std::string optimized_dir = document_store_dir_ + "_optimize"; ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str())); ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str())); - EXPECT_THAT(doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()), - IsOkAndHolds(IsEmpty())); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::OptimizeResult optimize_result, + doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get())); + EXPECT_THAT(optimize_result.document_id_old_to_new, IsEmpty()); + EXPECT_THAT(optimize_result.namespace_id_old_to_new, IsEmpty()); + EXPECT_THAT(optimize_result.should_rebuild_index, IsFalse()); } TEST_P(DocumentStoreTest, ShouldRecoverFromDataLoss) { @@ -3427,6 +3659,7 @@ TEST_P(DocumentStoreTest, DetectPartialDataLoss) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); + EXPECT_THAT(create_result.derived_files_regenerated, IsFalse()); 
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(DocumentProto(test_document1_))); @@ -3455,7 +3688,8 @@ TEST_P(DocumentStoreTest, DetectPartialDataLoss) { schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL)); + EXPECT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL)); + EXPECT_THAT(create_result.derived_files_regenerated, IsTrue()); } TEST_P(DocumentStoreTest, DetectCompleteDataLoss) { @@ -3471,6 +3705,7 @@ TEST_P(DocumentStoreTest, DetectCompleteDataLoss) { std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); + EXPECT_THAT(create_result.derived_files_regenerated, IsFalse()); // There's some space at the beginning of the file (e.g. header, kmagic, // etc) that is necessary to initialize the FileBackedProtoLog. We can't @@ -3520,7 +3755,8 @@ TEST_P(DocumentStoreTest, DetectCompleteDataLoss) { schema_store_.get())); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); + EXPECT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); + EXPECT_THAT(create_result.derived_files_regenerated, IsTrue()); } TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { @@ -3573,8 +3809,12 @@ TEST_P(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { std::move(create_result.document_store); // The document log is using the legacy v0 format so that a migration is // needed, which will also trigger regeneration. 
- EXPECT_EQ(initialize_stats.document_store_recovery_cause(), - InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT); + EXPECT_THAT(initialize_stats.document_store_recovery_cause(), + Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT)); + // There should be no data loss, but we still need to regenerate derived files + // since we migrated document log from v0 to v1. + EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); + EXPECT_THAT(create_result.derived_files_regenerated, IsTrue()); } TEST_P(DocumentStoreTest, DocumentStoreStorageInfo) { @@ -4227,8 +4467,10 @@ TEST_P(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { .Build(); // Check that we didn't lose anything. A migration also doesn't technically - // count as a recovery. + // count as data loss, but we still have to regenerate derived files after + // migration. EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); + EXPECT_THAT(create_result.derived_files_regenerated, IsTrue()); EXPECT_EQ(initialize_stats.document_store_recovery_cause(), InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT); @@ -4582,6 +4824,46 @@ TEST_P(DocumentStoreTest, SameKeyMapperTypeShouldNotRegenerateDerivedFiles) { } } +TEST_P(DocumentStoreTest, GetDocumentIdByNamespaceFingerprintIdentifier) { + std::string dynamic_trie_uri_mapper_dir = + document_store_dir_ + "/key_mapper_dir"; + std::string persistent_hash_map_uri_mapper_dir = + document_store_dir_ + "/uri_mapper"; + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create( + &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(), + /*force_recovery_and_revalidate_documents=*/false, + GetParam().namespace_id_fingerprint, GetParam().pre_mapping_fbv, + GetParam().use_persistent_hash_map, + PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel, + /*initialize_stats=*/nullptr)); + + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + 
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + doc_store->Put(test_document1_)); + + ICING_ASSERT_OK_AND_ASSIGN( + NamespaceId namespace_id, + doc_store->GetNamespaceId(test_document1_.namespace_())); + NamespaceFingerprintIdentifier ns_fingerprint( + namespace_id, + /*target_str=*/test_document1_.uri()); + if (GetParam().namespace_id_fingerprint) { + EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint), + IsOkAndHolds(document_id)); + + NamespaceFingerprintIdentifier non_existing_ns_fingerprint( + namespace_id + 1, /*target_str=*/test_document1_.uri()); + EXPECT_THAT(doc_store->GetDocumentId(non_existing_ns_fingerprint), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + } else { + EXPECT_THAT(doc_store->GetDocumentId(ns_fingerprint), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + } +} + INSTANTIATE_TEST_SUITE_P( DocumentStoreTest, DocumentStoreTest, testing::Values( diff --git a/icing/store/namespace-fingerprint-identifier.cc b/icing/store/namespace-fingerprint-identifier.cc new file mode 100644 index 0000000..3910105 --- /dev/null +++ b/icing/store/namespace-fingerprint-identifier.cc @@ -0,0 +1,73 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "icing/store/namespace-fingerprint-identifier.h" + +#include <cstdint> +#include <string> +#include <string_view> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/text_classifier/lib3/utils/hash/farmhash.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/store/namespace-id.h" +#include "icing/util/encode-util.h" + +namespace icing { +namespace lib { + +/* static */ libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier> +NamespaceFingerprintIdentifier::DecodeFromCString( + std::string_view encoded_cstr) { + if (encoded_cstr.size() < kMinEncodedLength) { + return absl_ports::InvalidArgumentError("Invalid length"); + } + + NamespaceId namespace_id = encode_util::DecodeIntFromCString( + encoded_cstr.substr(0, kEncodedNamespaceIdLength)); + uint64_t fingerprint = encode_util::DecodeIntFromCString( + encoded_cstr.substr(kEncodedNamespaceIdLength)); + return NamespaceFingerprintIdentifier(namespace_id, fingerprint); +} + +NamespaceFingerprintIdentifier::NamespaceFingerprintIdentifier( + NamespaceId namespace_id, std::string_view target_str) + : namespace_id_(namespace_id), + fingerprint_(tc3farmhash::Fingerprint64(target_str)) {} + +std::string NamespaceFingerprintIdentifier::EncodeToCString() const { + // encoded_namespace_id_str should be 1 to 3 bytes based on the value of + // namespace_id. + std::string encoded_namespace_id_str = + encode_util::EncodeIntToCString(namespace_id_); + // Make encoded_namespace_id_str to fixed kEncodedNamespaceIdLength bytes. + while (encoded_namespace_id_str.size() < kEncodedNamespaceIdLength) { + // C string cannot contain 0 bytes, so we append it using 1, just like what + // we do in encode_util::EncodeIntToCString. + // + // The reason that this works is because DecodeIntToString decodes a byte + // value of 0x01 as 0x00. 
When EncodeIntToCString returns an encoded + // namespace id that is less than 3 bytes, it means that the id contains + // unencoded leading 0x00. So here we're explicitly encoding those bytes as + // 0x01. + encoded_namespace_id_str.push_back(1); + } + + return absl_ports::StrCat(encoded_namespace_id_str, + encode_util::EncodeIntToCString(fingerprint_)); +} + +} // namespace lib +} // namespace icing diff --git a/icing/store/namespace-fingerprint-identifier.h b/icing/store/namespace-fingerprint-identifier.h new file mode 100644 index 0000000..d91ef94 --- /dev/null +++ b/icing/store/namespace-fingerprint-identifier.h @@ -0,0 +1,72 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_ +#define ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_ + +#include <cstdint> +#include <string> +#include <string_view> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/store/namespace-id.h" + +namespace icing { +namespace lib { + +class NamespaceFingerprintIdentifier { + public: + static constexpr int kEncodedNamespaceIdLength = 3; + static constexpr int kMinEncodedLength = kEncodedNamespaceIdLength + 1; + + static libtextclassifier3::StatusOr<NamespaceFingerprintIdentifier> + DecodeFromCString(std::string_view encoded_cstr); + + explicit NamespaceFingerprintIdentifier() + : namespace_id_(0), fingerprint_(0) {} + + explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id, + uint64_t fingerprint) + : namespace_id_(namespace_id), fingerprint_(fingerprint) {} + + explicit NamespaceFingerprintIdentifier(NamespaceId namespace_id, + std::string_view target_str); + + std::string EncodeToCString() const; + + bool operator<(const NamespaceFingerprintIdentifier& other) const { + if (namespace_id_ != other.namespace_id_) { + return namespace_id_ < other.namespace_id_; + } + return fingerprint_ < other.fingerprint_; + } + + bool operator==(const NamespaceFingerprintIdentifier& other) const { + return namespace_id_ == other.namespace_id_ && + fingerprint_ == other.fingerprint_; + } + + NamespaceId namespace_id() const { return namespace_id_; } + uint64_t fingerprint() const { return fingerprint_; } + + private: + NamespaceId namespace_id_; + uint64_t fingerprint_; +} __attribute__((packed)); +static_assert(sizeof(NamespaceFingerprintIdentifier) == 10, ""); + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_NAMESPACE_FINGERPRINT_IDENTIFIER_H_ diff --git a/icing/store/namespace-fingerprint-identifier_test.cc b/icing/store/namespace-fingerprint-identifier_test.cc new file mode 100644 index 0000000..5f86156 --- /dev/null +++ 
b/icing/store/namespace-fingerprint-identifier_test.cc @@ -0,0 +1,148 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/store/namespace-fingerprint-identifier.h" + +#include <cstdint> +#include <limits> +#include <string> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/store/namespace-id.h" +#include "icing/testing/common-matchers.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::Eq; + +TEST(NamespaceFingerprintIdentifierTest, EncodeToCString) { + NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0, + /*fingerprint=*/0); + EXPECT_THAT(identifier1.EncodeToCString(), Eq("\x01\x01\x01\x01")); + + NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0, + /*fingerprint=*/1); + EXPECT_THAT(identifier2.EncodeToCString(), Eq("\x01\x01\x01\x02")); + + NamespaceFingerprintIdentifier identifier3( + /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max()); + EXPECT_THAT(identifier3.EncodeToCString(), + Eq("\x01\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02")); + + NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1, + /*fingerprint=*/0); + EXPECT_THAT(identifier4.EncodeToCString(), Eq("\x02\x01\x01\x01")); + + NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1, + /*fingerprint=*/1); + EXPECT_THAT(identifier5.EncodeToCString(), Eq("\x02\x01\x01\x02")); + 
+ NamespaceFingerprintIdentifier identifier6( + /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max()); + EXPECT_THAT(identifier6.EncodeToCString(), + Eq("\x02\x01\x01\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02")); + + NamespaceFingerprintIdentifier identifier7( + /*namespace_id=*/std::numeric_limits<NamespaceId>::max(), + /*fingerprint=*/0); + EXPECT_THAT(identifier7.EncodeToCString(), Eq("\x80\x80\x02\x01")); + + NamespaceFingerprintIdentifier identifier8( + /*namespace_id=*/std::numeric_limits<NamespaceId>::max(), + /*fingerprint=*/1); + EXPECT_THAT(identifier8.EncodeToCString(), Eq("\x80\x80\x02\x02")); + + NamespaceFingerprintIdentifier identifier9( + /*namespace_id=*/std::numeric_limits<NamespaceId>::max(), + /*fingerprint=*/std::numeric_limits<uint64_t>::max()); + EXPECT_THAT(identifier9.EncodeToCString(), + Eq("\x80\x80\x02\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02")); +} + +TEST(NamespaceFingerprintIdentifierTest, + MultipleCStringConversionsAreReversible) { + NamespaceFingerprintIdentifier identifier1(/*namespace_id=*/0, + /*fingerprint=*/0); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier1.EncodeToCString()), + IsOkAndHolds(identifier1)); + + NamespaceFingerprintIdentifier identifier2(/*namespace_id=*/0, + /*fingerprint=*/1); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier2.EncodeToCString()), + IsOkAndHolds(identifier2)); + + NamespaceFingerprintIdentifier identifier3( + /*namespace_id=*/0, /*fingerprint=*/std::numeric_limits<uint64_t>::max()); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier3.EncodeToCString()), + IsOkAndHolds(identifier3)); + + NamespaceFingerprintIdentifier identifier4(/*namespace_id=*/1, + /*fingerprint=*/0); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier4.EncodeToCString()), + IsOkAndHolds(identifier4)); + + NamespaceFingerprintIdentifier identifier5(/*namespace_id=*/1, + /*fingerprint=*/1); + 
EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier5.EncodeToCString()), + IsOkAndHolds(identifier5)); + + NamespaceFingerprintIdentifier identifier6( + /*namespace_id=*/1, /*fingerprint=*/std::numeric_limits<uint64_t>::max()); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier6.EncodeToCString()), + IsOkAndHolds(identifier6)); + + NamespaceFingerprintIdentifier identifier7( + /*namespace_id=*/std::numeric_limits<NamespaceId>::max(), + /*fingerprint=*/0); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier7.EncodeToCString()), + IsOkAndHolds(identifier7)); + + NamespaceFingerprintIdentifier identifier8( + /*namespace_id=*/std::numeric_limits<NamespaceId>::max(), + /*fingerprint=*/1); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier8.EncodeToCString()), + IsOkAndHolds(identifier8)); + + NamespaceFingerprintIdentifier identifier9( + /*namespace_id=*/std::numeric_limits<NamespaceId>::max(), + /*fingerprint=*/std::numeric_limits<uint64_t>::max()); + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString( + identifier9.EncodeToCString()), + IsOkAndHolds(identifier9)); +} + +TEST(NamespaceFingerprintIdentifierTest, + DecodeFromCStringInvalidLengthShouldReturnError) { + std::string invalid_str = "\x01\x01\x01"; + EXPECT_THAT(NamespaceFingerprintIdentifier::DecodeFromCString(invalid_str), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc index 2b17f13..07fe2c5 100644 --- a/icing/store/usage-store_test.cc +++ b/icing/store/usage-store_test.cc @@ -154,7 +154,8 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) { UsageStore::Create(&filesystem_, test_dir_)); // Report a usage with timestamp 5. 
- usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1)); UsageStore::UsageScores expected_scores = CreateUsageScores( /*type1_timestamp=*/5, /*type2_timestamp=*/0, /*type3_timestamp=*/0, /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0); @@ -162,13 +163,15 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateLastUsedTimestamp) { IsOkAndHolds(expected_scores)); // Report a usage with timestamp 1. The timestamp won't be updated. - usage_store->AddUsageReport(usage_report_time1, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_time1, /*document_id=*/1)); ++expected_scores.usage_type1_count; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); // Report a usage with timestamp 10. The timestamp should be updated. - usage_store->AddUsageReport(usage_report_time10, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_time10, /*document_id=*/1)); expected_scores.usage_type1_last_used_timestamp_s = 10; ++expected_scores.usage_type1_count; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), @@ -188,7 +191,8 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) { UsageStore::Create(&filesystem_, test_dir_)); // Report a usage with type 1. - usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1)); UsageStore::UsageScores expected_scores = CreateUsageScores( /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0, /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0); @@ -196,29 +200,34 @@ TEST_F(UsageStoreTest, AddUsageReportShouldUpdateCounts) { EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); // Report another usage with type 1. 
- usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1)); ++expected_scores.usage_type1_count; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); // Report a usage with type 2. - usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1)); ++expected_scores.usage_type2_count; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); // Report another usage with type 2. - usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_type2, /*document_id=*/1)); ++expected_scores.usage_type2_count; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); // Report a usage with type 3. - usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1)); ++expected_scores.usage_type3_count; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); // Report another usage with type 3. - usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report_type3, /*document_id=*/1)); ++expected_scores.usage_type3_count; EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), IsOkAndHolds(expected_scores)); @@ -457,7 +466,7 @@ TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) { UsageStore::Create(&filesystem_, test_dir_)); // The stored timestamp in seconds should be the max value of uint32. 
- usage_store->AddUsageReport(usage_report, /*document_id=*/1); + ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1)); UsageStore::UsageScores expected_scores = CreateUsageScores( /*type1_timestamp=*/std::numeric_limits<uint32_t>::max(), /*type2_timestamp=*/0, /*type3_timestamp=*/0, @@ -483,7 +492,7 @@ TEST_F(UsageStoreTest, CountsShouldNotOverflow) { // Report another usage with type 1. UsageReport usage_report = CreateUsageReport( "namespace", "uri", /*timestamp_ms=*/0, UsageReport::USAGE_TYPE1); - usage_store->AddUsageReport(usage_report, /*document_id=*/1); + ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1)); // usage_type1_count should not change because it's already the max value. EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1), @@ -571,7 +580,7 @@ TEST_F(UsageStoreTest, GetElementsFileSize) { UsageReport usage_report = CreateUsageReport( "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1); - usage_store->AddUsageReport(usage_report, /*document_id=*/1); + ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, /*document_id=*/1)); EXPECT_THAT(usage_store->GetElementsFileSize(), IsOkAndHolds(Gt(empty_file_size))); @@ -602,12 +611,13 @@ TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) { UsageReport usage_report = CreateUsageReport( "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1); for (int i = 0; i < 200; ++i) { - usage_store->AddUsageReport(usage_report, /*document_id=*/i); + ICING_ASSERT_OK( + usage_store->AddUsageReport(usage_report, /*document_id=*/i)); } // We need to persist since iOS won't see the new disk allocations until after // everything gets written. - usage_store->PersistToDisk(); + ICING_ASSERT_OK(usage_store->PersistToDisk()); EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage))); } |