diff options
Diffstat (limited to 'icing/store/document-store.cc')
-rw-r--r-- | icing/store/document-store.cc | 79 |
1 files changed, 43 insertions, 36 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index b49d0de..e99bacf 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -1010,7 +1010,8 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut( // Delete the old document. It's fine if it's not found since it might have // been deleted previously. - auto delete_status = Delete(old_document_id); + auto delete_status = + Delete(old_document_id, clock_.GetSystemTimeMilliseconds()); if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) { // Real error, pass it up. return delete_status; @@ -1055,7 +1056,9 @@ libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get( libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get( DocumentId document_id, bool clear_internal_fields) const { - auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id); + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); + auto document_filter_data_optional_ = + GetAliveDocumentFilterData(document_id, current_time_ms); if (!document_filter_data_optional_) { // The document doesn't exist. Let's check if the document id is invalid, we // will return InvalidArgumentError. Otherwise we should return NOT_FOUND @@ -1118,6 +1121,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const { GetNamespaceIdsToNamespaces(namespace_mapper_.get()); std::unordered_set<NamespaceId> existing_namespace_ids; + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = 0; document_id < filter_cache_->num_elements(); ++document_id) { // filter_cache_->Get can only fail if document_id is < 0 @@ -1130,7 +1134,7 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const { } const DocumentFilterData* data = status_or_data.ValueOrDie(); - if (GetAliveDocumentFilterData(document_id)) { + if (GetAliveDocumentFilterData(document_id, current_time_ms)) { existing_namespace_ids.insert(data->namespace_id()); } } @@ -1144,14 +1148,11 @@ std::vector<std::string> DocumentStore::GetAllNamespaces() const { } std::optional<DocumentFilterData> DocumentStore::GetAliveDocumentFilterData( - DocumentId document_id) const { - if (!IsDocumentIdValid(document_id)) { - return std::nullopt; - } + DocumentId document_id, int64_t current_time_ms) const { if (IsDeleted(document_id)) { return std::nullopt; } - return GetNonExpiredDocumentFilterData(document_id); + return GetNonExpiredDocumentFilterData(document_id, current_time_ms); } bool DocumentStore::IsDeleted(DocumentId document_id) const { @@ -1171,7 +1172,8 @@ bool DocumentStore::IsDeleted(DocumentId document_id) const { // Returns DocumentFilterData if the document is not expired. Otherwise, // std::nullopt. std::optional<DocumentFilterData> -DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id) const { +DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id, + int64_t current_time_ms) const { auto filter_data_or = filter_cache_->GetCopy(document_id); if (!filter_data_or.ok()) { // This would only happen if document_id is out of range of the @@ -1184,15 +1186,15 @@ DocumentStore::GetNonExpiredDocumentFilterData(DocumentId document_id) const { DocumentFilterData document_filter_data = filter_data_or.ValueOrDie(); // Check if it's past the expiration time - if (clock_.GetSystemTimeMilliseconds() >= - document_filter_data.expiration_timestamp_ms()) { + if (current_time_ms >= document_filter_data.expiration_timestamp_ms()) { return std::nullopt; } return document_filter_data; } libtextclassifier3::Status DocumentStore::Delete( - const std::string_view name_space, const std::string_view uri) { + const std::string_view name_space, const std::string_view uri, + int64_t current_time_ms) { // Try to get the DocumentId first auto document_id_or = GetDocumentId(name_space, uri); if (!document_id_or.ok()) { @@ -1201,11 +1203,13 @@ libtextclassifier3::Status DocumentStore::Delete( absl_ports::StrCat("Failed to delete Document. namespace: ", name_space, ", uri: ", uri)); } - return Delete(document_id_or.ValueOrDie()); + return Delete(document_id_or.ValueOrDie(), current_time_ms); } -libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id) { - auto document_filter_data_optional_ = GetAliveDocumentFilterData(document_id); +libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id, + int64_t current_time_ms) { + auto document_filter_data_optional_ = + GetAliveDocumentFilterData(document_id, current_time_ms); if (!document_filter_data_optional_) { // The document doesn't exist. We should return InvalidArgumentError if the // document id is invalid. Otherwise we should return NOT_FOUND error. @@ -1292,24 +1296,16 @@ libtextclassifier3::StatusOr<int32_t> DocumentStore::GetResultGroupingEntryId( libtextclassifier3::StatusOr<DocumentAssociatedScoreData> DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const { - if (!GetAliveDocumentFilterData(document_id)) { - return absl_ports::NotFoundError(IcingStringUtil::StringPrintf( - "Can't get usage scores, document id '%d' doesn't exist", document_id)); - } - auto score_data_or = score_cache_->GetCopy(document_id); if (!score_data_or.ok()) { ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id << " from score_cache_"; - return score_data_or.status(); + return absl_ports::NotFoundError( + std::move(score_data_or).status().error_message()); } DocumentAssociatedScoreData document_associated_score_data = std::move(score_data_or).ValueOrDie(); - if (document_associated_score_data.document_score() < 0) { - // An negative / invalid score means that the score data has been deleted. - return absl_ports::NotFoundError("Document score data not found."); - } return document_associated_score_data; } @@ -1344,9 +1340,9 @@ DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const { // TODO(b/273826815): Decide on and adopt a consistent pattern for handling // NOT_FOUND 'errors' returned by our internal classes. std::optional<UsageStore::UsageScores> DocumentStore::GetUsageScores( - DocumentId document_id) const { + DocumentId document_id, int64_t current_time_ms) const { std::optional<DocumentFilterData> opt = - GetAliveDocumentFilterData(document_id); + GetAliveDocumentFilterData(document_id, current_time_ms); if (!opt) { return std::nullopt; } @@ -1370,7 +1366,8 @@ libtextclassifier3::Status DocumentStore::ReportUsage( // We can use the internal version here because we got our document_id from // our internal data structures. We would have thrown some error if the // namespace and/or uri were incorrect. - if (!GetAliveDocumentFilterData(document_id)) { + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); + if (!GetAliveDocumentFilterData(document_id, current_time_ms)) { // Document was probably deleted or expired. return absl_ports::NotFoundError(absl_ports::StrCat( "Couldn't report usage on a nonexistent document: (namespace: '", @@ -1446,6 +1443,7 @@ libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete( // Traverse FilterCache and delete all docs that match namespace_id and // schema_type_id. + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = 0; document_id < filter_cache_->num_elements(); ++document_id) { // filter_cache_->Get can only fail if document_id is < 0 @@ -1473,7 +1471,8 @@ libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete( // The document has the desired namespace and schema type, it either // exists or has expired. - libtextclassifier3::Status delete_status = Delete(document_id); + libtextclassifier3::Status delete_status = + Delete(document_id, current_time_ms); if (absl_ports::IsNotFound(delete_status)) { continue; } else if (!delete_status.ok()) { @@ -1545,6 +1544,7 @@ DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts( std::unordered_map<std::string, NamespaceStorageInfoProto> namespace_to_storage_info; + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = 0; document_id < document_id_mapper_->num_elements(); ++document_id) { // Check if it's deleted first. @@ -1588,7 +1588,7 @@ DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts( UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie(); // Update our stats - if (!GetNonExpiredDocumentFilterData(document_id)) { + if (!GetNonExpiredDocumentFilterData(document_id, current_time_ms)) { ++total_num_expired; namespace_storage_info.set_num_expired_documents( namespace_storage_info.num_expired_documents() + 1); @@ -1651,6 +1651,7 @@ libtextclassifier3::Status DocumentStore::UpdateSchemaStore( document_validator_.UpdateSchemaStore(schema_store); int size = document_id_mapper_->num_elements(); + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = 0; document_id < size; document_id++) { auto document_or = Get(document_id); if (absl_ports::IsNotFound(document_or.status())) { @@ -1680,7 +1681,8 @@ libtextclassifier3::Status DocumentStore::UpdateSchemaStore( } else { // Document is no longer valid with the new SchemaStore. Mark as // deleted - auto delete_status = Delete(document.namespace_(), document.uri()); + auto delete_status = + Delete(document.namespace_(), document.uri(), current_time_ms); if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) { // Real error, pass up return delete_status; @@ -1704,8 +1706,9 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore( document_validator_.UpdateSchemaStore(schema_store); int size = document_id_mapper_->num_elements(); + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = 0; document_id < size; document_id++) { - if (!GetAliveDocumentFilterData(document_id)) { + if (!GetAliveDocumentFilterData(document_id, current_time_ms)) { // Skip nonexistent documents continue; } @@ -1749,7 +1752,7 @@ libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore( if (delete_document) { // Document is no longer valid with the new SchemaStore. Mark as deleted - auto delete_status = Delete(document_id); + auto delete_status = Delete(document_id, current_time_ms); if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) { // Real error, pass up return delete_status; @@ -1791,12 +1794,14 @@ DocumentStore::OptimizeInto(const std::string& new_directory, int num_expired = 0; UsageStore::UsageScores default_usage; std::vector<DocumentId> document_id_old_to_new(size, kInvalidDocumentId); + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = 0; document_id < size; document_id++) { auto document_or = Get(document_id, /*clear_internal_fields=*/false); if (absl_ports::IsNotFound(document_or.status())) { if (IsDeleted(document_id)) { ++num_deleted; - } else if (!GetNonExpiredDocumentFilterData(document_id)) { + } else if (!GetNonExpiredDocumentFilterData(document_id, + current_time_ms)) { ++num_expired; } continue; @@ -1866,9 +1871,10 @@ DocumentStore::GetOptimizeInfo() const { // Figure out our ratio of optimizable/total docs. int32_t num_documents = document_id_mapper_->num_elements(); + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = kMinDocumentId; document_id < num_documents; ++document_id) { - if (!GetAliveDocumentFilterData(document_id)) { + if (!GetAliveDocumentFilterData(document_id, current_time_ms)) { ++optimize_info.optimizable_docs; } @@ -1979,9 +1985,10 @@ DocumentStore::CollectCorpusInfo() const { std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace = GetNamespaceIdsToNamespaces(namespace_mapper_.get()); const SchemaProto* schema_proto = schema_proto_or.ValueOrDie(); + int64_t current_time_ms = clock_.GetSystemTimeMilliseconds(); for (DocumentId document_id = 0; document_id < filter_cache_->num_elements(); ++document_id) { - if (!GetAliveDocumentFilterData(document_id)) { + if (!GetAliveDocumentFilterData(document_id, current_time_ms)) { continue; } ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data, |