diff options
Diffstat (limited to 'icing/file/persistent-hash-map.cc')
-rw-r--r-- | icing/file/persistent-hash-map.cc | 119 |
1 files changed, 105 insertions, 14 deletions
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc index d20285a..43530dd 100644 --- a/icing/file/persistent-hash-map.cc +++ b/icing/file/persistent-hash-map.cc @@ -14,6 +14,7 @@ #include "icing/file/persistent-hash-map.h" +#include <cstdint> #include <cstring> #include <memory> #include <string> @@ -213,16 +214,16 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key, int32_t bucket_idx, HashKeyToBucketIndex(key, bucket_storage_->num_elements())); - ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx, + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); - if (target_entry_idx == Entry::kInvalidIndex) { + if (idx_pair.target_entry_index == Entry::kInvalidIndex) { // If not found, then insert new key value pair. return Insert(bucket_idx, key, value); } // Otherwise, overwrite the value. ICING_ASSIGN_OR_RETURN(const Entry* entry, - entry_storage_->Get(target_entry_idx)); + entry_storage_->Get(idx_pair.target_entry_index)); int32_t kv_len = key.length() + 1 + info()->value_type_size; int32_t value_offset = key.length() + 1; @@ -244,15 +245,15 @@ libtextclassifier3::Status PersistentHashMap::GetOrPut(std::string_view key, int32_t bucket_idx, HashKeyToBucketIndex(key, bucket_storage_->num_elements())); - ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx, + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); - if (target_entry_idx == Entry::kInvalidIndex) { + if (idx_pair.target_entry_index == Entry::kInvalidIndex) { // If not found, then insert new key value pair. return Insert(bucket_idx, key, next_value); } // Otherwise, copy the hash map value into next_value. - return CopyEntryValue(target_entry_idx, next_value); + return CopyEntryValue(idx_pair.target_entry_index, next_value); } libtextclassifier3::Status PersistentHashMap::Get(std::string_view key, @@ -262,14 +263,76 @@ libtextclassifier3::Status PersistentHashMap::Get(std::string_view key, int32_t bucket_idx, HashKeyToBucketIndex(key, bucket_storage_->num_elements())); - ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx, + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); - if (target_entry_idx == Entry::kInvalidIndex) { + if (idx_pair.target_entry_index == Entry::kInvalidIndex) { return absl_ports::NotFoundError( absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_)); } - return CopyEntryValue(target_entry_idx, value); + return CopyEntryValue(idx_pair.target_entry_index, value); +} + +libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) { + ICING_RETURN_IF_ERROR(ValidateKey(key)); + ICING_ASSIGN_OR_RETURN( + int32_t bucket_idx, + HashKeyToBucketIndex(key, bucket_storage_->num_elements())); + + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, + FindEntryIndexByKey(bucket_idx, key)); + if (idx_pair.target_entry_index == Entry::kInvalidIndex) { + return absl_ports::NotFoundError( + absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_)); + } + + ICING_ASSIGN_OR_RETURN( + typename FileBackedVector<Entry>::MutableView mutable_target_entry, + entry_storage_->GetMutable(idx_pair.target_entry_index)); + if (idx_pair.prev_entry_index == Entry::kInvalidIndex) { + // If prev_entry_idx is Entry::kInvalidIndex, then target_entry must be the + // head element of the entry linked list, and we have to update + // bucket->head_entry_index_. + // + // Before: target_entry (head) -> next_entry -> ... + // After: next_entry (head) -> ... + ICING_ASSIGN_OR_RETURN( + typename FileBackedVector<Bucket>::MutableView mutable_bucket, + bucket_storage_->GetMutable(bucket_idx)); + if (mutable_bucket.Get().head_entry_index() != + idx_pair.target_entry_index) { + return absl_ports::InternalError( + "Bucket head entry index is inconsistent with the actual entry linked" + "list head. This shouldn't happen"); + } + mutable_bucket.Get().set_head_entry_index( + mutable_target_entry.Get().next_entry_index()); + } else { + // Otherwise, connect prev_entry and next_entry, to remove target_entry from + // the entry linked list. + // + // Before: ... -> prev_entry -> target_entry -> next_entry -> ... + // After: ... -> prev_entry -> next_entry -> ... + ICING_ASSIGN_OR_RETURN( + typename FileBackedVector<Entry>::MutableView mutable_prev_entry, + entry_storage_->GetMutable(idx_pair.prev_entry_index)); + mutable_prev_entry.Get().set_next_entry_index( + mutable_target_entry.Get().next_entry_index()); + } + + // Zero out the key value bytes. It is necessary for iterator to iterate + // through kv_storage and handle deleted keys properly. + int32_t kv_len = key.length() + 1 + info()->value_type_size; + ICING_RETURN_IF_ERROR(kv_storage_->Set( + mutable_target_entry.Get().key_value_index(), kv_len, '\0')); + + // Invalidate target_entry + mutable_target_entry.Get().set_key_value_index(kInvalidKVIndex); + mutable_target_entry.Get().set_next_entry_index(Entry::kInvalidIndex); + + ++(info()->num_deleted_entries); + + return libtextclassifier3::Status::OK; } libtextclassifier3::Status PersistentHashMap::PersistToDisk() { @@ -440,7 +503,9 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, // Allow max_load_factor_percent_ change. if (max_load_factor_percent != info_ptr->max_load_factor_percent) { - ICING_VLOG(2) << "Changing max_load_factor_percent from " << info_ptr->max_load_factor_percent << " to " << max_load_factor_percent; + ICING_VLOG(2) << "Changing max_load_factor_percent from " + << info_ptr->max_load_factor_percent << " to " + << max_load_factor_percent; info_ptr->max_load_factor_percent = max_load_factor_percent; crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get(); @@ -455,12 +520,15 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, std::move(kv_storage))); } -libtextclassifier3::StatusOr<int32_t> PersistentHashMap::FindEntryIndexByKey( - int32_t bucket_idx, std::string_view key) const { +libtextclassifier3::StatusOr<PersistentHashMap::EntryIndexPair> +PersistentHashMap::FindEntryIndexByKey(int32_t bucket_idx, + std::string_view key) const { // Iterate all entries in the bucket, compare with key, and return the entry // index if exists. ICING_ASSIGN_OR_RETURN(const Bucket* bucket, bucket_storage_->Get(bucket_idx)); + + int32_t prev_entry_idx = Entry::kInvalidIndex; int32_t curr_entry_idx = bucket->head_entry_index(); while (curr_entry_idx != Entry::kInvalidIndex) { ICING_ASSIGN_OR_RETURN(const Entry* entry, @@ -473,13 +541,14 @@ libtextclassifier3::StatusOr<int32_t> PersistentHashMap::FindEntryIndexByKey( ICING_ASSIGN_OR_RETURN(const char* kv_arr, kv_storage_->Get(entry->key_value_index())); if (key.compare(kv_arr) == 0) { - return curr_entry_idx; + return EntryIndexPair(curr_entry_idx, prev_entry_idx); } + prev_entry_idx = curr_entry_idx; curr_entry_idx = entry->next_entry_index(); } - return curr_entry_idx; + return EntryIndexPair(curr_entry_idx, prev_entry_idx); } libtextclassifier3::Status PersistentHashMap::CopyEntryValue( @@ -530,5 +599,27 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx, return libtextclassifier3::Status::OK; } +bool PersistentHashMap::Iterator::Advance() { + // Jump over the current key value pair before advancing to the next valid + // key value pair. In the first round (after construction), curr_key_len_ + // is 0, so don't jump over anything. + if (curr_key_len_ != 0) { + curr_kv_idx_ += curr_key_len_ + 1 + map_->info()->value_type_size; + curr_key_len_ = 0; + } + + // By skipping null chars, we will be automatically handling deleted entries + // (which are zeroed out during deletion). + for (const char* curr_kv_ptr = map_->kv_storage_->array() + curr_kv_idx_; + curr_kv_idx_ < map_->kv_storage_->num_elements(); + ++curr_kv_ptr, ++curr_kv_idx_) { + if (*curr_kv_ptr != '\0') { + curr_key_len_ = strlen(curr_kv_ptr); + return true; + } + } + return false; +} + } // namespace lib } // namespace icing |