diff options
author | Grace Zhao <gracezrx@google.com> | 2022-09-08 20:26:31 +0000 |
---|---|---|
committer | Grace Zhao <gracezrx@google.com> | 2022-09-08 22:53:11 +0000 |
commit | b02eecda6a12241798cdbaaa7069d19f2fc5f41f (patch) | |
tree | 15687379068030d4d5443c916d91e9ed364f9b39 /icing/file | |
parent | 87267cbc5531600072a283ba0c9500c3fcac87af (diff) | |
download | icing-b02eecda6a12241798cdbaaa7069d19f2fc5f41f.tar.gz |
Sync from upstream.
Descriptions:
======================================================================
[FileBackedVector Consolidation][4/x] Fix potential PWrite bug in GrowIfNecessary
======================================================================
[FileBackedVector Consolidation][5/x] Create benchmark for FileBackedVector
======================================================================
[FileBackedVector Consolidation][6/x] Avoid calling GetFileSize in GrowIfNecessary
======================================================================
[PersistentHashMap][3.3/x] Implement Delete
======================================================================
Fix the PopulateMatchedTermsStats bug
======================================================================
Add JNI latency for query latency stats breakdown.
======================================================================
[ResultStateManager] Thread safety test1
======================================================================
[ResultStateManager][2/x] Thread safety test2
======================================================================
Add native lock contention latency for measuring query latency
======================================================================
Fix implementation of HasMember operator in ANTLR-based list-filter prototype.
======================================================================
Fix improper uses of std::string_view
======================================================================
Extend the scale of Icing
======================================================================
Decouple the term frequency array from DocHitInfo
======================================================================
Disable hit_term_frequency for non-relevance queries
======================================================================
[ResultStateManager][3/x] Thread safety test3
======================================================================
[PersistentHashMap][4/x] Implement iterator
======================================================================
Fix the lite index compaction bug
======================================================================
Change-Id: I0edad67affed97af107e2d7cd73770e0268c0903
Diffstat (limited to 'icing/file')
-rw-r--r-- | icing/file/file-backed-vector.h | 85 | ||||
-rw-r--r-- | icing/file/file-backed-vector_benchmark.cc | 158 | ||||
-rw-r--r-- | icing/file/file-backed-vector_test.cc | 94 | ||||
-rw-r--r-- | icing/file/persistent-hash-map.cc | 119 | ||||
-rw-r--r-- | icing/file/persistent-hash-map.h | 80 | ||||
-rw-r--r-- | icing/file/persistent-hash-map_test.cc | 367 |
6 files changed, 853 insertions, 50 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index bcfbbdd..7916666 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -261,9 +261,20 @@ class FileBackedVector { // // Returns: // OUT_OF_RANGE_ERROR if idx < 0 or idx > kMaxIndex or file cannot be grown - // idx size + // to fit idx + 1 elements libtextclassifier3::Status Set(int32_t idx, const T& value); + // Set [idx, idx + len) to a single value. + // + // May grow the underlying file and mmapped region as needed to fit the new + // value. If it does grow, then any pointers/references to previous values + // returned from Get/GetMutable/Allocate may be invalidated. + // + // Returns: + // OUT_OF_RANGE_ERROR if idx < 0 or idx + len > kMaxNumElements or file + // cannot be grown to fit idx + len elements + libtextclassifier3::Status Set(int32_t idx, int32_t len, const T& value); + // Appends the value to the end of the vector. // // May grow the underlying file and mmapped region as needed to fit the new @@ -369,8 +380,8 @@ class FileBackedVector { // It handles SetDirty properly for the file-backed-vector when modifying // elements. // - // REQUIRES: arr is valid && arr_len >= 0 && idx + arr_len <= size(), - // otherwise the behavior is undefined. + // REQUIRES: arr is valid && arr_len >= 0 && idx >= 0 && idx + arr_len <= + // size(), otherwise the behavior is undefined. 
void SetArray(int32_t idx, const T* arr, int32_t arr_len) { for (int32_t i = 0; i < arr_len; ++i) { SetDirty(idx + i); @@ -433,10 +444,11 @@ class FileBackedVector { static constexpr int32_t kMaxIndex = kMaxNumElements - 1; // Can only be created through the factory ::Create function - FileBackedVector(const Filesystem& filesystem, const std::string& file_path, - std::unique_ptr<Header> header, - std::unique_ptr<MemoryMappedFile> mmapped_file, - int32_t max_file_size); + explicit FileBackedVector(const Filesystem& filesystem, + const std::string& file_path, + std::unique_ptr<Header> header, + std::unique_ptr<MemoryMappedFile> mmapped_file, + int32_t max_file_size); // Initialize a new FileBackedVector, and create the file. static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>> @@ -765,30 +777,44 @@ FileBackedVector<T>::GetMutable(int32_t idx, int32_t len) { template <typename T> libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx, const T& value) { + return Set(idx, 1, value); +} + +template <typename T> +libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx, int32_t len, + const T& value) { if (idx < 0) { return absl_ports::OutOfRangeError( IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx)); } - if (idx > kMaxIndex) { - return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( - "Index, %d, was greater than max index allowed, %d", idx, kMaxIndex)); + if (len <= 0) { + return absl_ports::OutOfRangeError("Invalid set length"); } - ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + 1)); - - if (idx + 1 > header_->num_elements) { - header_->num_elements = idx + 1; + if (idx > kMaxNumElements - len) { + return absl_ports::OutOfRangeError( + IcingStringUtil::StringPrintf("Length %d (with index %d), was too long " + "for max num elements allowed, %d", + len, idx, kMaxNumElements)); } - if (mutable_array()[idx] == value) { - // No need to update - return libtextclassifier3::Status::OK; + 
ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + len)); + + if (idx + len > header_->num_elements) { + header_->num_elements = idx + len; } - SetDirty(idx); + for (int32_t i = 0; i < len; ++i) { + if (array()[idx + i] == value) { + // No need to update + continue; + } + + SetDirty(idx + i); + mutable_array()[idx + i] = value; + } - mutable_array()[idx] = value; return libtextclassifier3::Status::OK; } @@ -835,19 +861,16 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary( num_elements, max_file_size_ - Header::kHeaderSize)); } - int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str()); - if (current_file_size == Filesystem::kBadFileSize) { - return absl_ports::InternalError("Unable to retrieve file size."); - } - - int32_t least_file_size_needed = - Header::kHeaderSize + num_elements * kElementTypeSize; // Won't overflow - if (least_file_size_needed <= current_file_size) { - // Our underlying file can hold the target num_elements cause we've grown + int32_t least_element_file_size_needed = + num_elements * kElementTypeSize; // Won't overflow + if (least_element_file_size_needed <= mmapped_file_->region_size()) { + // Our mmapped region can hold the target num_elements cause we've grown // before return libtextclassifier3::Status::OK; } + int32_t least_file_size_needed = + Header::kHeaderSize + least_element_file_size_needed; // Otherwise, we need to grow. Grow to kGrowElements boundary. // Note that we need to use int64_t here, since int32_t might overflow after // round up. @@ -857,6 +880,12 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary( least_file_size_needed = std::min(round_up_file_size_needed, int64_t{max_file_size_}); + // Get the actual file size here. 
+ int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str()); + if (current_file_size == Filesystem::kBadFileSize) { + return absl_ports::InternalError("Unable to retrieve file size."); + } + // We use PWrite here rather than Grow because Grow doesn't actually allocate // an underlying disk block. This can lead to problems with mmap because mmap // has no effective way to signal that it was impossible to allocate the disk diff --git a/icing/file/file-backed-vector_benchmark.cc b/icing/file/file-backed-vector_benchmark.cc new file mode 100644 index 0000000..b2e660b --- /dev/null +++ b/icing/file/file-backed-vector_benchmark.cc @@ -0,0 +1,158 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <limits> +#include <memory> +#include <random> +#include <string> + +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/destructible-directory.h" +#include "icing/file/file-backed-vector.h" +#include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +class FileBackedVectorBenchmark { + public: + explicit FileBackedVectorBenchmark() + : base_dir_(GetTestTempDir() + "/file_backed_vector_benchmark"), + file_path_(base_dir_ + "/test_vector"), + ddir_(&filesystem_, base_dir_), + random_engine_(/*seed=*/12345) {} + + const Filesystem& filesystem() const { return filesystem_; } + const std::string& file_path() const { return file_path_; } + std::default_random_engine& random_engine() { return random_engine_; } + + private: + Filesystem filesystem_; + std::string base_dir_; + std::string file_path_; + DestructibleDirectory ddir_; + + std::default_random_engine random_engine_; +}; + +// Benchmark Set() (without extending vector, i.e. the index should be in range +// [0, num_elts - 1]. 
+void BM_Set(benchmark::State& state) { + int num_elts = state.range(0); + + FileBackedVectorBenchmark fbv_benchmark; + + fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<int>> fbv, + FileBackedVector<int>::Create( + fbv_benchmark.filesystem(), fbv_benchmark.file_path(), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + // Extend to num_elts + fbv->Set(num_elts - 1, 0); + + std::uniform_int_distribution<> distrib(0, num_elts - 1); + for (auto _ : state) { + int idx = distrib(fbv_benchmark.random_engine()); + ICING_ASSERT_OK(fbv->Set(idx, idx)); + } +} +BENCHMARK(BM_Set) + ->Arg(1 << 10) + ->Arg(1 << 11) + ->Arg(1 << 12) + ->Arg(1 << 13) + ->Arg(1 << 14) + ->Arg(1 << 15) + ->Arg(1 << 16) + ->Arg(1 << 17) + ->Arg(1 << 18) + ->Arg(1 << 19) + ->Arg(1 << 20); + +// Benchmark single Append(). Equivalent to Set(fbv->num_elements(), val), which +// extends the vector every round. +void BM_Append(benchmark::State& state) { + FileBackedVectorBenchmark fbv_benchmark; + + fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<int>> fbv, + FileBackedVector<int>::Create( + fbv_benchmark.filesystem(), fbv_benchmark.file_path(), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + std::uniform_int_distribution<> distrib(0, std::numeric_limits<int>::max()); + for (auto _ : state) { + ICING_ASSERT_OK(fbv->Append(distrib(fbv_benchmark.random_engine()))); + } +} +BENCHMARK(BM_Append); + +// Benchmark appending many elements. 
+void BM_AppendMany(benchmark::State& state) { + int num = state.range(0); + + FileBackedVectorBenchmark fbv_benchmark; + + for (auto _ : state) { + state.PauseTiming(); + fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<int>> fbv, + FileBackedVector<int>::Create( + fbv_benchmark.filesystem(), fbv_benchmark.file_path(), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + state.ResumeTiming(); + + for (int i = 0; i < num; ++i) { + ICING_ASSERT_OK(fbv->Append(i)); + } + + // Since destructor calls PersistToDisk, to avoid calling it twice, we reset + // the unique pointer to invoke destructor instead of calling PersistToDisk + // explicitly, so in this case PersistToDisk will be called only once. + fbv.reset(); + } +} +BENCHMARK(BM_AppendMany) + ->Arg(1 << 5) + ->Arg(1 << 6) + ->Arg(1 << 7) + ->Arg(1 << 8) + ->Arg(1 << 9) + ->Arg(1 << 10) + ->Arg(1 << 11) + ->Arg(1 << 12) + ->Arg(1 << 13) + ->Arg(1 << 14) + ->Arg(1 << 15) + ->Arg(1 << 16) + ->Arg(1 << 17) + ->Arg(1 << 18) + ->Arg(1 << 19) + ->Arg(1 << 20); + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc index 60ed887..74e4132 100644 --- a/icing/file/file-backed-vector_test.cc +++ b/icing/file/file-backed-vector_test.cc @@ -36,6 +36,7 @@ #include "icing/util/crc32.h" #include "icing/util/logging.h" +using ::testing::DoDefault; using ::testing::ElementsAre; using ::testing::Eq; using ::testing::IsTrue; @@ -284,6 +285,65 @@ TEST_F(FileBackedVectorTest, Get) { StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); } +TEST_F(FileBackedVectorTest, SetWithoutGrowing) { + // Create a vector and add some data. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0))); + + std::string original = "abcde"; + Insert(vector.get(), /*idx=*/0, original); + ASSERT_THAT(vector->num_elements(), Eq(original.length())); + ASSERT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq(original)); + + ICING_EXPECT_OK(vector->Set(/*idx=*/1, /*len=*/3, 'z')); + EXPECT_THAT(vector->num_elements(), Eq(5)); + EXPECT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq("azzze")); +} + +TEST_F(FileBackedVectorTest, SetWithGrowing) { + // Create a vector and add some data. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0))); + + std::string original = "abcde"; + Insert(vector.get(), /*idx=*/0, original); + ASSERT_THAT(vector->num_elements(), Eq(original.length())); + ASSERT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq(original)); + + ICING_EXPECT_OK(vector->Set(/*idx=*/3, /*len=*/4, 'z')); + EXPECT_THAT(vector->num_elements(), Eq(7)); + EXPECT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/7), Eq("abczzzz")); +} + +TEST_F(FileBackedVectorTest, SetInvalidArguments) { + // Create a vector and add some data. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + EXPECT_THAT(vector->Set(/*idx=*/0, /*len=*/-1, 'z'), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT(vector->Set(/*idx=*/0, /*len=*/0, 'z'), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT(vector->Set(/*idx=*/-1, /*len=*/2, 'z'), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT(vector->Set(/*idx=*/100, + /*len=*/std::numeric_limits<int32_t>::max(), 'z'), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + TEST_F(FileBackedVectorTest, MutableView) { // Create a vector and add some data. ICING_ASSERT_OK_AND_ASSIGN( @@ -1225,6 +1285,40 @@ TEST_F(FileBackedVectorTest, BadFileSizeDuringGrowReturnsError) { StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } +TEST_F(FileBackedVectorTest, PWriteFailsInTheSecondRound) { + auto mock_filesystem = std::make_unique<MockFilesystem>(); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<int>> vector, + FileBackedVector<int>::Create( + *mock_filesystem, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + + // At first, the vector is empty and has no mapping established. The first Set + // call will cause a Grow. + // During Grow, we call PWrite for several rounds to grow the file. Mock + // PWrite to succeed in the first round, fail in the second round, and succeed + // in the rest rounds. + + // This unit test checks if we check file size and Remap properly. If the + // first PWrite succeeds but the second PWrite fails, then the file size has + // been grown, but there will be no Remap for the MemoryMappedFile. Then, + // the next several Append() won't require file growth since the file size has + // been grown, but it causes memory error because we haven't remapped. 
+ EXPECT_CALL(*mock_filesystem, + PWrite(A<int>(), A<off_t>(), A<const void*>(), A<size_t>())) + .WillOnce(DoDefault()) + .WillOnce(Return(false)) + .WillRepeatedly(DoDefault()); + + EXPECT_THAT(vector->Append(7), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + EXPECT_THAT(vector->Get(/*idx=*/0), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + EXPECT_THAT(vector->Append(7), IsOk()); + EXPECT_THAT(vector->Get(/*idx=*/0), IsOkAndHolds(Pointee(Eq(7)))); +} + } // namespace } // namespace lib diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc index d20285a..43530dd 100644 --- a/icing/file/persistent-hash-map.cc +++ b/icing/file/persistent-hash-map.cc @@ -14,6 +14,7 @@ #include "icing/file/persistent-hash-map.h" +#include <cstdint> #include <cstring> #include <memory> #include <string> @@ -213,16 +214,16 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key, int32_t bucket_idx, HashKeyToBucketIndex(key, bucket_storage_->num_elements())); - ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx, + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); - if (target_entry_idx == Entry::kInvalidIndex) { + if (idx_pair.target_entry_index == Entry::kInvalidIndex) { // If not found, then insert new key value pair. return Insert(bucket_idx, key, value); } // Otherwise, overwrite the value. 
ICING_ASSIGN_OR_RETURN(const Entry* entry, - entry_storage_->Get(target_entry_idx)); + entry_storage_->Get(idx_pair.target_entry_index)); int32_t kv_len = key.length() + 1 + info()->value_type_size; int32_t value_offset = key.length() + 1; @@ -244,15 +245,15 @@ libtextclassifier3::Status PersistentHashMap::GetOrPut(std::string_view key, int32_t bucket_idx, HashKeyToBucketIndex(key, bucket_storage_->num_elements())); - ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx, + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); - if (target_entry_idx == Entry::kInvalidIndex) { + if (idx_pair.target_entry_index == Entry::kInvalidIndex) { // If not found, then insert new key value pair. return Insert(bucket_idx, key, next_value); } // Otherwise, copy the hash map value into next_value. - return CopyEntryValue(target_entry_idx, next_value); + return CopyEntryValue(idx_pair.target_entry_index, next_value); } libtextclassifier3::Status PersistentHashMap::Get(std::string_view key, @@ -262,14 +263,76 @@ libtextclassifier3::Status PersistentHashMap::Get(std::string_view key, int32_t bucket_idx, HashKeyToBucketIndex(key, bucket_storage_->num_elements())); - ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx, + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, FindEntryIndexByKey(bucket_idx, key)); - if (target_entry_idx == Entry::kInvalidIndex) { + if (idx_pair.target_entry_index == Entry::kInvalidIndex) { return absl_ports::NotFoundError( absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_)); } - return CopyEntryValue(target_entry_idx, value); + return CopyEntryValue(idx_pair.target_entry_index, value); +} + +libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) { + ICING_RETURN_IF_ERROR(ValidateKey(key)); + ICING_ASSIGN_OR_RETURN( + int32_t bucket_idx, + HashKeyToBucketIndex(key, bucket_storage_->num_elements())); + + ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair, + FindEntryIndexByKey(bucket_idx, key)); + if 
(idx_pair.target_entry_index == Entry::kInvalidIndex) { + return absl_ports::NotFoundError( + absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_)); + } + + ICING_ASSIGN_OR_RETURN( + typename FileBackedVector<Entry>::MutableView mutable_target_entry, + entry_storage_->GetMutable(idx_pair.target_entry_index)); + if (idx_pair.prev_entry_index == Entry::kInvalidIndex) { + // If prev_entry_idx is Entry::kInvalidIndex, then target_entry must be the + // head element of the entry linked list, and we have to update + // bucket->head_entry_index_. + // + // Before: target_entry (head) -> next_entry -> ... + // After: next_entry (head) -> ... + ICING_ASSIGN_OR_RETURN( + typename FileBackedVector<Bucket>::MutableView mutable_bucket, + bucket_storage_->GetMutable(bucket_idx)); + if (mutable_bucket.Get().head_entry_index() != + idx_pair.target_entry_index) { + return absl_ports::InternalError( + "Bucket head entry index is inconsistent with the actual entry linked" + "list head. This shouldn't happen"); + } + mutable_bucket.Get().set_head_entry_index( + mutable_target_entry.Get().next_entry_index()); + } else { + // Otherwise, connect prev_entry and next_entry, to remove target_entry from + // the entry linked list. + // + // Before: ... -> prev_entry -> target_entry -> next_entry -> ... + // After: ... -> prev_entry -> next_entry -> ... + ICING_ASSIGN_OR_RETURN( + typename FileBackedVector<Entry>::MutableView mutable_prev_entry, + entry_storage_->GetMutable(idx_pair.prev_entry_index)); + mutable_prev_entry.Get().set_next_entry_index( + mutable_target_entry.Get().next_entry_index()); + } + + // Zero out the key value bytes. It is necessary for iterator to iterate + // through kv_storage and handle deleted keys properly. 
+ int32_t kv_len = key.length() + 1 + info()->value_type_size; + ICING_RETURN_IF_ERROR(kv_storage_->Set( + mutable_target_entry.Get().key_value_index(), kv_len, '\0')); + + // Invalidate target_entry + mutable_target_entry.Get().set_key_value_index(kInvalidKVIndex); + mutable_target_entry.Get().set_next_entry_index(Entry::kInvalidIndex); + + ++(info()->num_deleted_entries); + + return libtextclassifier3::Status::OK; } libtextclassifier3::Status PersistentHashMap::PersistToDisk() { @@ -440,7 +503,9 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, // Allow max_load_factor_percent_ change. if (max_load_factor_percent != info_ptr->max_load_factor_percent) { - ICING_VLOG(2) << "Changing max_load_factor_percent from " << info_ptr->max_load_factor_percent << " to " << max_load_factor_percent; + ICING_VLOG(2) << "Changing max_load_factor_percent from " + << info_ptr->max_load_factor_percent << " to " + << max_load_factor_percent; info_ptr->max_load_factor_percent = max_load_factor_percent; crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get(); @@ -455,12 +520,15 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, std::move(kv_storage))); } -libtextclassifier3::StatusOr<int32_t> PersistentHashMap::FindEntryIndexByKey( - int32_t bucket_idx, std::string_view key) const { +libtextclassifier3::StatusOr<PersistentHashMap::EntryIndexPair> +PersistentHashMap::FindEntryIndexByKey(int32_t bucket_idx, + std::string_view key) const { // Iterate all entries in the bucket, compare with key, and return the entry // index if exists. 
ICING_ASSIGN_OR_RETURN(const Bucket* bucket, bucket_storage_->Get(bucket_idx)); + + int32_t prev_entry_idx = Entry::kInvalidIndex; int32_t curr_entry_idx = bucket->head_entry_index(); while (curr_entry_idx != Entry::kInvalidIndex) { ICING_ASSIGN_OR_RETURN(const Entry* entry, @@ -473,13 +541,14 @@ libtextclassifier3::StatusOr<int32_t> PersistentHashMap::FindEntryIndexByKey( ICING_ASSIGN_OR_RETURN(const char* kv_arr, kv_storage_->Get(entry->key_value_index())); if (key.compare(kv_arr) == 0) { - return curr_entry_idx; + return EntryIndexPair(curr_entry_idx, prev_entry_idx); } + prev_entry_idx = curr_entry_idx; curr_entry_idx = entry->next_entry_index(); } - return curr_entry_idx; + return EntryIndexPair(curr_entry_idx, prev_entry_idx); } libtextclassifier3::Status PersistentHashMap::CopyEntryValue( @@ -530,5 +599,27 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx, return libtextclassifier3::Status::OK; } +bool PersistentHashMap::Iterator::Advance() { + // Jump over the current key value pair before advancing to the next valid + // key value pair. In the first round (after construction), curr_key_len_ + // is 0, so don't jump over anything. + if (curr_key_len_ != 0) { + curr_kv_idx_ += curr_key_len_ + 1 + map_->info()->value_type_size; + curr_key_len_ = 0; + } + + // By skipping null chars, we will be automatically handling deleted entries + // (which are zeroed out during deletion). 
+ for (const char* curr_kv_ptr = map_->kv_storage_->array() + curr_kv_idx_; + curr_kv_idx_ < map_->kv_storage_->num_elements(); + ++curr_kv_ptr, ++curr_kv_idx_) { + if (*curr_kv_ptr != '\0') { + curr_key_len_ = strlen(curr_kv_ptr); + return true; + } + } + return false; +} + } // namespace lib } // namespace icing diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h index 24a47ea..a1ca25d 100644 --- a/icing/file/persistent-hash-map.h +++ b/icing/file/persistent-hash-map.h @@ -36,6 +36,48 @@ namespace lib { // should not contain termination character '\0'. class PersistentHashMap { public: + // For iterating through persistent hash map. The order is not guaranteed. + // + // Not thread-safe. + // + // Change in underlying persistent hash map invalidates iterator. + class Iterator { + public: + // Advance to the next entry. + // + // Returns: + // True on success, otherwise false. + bool Advance(); + + // Get the key. + // + // REQUIRES: The preceding call for Advance() is true. + std::string_view GetKey() const { + return std::string_view(map_->kv_storage_->array() + curr_kv_idx_, + curr_key_len_); + } + + // Get the memory mapped address of the value. + // + // REQUIRES: The preceding call for Advance() is true. + const void* GetValue() const { + return static_cast<const void*>(map_->kv_storage_->array() + + curr_kv_idx_ + curr_key_len_ + 1); + } + + private: + explicit Iterator(const PersistentHashMap* map) + : map_(map), curr_kv_idx_(0), curr_key_len_(0) {} + + // Does not own + const PersistentHashMap* map_; + + int32_t curr_kv_idx_; + int32_t curr_key_len_; + + friend class PersistentHashMap; + }; + // Crcs and Info will be written into the metadata file. // File layout: <Crcs><Info> // Crcs @@ -257,6 +299,19 @@ class PersistentHashMap { // Any FileBackedVector errors libtextclassifier3::Status Get(std::string_view key, void* value) const; + // Delete the key value pair from the storage. 
If key doesn't exist, then do + // nothing and return NOT_FOUND_ERROR. + // + // Returns: + // OK on success + // NOT_FOUND_ERROR if the key doesn't exist + // INVALID_ARGUMENT_ERROR if the key is invalid (i.e. contains '\0') + // INTERNAL_ERROR on I/O error or any data inconsistency + // Any FileBackedVector errors + libtextclassifier3::Status Delete(std::string_view key); + + Iterator GetIterator() const { return Iterator(this); } + // Flushes content to underlying files. // // Returns: @@ -296,7 +351,19 @@ class PersistentHashMap { bool empty() const { return size() == 0; } + int32_t num_buckets() const { return bucket_storage_->num_elements(); } + private: + struct EntryIndexPair { + int32_t target_entry_index; + int32_t prev_entry_index; + + explicit EntryIndexPair(int32_t target_entry_index_in, + int32_t prev_entry_index_in) + : target_entry_index(target_entry_index_in), + prev_entry_index(prev_entry_index_in) {} + }; + explicit PersistentHashMap( const Filesystem& filesystem, std::string_view base_dir, std::unique_ptr<MemoryMappedFile> metadata_mmapped_file, @@ -319,15 +386,18 @@ class PersistentHashMap { std::string_view base_dir, int32_t value_type_size, int32_t max_load_factor_percent); - // Find the index of the key entry from a bucket (specified by bucket index). - // The caller should specify the desired bucket index. + // Find the index of the target entry (that contains the key) from a bucket + // (specified by bucket index). Also return the previous entry index, since + // Delete() needs it to update the linked list and head entry index. The + // caller should specify the desired bucket index. // // Returns: - // int32_t: on success, the index of the entry, or Entry::kInvalidIndex if - // not found + // std::pair<int32_t, int32_t>: target entry index and previous entry index + // on success. 
If not found, then target entry + // index will be Entry::kInvalidIndex // INTERNAL_ERROR if any content inconsistency // Any FileBackedVector errors - libtextclassifier3::StatusOr<int32_t> FindEntryIndexByKey( + libtextclassifier3::StatusOr<EntryIndexPair> FindEntryIndexByKey( int32_t bucket_idx, std::string_view key) const; // Copy the hash map value of the entry into value buffer. diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc index fb15175..138320c 100644 --- a/icing/file/persistent-hash-map_test.cc +++ b/icing/file/persistent-hash-map_test.cc @@ -15,6 +15,9 @@ #include "icing/file/persistent-hash-map.h" #include <cstring> +#include <string_view> +#include <unordered_map> +#include <unordered_set> #include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" @@ -34,12 +37,16 @@ namespace { static constexpr int32_t kCorruptedValueOffset = 3; +using ::testing::Contains; using ::testing::Eq; using ::testing::HasSubstr; using ::testing::IsEmpty; +using ::testing::Key; using ::testing::Not; +using ::testing::Pair; using ::testing::Pointee; using ::testing::SizeIs; +using ::testing::UnorderedElementsAre; using Bucket = PersistentHashMap::Bucket; using Crcs = PersistentHashMap::Crcs; @@ -69,6 +76,18 @@ class PersistentHashMapTest : public ::testing::Test { return val; } + std::unordered_map<std::string, int> GetAllKeyValuePairs( + PersistentHashMap::Iterator&& iter) { + std::unordered_map<std::string, int> kvps; + + while (iter.Advance()) { + int val; + memcpy(&val, iter.GetValue(), sizeof(val)); + kvps.emplace(iter.GetKey(), val); + } + return kvps; + } + Filesystem filesystem_; std::string base_dir_; }; @@ -148,7 +167,10 @@ TEST_F(PersistentHashMapTest, // Put some key value pairs. 
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); - // TODO(b/193919210): call Delete() to change PersistentHashMap header + ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data())); + // Call Delete() to change PersistentHashMap metadata info + // (num_deleted_entries) + ICING_ASSERT_OK(persistent_hash_map->Delete("c")); ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1)); @@ -178,7 +200,10 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsWithPersistToDisk) { // Put some key value pairs. ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map1->Put("b", Serialize(2).data())); - // TODO(b/193919210): call Delete() to change PersistentHashMap header + ICING_ASSERT_OK(persistent_hash_map1->Put("c", Serialize(3).data())); + // Call Delete() to change PersistentHashMap metadata info + // (num_deleted_entries) + ICING_ASSERT_OK(persistent_hash_map1->Delete("c")); ASSERT_THAT(persistent_hash_map1, Pointee(SizeIs(2))); ASSERT_THAT(GetValueByKey(persistent_hash_map1.get(), "a"), IsOkAndHolds(1)); @@ -214,7 +239,10 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsAfterDestruction) { /*max_load_factor_percent=*/1000)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); - // TODO(b/193919210): call Delete() to change PersistentHashMap header + ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data())); + // Call Delete() to change PersistentHashMap metadata info + // (num_deleted_entries) + ICING_ASSERT_OK(persistent_hash_map->Delete("c")); ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1)); @@ -637,6 +665,218 @@ TEST_F(PersistentHashMapTest, 
GetOrPutShouldGetIfKeyExists) { IsOkAndHolds(1)); } +TEST_F(PersistentHashMapTest, Delete) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + // Delete a non-existing key should get NOT_FOUND error + EXPECT_THAT(persistent_hash_map->Delete("default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com", Serialize(100).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-youtube.com", Serialize(50).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); + + // Delete an existing key should succeed + ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com")); + EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1))); + // The deleted key should not be found. + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // Other key should remain unchanged and available + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"), + IsOkAndHolds(50)); + + // Insert back the deleted key. 
Should get new value + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com", Serialize(200).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + IsOkAndHolds(200)); + + // Delete again + ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com")); + EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1))); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // Other keys should remain unchanged and available + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"), + IsOkAndHolds(50)); +} + +TEST_F(PersistentHashMapTest, DeleteMultiple) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + std::unordered_map<std::string, int> existing_keys; + std::unordered_set<std::string> deleted_keys; + // Insert 100 key value pairs + for (int i = 0; i < 100; ++i) { + std::string key = "default-google.com-" + std::to_string(i); + ICING_ASSERT_OK(persistent_hash_map->Put(key, &i)); + existing_keys[key] = i; + } + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(existing_keys.size()))); + + // Delete several keys. + // Simulate with std::unordered_map and verify. + std::vector<int> delete_target_ids{3, 4, 6, 9, 13, 18, 24, 31, 39, 48, 58}; + for (const int delete_target_id : delete_target_ids) { + std::string key = "default-google.com-" + std::to_string(delete_target_id); + ASSERT_THAT(existing_keys, Contains(Key(key))); + ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), key), + IsOkAndHolds(existing_keys[key])); + ICING_EXPECT_OK(persistent_hash_map->Delete(key)); + + existing_keys.erase(key); + deleted_keys.insert(key); + } + + // Deleted keys should not be found. 
+ for (const std::string& deleted_key : deleted_keys) { + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), deleted_key), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + } + // Other keys should remain unchanged and available + for (const auto& [existing_key, existing_value] : existing_keys) { + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), existing_key), + IsOkAndHolds(existing_value)); + } + // Verify by iterator as well + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + Eq(existing_keys)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1)); + + // Delete the head element of the bucket. Note that in our implementation, the + // last added element will become the head element of the bucket. 
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2")); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"), + IsOkAndHolds(0)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"), + IsOkAndHolds(1)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1)); + + // Delete any intermediate element of the bucket. 
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1")); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"), + IsOkAndHolds(0)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"), + IsOkAndHolds(2)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketTailElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1)); + + // Delete the last element of the bucket. Note that in our implementation, the + // first added element will become the tail element of the bucket. 
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0")); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"), + IsOkAndHolds(1)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"), + IsOkAndHolds(2)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com", Serialize(100).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(1))); + + // Delete the only single element of the bucket. 
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com")); + ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty())); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( @@ -654,6 +894,127 @@ TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); EXPECT_THAT(persistent_hash_map->Get(invalid_key_view, &val), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(persistent_hash_map->Delete(invalid_key_view), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(PersistentHashMapTest, EmptyHashMapIterator) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + EXPECT_FALSE(persistent_hash_map->GetIterator().Advance()); +} + +TEST_F(PersistentHashMapTest, Iterator) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + std::unordered_map<std::string, int> kvps; + // Insert 100 key value pairs + for (int i = 0; i < 100; ++i) { + std::string key = "default-google.com-" + std::to_string(i); + ICING_ASSERT_OK(persistent_hash_map->Put(key, &i)); + kvps.emplace(key, i); + } + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(kvps.size()))); + + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + Eq(kvps)); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> 
persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete the first key value pair. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0")); + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + UnorderedElementsAre(Pair("default-google.com-1", 1), + Pair("default-google.com-2", 2))); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete any intermediate key value pair. 
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1")); + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + UnorderedElementsAre(Pair("default-google.com-0", 0), + Pair("default-google.com-2", 2))); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete the last key value pair. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2")); + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + UnorderedElementsAre(Pair("default-google.com-0", 0), + Pair("default-google.com-1", 1))); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete all key value pairs. 
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0")); + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1")); + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2")); + ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty())); + EXPECT_FALSE(persistent_hash_map->GetIterator().Advance()); } } // namespace |