aboutsummaryrefslogtreecommitdiff
path: root/icing/file
diff options
context:
space:
mode:
authorGrace Zhao <gracezrx@google.com>2022-09-08 20:26:31 +0000
committerGrace Zhao <gracezrx@google.com>2022-09-08 22:53:11 +0000
commitb02eecda6a12241798cdbaaa7069d19f2fc5f41f (patch)
tree15687379068030d4d5443c916d91e9ed364f9b39 /icing/file
parent87267cbc5531600072a283ba0c9500c3fcac87af (diff)
downloadicing-b02eecda6a12241798cdbaaa7069d19f2fc5f41f.tar.gz
Sync from upstream.
Descriptions: ====================================================================== [FileBackedVector Consolidation][4/x] Fix potential PWrite bug in GrowIfNecessary ====================================================================== [FileBackedVector Consolidation][5/x] Create benchmark for FileBackedVector ====================================================================== [FileBackedVector Consolidation][6/x] Avoid calling GetFileSize in GrowIfNecessary ====================================================================== [PersistentHashMap][3.3/x] Implement Delete ====================================================================== Fix the PopulateMatchedTermsStats bug ====================================================================== Add JNI latency for query latency stats breakdown. ====================================================================== [ResultStateManager] Thread safety test1 ====================================================================== [ResultStateManager][2/x] Thread safety test2 ====================================================================== Add native lock contention latency for measuring query latency ====================================================================== Fix implementation of HasMember operator in ANTLR-based list-filter prototype. 
====================================================================== Fix improper uses of std::string_view ====================================================================== Extend the scale of Icing ====================================================================== Decouple the term frequency array from DocHitInfo ====================================================================== Disable hit_term_frequency for non-relevance queries ====================================================================== [ResultStateManager][3/x] Thread safety test3 ====================================================================== [PersistentHashMap][4/x] Implement iterator ======================================================================= Fix the lite index compaction bug ======================================================================= Change-Id: I0edad67affed97af107e2d7cd73770e0268c0903
Diffstat (limited to 'icing/file')
-rw-r--r--icing/file/file-backed-vector.h85
-rw-r--r--icing/file/file-backed-vector_benchmark.cc158
-rw-r--r--icing/file/file-backed-vector_test.cc94
-rw-r--r--icing/file/persistent-hash-map.cc119
-rw-r--r--icing/file/persistent-hash-map.h80
-rw-r--r--icing/file/persistent-hash-map_test.cc367
6 files changed, 853 insertions, 50 deletions
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index bcfbbdd..7916666 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -261,9 +261,20 @@ class FileBackedVector {
//
// Returns:
// OUT_OF_RANGE_ERROR if idx < 0 or idx > kMaxIndex or file cannot be grown
- // idx size
+ // to fit idx + 1 elements
libtextclassifier3::Status Set(int32_t idx, const T& value);
+ // Set [idx, idx + len) to a single value.
+ //
+ // May grow the underlying file and mmapped region as needed to fit the new
+ // value. If it does grow, then any pointers/references to previous values
+ // returned from Get/GetMutable/Allocate may be invalidated.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if idx < 0 or idx + len > kMaxNumElements or file
+ // cannot be grown to fit idx + len elements
+ libtextclassifier3::Status Set(int32_t idx, int32_t len, const T& value);
+
// Appends the value to the end of the vector.
//
// May grow the underlying file and mmapped region as needed to fit the new
@@ -369,8 +380,8 @@ class FileBackedVector {
// It handles SetDirty properly for the file-backed-vector when modifying
// elements.
//
- // REQUIRES: arr is valid && arr_len >= 0 && idx + arr_len <= size(),
- // otherwise the behavior is undefined.
+ // REQUIRES: arr is valid && arr_len >= 0 && idx >= 0 && idx + arr_len <=
+ // size(), otherwise the behavior is undefined.
void SetArray(int32_t idx, const T* arr, int32_t arr_len) {
for (int32_t i = 0; i < arr_len; ++i) {
SetDirty(idx + i);
@@ -433,10 +444,11 @@ class FileBackedVector {
static constexpr int32_t kMaxIndex = kMaxNumElements - 1;
// Can only be created through the factory ::Create function
- FileBackedVector(const Filesystem& filesystem, const std::string& file_path,
- std::unique_ptr<Header> header,
- std::unique_ptr<MemoryMappedFile> mmapped_file,
- int32_t max_file_size);
+ explicit FileBackedVector(const Filesystem& filesystem,
+ const std::string& file_path,
+ std::unique_ptr<Header> header,
+ std::unique_ptr<MemoryMappedFile> mmapped_file,
+ int32_t max_file_size);
// Initialize a new FileBackedVector, and create the file.
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
@@ -765,30 +777,44 @@ FileBackedVector<T>::GetMutable(int32_t idx, int32_t len) {
template <typename T>
libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx,
const T& value) {
+ return Set(idx, 1, value);
+}
+
+template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx, int32_t len,
+ const T& value) {
if (idx < 0) {
return absl_ports::OutOfRangeError(
IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
}
- if (idx > kMaxIndex) {
- return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "Index, %d, was greater than max index allowed, %d", idx, kMaxIndex));
+ if (len <= 0) {
+ return absl_ports::OutOfRangeError("Invalid set length");
}
- ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + 1));
-
- if (idx + 1 > header_->num_elements) {
- header_->num_elements = idx + 1;
+ if (idx > kMaxNumElements - len) {
+ return absl_ports::OutOfRangeError(
+ IcingStringUtil::StringPrintf("Length %d (with index %d), was too long "
+ "for max num elements allowed, %d",
+ len, idx, kMaxNumElements));
}
- if (mutable_array()[idx] == value) {
- // No need to update
- return libtextclassifier3::Status::OK;
+ ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + len));
+
+ if (idx + len > header_->num_elements) {
+ header_->num_elements = idx + len;
}
- SetDirty(idx);
+ for (int32_t i = 0; i < len; ++i) {
+ if (array()[idx + i] == value) {
+ // No need to update
+ continue;
+ }
+
+ SetDirty(idx + i);
+ mutable_array()[idx + i] = value;
+ }
- mutable_array()[idx] = value;
return libtextclassifier3::Status::OK;
}
@@ -835,19 +861,16 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
num_elements, max_file_size_ - Header::kHeaderSize));
}
- int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str());
- if (current_file_size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError("Unable to retrieve file size.");
- }
-
- int32_t least_file_size_needed =
- Header::kHeaderSize + num_elements * kElementTypeSize; // Won't overflow
- if (least_file_size_needed <= current_file_size) {
- // Our underlying file can hold the target num_elements cause we've grown
+ int32_t least_element_file_size_needed =
+ num_elements * kElementTypeSize; // Won't overflow
+ if (least_element_file_size_needed <= mmapped_file_->region_size()) {
+ // Our mmapped region can hold the target num_elements because we've grown
// before
return libtextclassifier3::Status::OK;
}
+ int32_t least_file_size_needed =
+ Header::kHeaderSize + least_element_file_size_needed;
// Otherwise, we need to grow. Grow to kGrowElements boundary.
// Note that we need to use int64_t here, since int32_t might overflow after
// round up.
@@ -857,6 +880,12 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
least_file_size_needed =
std::min(round_up_file_size_needed, int64_t{max_file_size_});
+ // Get the actual file size here.
+ int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (current_file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError("Unable to retrieve file size.");
+ }
+
// We use PWrite here rather than Grow because Grow doesn't actually allocate
// an underlying disk block. This can lead to problems with mmap because mmap
// has no effective way to signal that it was impossible to allocate the disk
diff --git a/icing/file/file-backed-vector_benchmark.cc b/icing/file/file-backed-vector_benchmark.cc
new file mode 100644
index 0000000..b2e660b
--- /dev/null
+++ b/icing/file/file-backed-vector_benchmark.cc
@@ -0,0 +1,158 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <limits>
+#include <memory>
+#include <random>
+#include <string>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class FileBackedVectorBenchmark {
+ public:
+ explicit FileBackedVectorBenchmark()
+ : base_dir_(GetTestTempDir() + "/file_backed_vector_benchmark"),
+ file_path_(base_dir_ + "/test_vector"),
+ ddir_(&filesystem_, base_dir_),
+ random_engine_(/*seed=*/12345) {}
+
+ const Filesystem& filesystem() const { return filesystem_; }
+ const std::string& file_path() const { return file_path_; }
+ std::default_random_engine& random_engine() { return random_engine_; }
+
+ private:
+ Filesystem filesystem_;
+ std::string base_dir_;
+ std::string file_path_;
+ DestructibleDirectory ddir_;
+
+ std::default_random_engine random_engine_;
+};
+
+// Benchmark Set() (without extending vector, i.e. the index should be in range
+// [0, num_elts - 1]).
+void BM_Set(benchmark::State& state) {
+ int num_elts = state.range(0);
+
+ FileBackedVectorBenchmark fbv_benchmark;
+
+ fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> fbv,
+ FileBackedVector<int>::Create(
+ fbv_benchmark.filesystem(), fbv_benchmark.file_path(),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // Extend to num_elts
+ fbv->Set(num_elts - 1, 0);
+
+ std::uniform_int_distribution<> distrib(0, num_elts - 1);
+ for (auto _ : state) {
+ int idx = distrib(fbv_benchmark.random_engine());
+ ICING_ASSERT_OK(fbv->Set(idx, idx));
+ }
+}
+BENCHMARK(BM_Set)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+
+// Benchmark single Append(). Equivalent to Set(fbv->num_elements(), val), which
+// extends the vector every round.
+void BM_Append(benchmark::State& state) {
+ FileBackedVectorBenchmark fbv_benchmark;
+
+ fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> fbv,
+ FileBackedVector<int>::Create(
+ fbv_benchmark.filesystem(), fbv_benchmark.file_path(),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ std::uniform_int_distribution<> distrib(0, std::numeric_limits<int>::max());
+ for (auto _ : state) {
+ ICING_ASSERT_OK(fbv->Append(distrib(fbv_benchmark.random_engine())));
+ }
+}
+BENCHMARK(BM_Append);
+
+// Benchmark appending many elements.
+void BM_AppendMany(benchmark::State& state) {
+ int num = state.range(0);
+
+ FileBackedVectorBenchmark fbv_benchmark;
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ fbv_benchmark.filesystem().DeleteFile(fbv_benchmark.file_path().c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> fbv,
+ FileBackedVector<int>::Create(
+ fbv_benchmark.filesystem(), fbv_benchmark.file_path(),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ state.ResumeTiming();
+
+ for (int i = 0; i < num; ++i) {
+ ICING_ASSERT_OK(fbv->Append(i));
+ }
+
+ // Since destructor calls PersistToDisk, to avoid calling it twice, we reset
+ // the unique pointer to invoke destructor instead of calling PersistToDisk
+ // explicitly, so in this case PersistToDisk will be called only once.
+ fbv.reset();
+ }
+}
+BENCHMARK(BM_AppendMany)
+ ->Arg(1 << 5)
+ ->Arg(1 << 6)
+ ->Arg(1 << 7)
+ ->Arg(1 << 8)
+ ->Arg(1 << 9)
+ ->Arg(1 << 10)
+ ->Arg(1 << 11)
+ ->Arg(1 << 12)
+ ->Arg(1 << 13)
+ ->Arg(1 << 14)
+ ->Arg(1 << 15)
+ ->Arg(1 << 16)
+ ->Arg(1 << 17)
+ ->Arg(1 << 18)
+ ->Arg(1 << 19)
+ ->Arg(1 << 20);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index 60ed887..74e4132 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -36,6 +36,7 @@
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
+using ::testing::DoDefault;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsTrue;
@@ -284,6 +285,65 @@ TEST_F(FileBackedVectorTest, Get) {
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
+TEST_F(FileBackedVectorTest, SetWithoutGrowing) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+ std::string original = "abcde";
+ Insert(vector.get(), /*idx=*/0, original);
+ ASSERT_THAT(vector->num_elements(), Eq(original.length()));
+ ASSERT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq(original));
+
+ ICING_EXPECT_OK(vector->Set(/*idx=*/1, /*len=*/3, 'z'));
+ EXPECT_THAT(vector->num_elements(), Eq(5));
+ EXPECT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq("azzze"));
+}
+
+TEST_F(FileBackedVectorTest, SetWithGrowing) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+ std::string original = "abcde";
+ Insert(vector.get(), /*idx=*/0, original);
+ ASSERT_THAT(vector->num_elements(), Eq(original.length()));
+ ASSERT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/5), Eq(original));
+
+ ICING_EXPECT_OK(vector->Set(/*idx=*/3, /*len=*/4, 'z'));
+ EXPECT_THAT(vector->num_elements(), Eq(7));
+ EXPECT_THAT(Get(vector.get(), /*idx=*/0, /*expected_len=*/7), Eq("abczzzz"));
+}
+
+TEST_F(FileBackedVectorTest, SetInvalidArguments) {
+ // Create a vector and add some data.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ EXPECT_THAT(vector->Set(/*idx=*/0, /*len=*/-1, 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Set(/*idx=*/0, /*len=*/0, 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Set(/*idx=*/-1, /*len=*/2, 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(vector->Set(/*idx=*/100,
+ /*len=*/std::numeric_limits<int32_t>::max(), 'z'),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
TEST_F(FileBackedVectorTest, MutableView) {
// Create a vector and add some data.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1225,6 +1285,40 @@ TEST_F(FileBackedVectorTest, BadFileSizeDuringGrowReturnsError) {
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
+TEST_F(FileBackedVectorTest, PWriteFailsInTheSecondRound) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<int>> vector,
+ FileBackedVector<int>::Create(
+ *mock_filesystem, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+ // At first, the vector is empty and has no mapping established. The first Set
+ // call will cause a Grow.
+ // During Grow, we call PWrite for several rounds to grow the file. Mock
+ // PWrite to succeed in the first round, fail in the second round, and succeed
+ // in the remaining rounds.
+
+ // This unit test checks if we check file size and Remap properly. If the
+ // first PWrite succeeds but the second PWrite fails, then the file size has
+ // been grown, but there will be no Remap for the MemoryMappedFile. Then,
+ // the next several Append() won't require file growth since the file size has
+ // been grown, but it causes memory error because we haven't remapped.
+ EXPECT_CALL(*mock_filesystem,
+ PWrite(A<int>(), A<off_t>(), A<const void*>(), A<size_t>()))
+ .WillOnce(DoDefault())
+ .WillOnce(Return(false))
+ .WillRepeatedly(DoDefault());
+
+ EXPECT_THAT(vector->Append(7),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(vector->Get(/*idx=*/0),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(vector->Append(7), IsOk());
+ EXPECT_THAT(vector->Get(/*idx=*/0), IsOkAndHolds(Pointee(Eq(7))));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index d20285a..43530dd 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -14,6 +14,7 @@
#include "icing/file/persistent-hash-map.h"
+#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
@@ -213,16 +214,16 @@ libtextclassifier3::Status PersistentHashMap::Put(std::string_view key,
int32_t bucket_idx,
HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
- ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx,
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
FindEntryIndexByKey(bucket_idx, key));
- if (target_entry_idx == Entry::kInvalidIndex) {
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
// If not found, then insert new key value pair.
return Insert(bucket_idx, key, value);
}
// Otherwise, overwrite the value.
ICING_ASSIGN_OR_RETURN(const Entry* entry,
- entry_storage_->Get(target_entry_idx));
+ entry_storage_->Get(idx_pair.target_entry_index));
int32_t kv_len = key.length() + 1 + info()->value_type_size;
int32_t value_offset = key.length() + 1;
@@ -244,15 +245,15 @@ libtextclassifier3::Status PersistentHashMap::GetOrPut(std::string_view key,
int32_t bucket_idx,
HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
- ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx,
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
FindEntryIndexByKey(bucket_idx, key));
- if (target_entry_idx == Entry::kInvalidIndex) {
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
// If not found, then insert new key value pair.
return Insert(bucket_idx, key, next_value);
}
// Otherwise, copy the hash map value into next_value.
- return CopyEntryValue(target_entry_idx, next_value);
+ return CopyEntryValue(idx_pair.target_entry_index, next_value);
}
libtextclassifier3::Status PersistentHashMap::Get(std::string_view key,
@@ -262,14 +263,76 @@ libtextclassifier3::Status PersistentHashMap::Get(std::string_view key,
int32_t bucket_idx,
HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
- ICING_ASSIGN_OR_RETURN(int32_t target_entry_idx,
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
FindEntryIndexByKey(bucket_idx, key));
- if (target_entry_idx == Entry::kInvalidIndex) {
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
return absl_ports::NotFoundError(
absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_));
}
- return CopyEntryValue(target_entry_idx, value);
+ return CopyEntryValue(idx_pair.target_entry_index, value);
+}
+
+libtextclassifier3::Status PersistentHashMap::Delete(std::string_view key) {
+ ICING_RETURN_IF_ERROR(ValidateKey(key));
+ ICING_ASSIGN_OR_RETURN(
+ int32_t bucket_idx,
+ HashKeyToBucketIndex(key, bucket_storage_->num_elements()));
+
+ ICING_ASSIGN_OR_RETURN(EntryIndexPair idx_pair,
+ FindEntryIndexByKey(bucket_idx, key));
+ if (idx_pair.target_entry_index == Entry::kInvalidIndex) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Key not found in PersistentHashMap ", base_dir_));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Entry>::MutableView mutable_target_entry,
+ entry_storage_->GetMutable(idx_pair.target_entry_index));
+ if (idx_pair.prev_entry_index == Entry::kInvalidIndex) {
+ // If prev_entry_idx is Entry::kInvalidIndex, then target_entry must be the
+ // head element of the entry linked list, and we have to update
+ // bucket->head_entry_index_.
+ //
+ // Before: target_entry (head) -> next_entry -> ...
+ // After: next_entry (head) -> ...
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Bucket>::MutableView mutable_bucket,
+ bucket_storage_->GetMutable(bucket_idx));
+ if (mutable_bucket.Get().head_entry_index() !=
+ idx_pair.target_entry_index) {
+ return absl_ports::InternalError(
+ "Bucket head entry index is inconsistent with the actual entry linked "
+ "list head. This shouldn't happen");
+ }
+ mutable_bucket.Get().set_head_entry_index(
+ mutable_target_entry.Get().next_entry_index());
+ } else {
+ // Otherwise, connect prev_entry and next_entry, to remove target_entry from
+ // the entry linked list.
+ //
+ // Before: ... -> prev_entry -> target_entry -> next_entry -> ...
+ // After: ... -> prev_entry -> next_entry -> ...
+ ICING_ASSIGN_OR_RETURN(
+ typename FileBackedVector<Entry>::MutableView mutable_prev_entry,
+ entry_storage_->GetMutable(idx_pair.prev_entry_index));
+ mutable_prev_entry.Get().set_next_entry_index(
+ mutable_target_entry.Get().next_entry_index());
+ }
+
+ // Zero out the key value bytes. It is necessary for iterator to iterate
+ // through kv_storage and handle deleted keys properly.
+ int32_t kv_len = key.length() + 1 + info()->value_type_size;
+ ICING_RETURN_IF_ERROR(kv_storage_->Set(
+ mutable_target_entry.Get().key_value_index(), kv_len, '\0'));
+
+ // Invalidate target_entry
+ mutable_target_entry.Get().set_key_value_index(kInvalidKVIndex);
+ mutable_target_entry.Get().set_next_entry_index(Entry::kInvalidIndex);
+
+ ++(info()->num_deleted_entries);
+
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status PersistentHashMap::PersistToDisk() {
@@ -440,7 +503,9 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
// Allow max_load_factor_percent_ change.
if (max_load_factor_percent != info_ptr->max_load_factor_percent) {
- ICING_VLOG(2) << "Changing max_load_factor_percent from " << info_ptr->max_load_factor_percent << " to " << max_load_factor_percent;
+ ICING_VLOG(2) << "Changing max_load_factor_percent from "
+ << info_ptr->max_load_factor_percent << " to "
+ << max_load_factor_percent;
info_ptr->max_load_factor_percent = max_load_factor_percent;
crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get();
@@ -455,12 +520,15 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
std::move(kv_storage)));
}
-libtextclassifier3::StatusOr<int32_t> PersistentHashMap::FindEntryIndexByKey(
- int32_t bucket_idx, std::string_view key) const {
+libtextclassifier3::StatusOr<PersistentHashMap::EntryIndexPair>
+PersistentHashMap::FindEntryIndexByKey(int32_t bucket_idx,
+ std::string_view key) const {
// Iterate all entries in the bucket, compare with key, and return the entry
// index if exists.
ICING_ASSIGN_OR_RETURN(const Bucket* bucket,
bucket_storage_->Get(bucket_idx));
+
+ int32_t prev_entry_idx = Entry::kInvalidIndex;
int32_t curr_entry_idx = bucket->head_entry_index();
while (curr_entry_idx != Entry::kInvalidIndex) {
ICING_ASSIGN_OR_RETURN(const Entry* entry,
@@ -473,13 +541,14 @@ libtextclassifier3::StatusOr<int32_t> PersistentHashMap::FindEntryIndexByKey(
ICING_ASSIGN_OR_RETURN(const char* kv_arr,
kv_storage_->Get(entry->key_value_index()));
if (key.compare(kv_arr) == 0) {
- return curr_entry_idx;
+ return EntryIndexPair(curr_entry_idx, prev_entry_idx);
}
+ prev_entry_idx = curr_entry_idx;
curr_entry_idx = entry->next_entry_index();
}
- return curr_entry_idx;
+ return EntryIndexPair(curr_entry_idx, prev_entry_idx);
}
libtextclassifier3::Status PersistentHashMap::CopyEntryValue(
@@ -530,5 +599,27 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
return libtextclassifier3::Status::OK;
}
+bool PersistentHashMap::Iterator::Advance() {
+ // Jump over the current key value pair before advancing to the next valid
+ // key value pair. In the first round (after construction), curr_key_len_
+ // is 0, so don't jump over anything.
+ if (curr_key_len_ != 0) {
+ curr_kv_idx_ += curr_key_len_ + 1 + map_->info()->value_type_size;
+ curr_key_len_ = 0;
+ }
+
+ // By skipping null chars, we will be automatically handling deleted entries
+ // (which are zeroed out during deletion).
+ for (const char* curr_kv_ptr = map_->kv_storage_->array() + curr_kv_idx_;
+ curr_kv_idx_ < map_->kv_storage_->num_elements();
+ ++curr_kv_ptr, ++curr_kv_idx_) {
+ if (*curr_kv_ptr != '\0') {
+ curr_key_len_ = strlen(curr_kv_ptr);
+ return true;
+ }
+ }
+ return false;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h
index 24a47ea..a1ca25d 100644
--- a/icing/file/persistent-hash-map.h
+++ b/icing/file/persistent-hash-map.h
@@ -36,6 +36,48 @@ namespace lib {
// should not contain termination character '\0'.
class PersistentHashMap {
public:
+ // For iterating through persistent hash map. The order is not guaranteed.
+ //
+ // Not thread-safe.
+ //
+ // Change in underlying persistent hash map invalidates iterator.
+ class Iterator {
+ public:
+ // Advance to the next entry.
+ //
+ // Returns:
+ // True on success, otherwise false.
+ bool Advance();
+
+ // Get the key.
+ //
+ // REQUIRES: The preceding call to Advance() returned true.
+ std::string_view GetKey() const {
+ return std::string_view(map_->kv_storage_->array() + curr_kv_idx_,
+ curr_key_len_);
+ }
+
+ // Get the memory mapped address of the value.
+ //
+ // REQUIRES: The preceding call to Advance() returned true.
+ const void* GetValue() const {
+ return static_cast<const void*>(map_->kv_storage_->array() +
+ curr_kv_idx_ + curr_key_len_ + 1);
+ }
+
+ private:
+ explicit Iterator(const PersistentHashMap* map)
+ : map_(map), curr_kv_idx_(0), curr_key_len_(0) {}
+
+ // Does not own
+ const PersistentHashMap* map_;
+
+ int32_t curr_kv_idx_;
+ int32_t curr_key_len_;
+
+ friend class PersistentHashMap;
+ };
+
// Crcs and Info will be written into the metadata file.
// File layout: <Crcs><Info>
// Crcs
@@ -257,6 +299,19 @@ class PersistentHashMap {
// Any FileBackedVector errors
libtextclassifier3::Status Get(std::string_view key, void* value) const;
+ // Delete the key value pair from the storage. If key doesn't exist, then do
+ // nothing and return NOT_FOUND_ERROR.
+ //
+ // Returns:
+ // OK on success
+ // NOT_FOUND_ERROR if the key doesn't exist
+ // INVALID_ARGUMENT_ERROR if the key is invalid (i.e. contains '\0')
+ // INTERNAL_ERROR on I/O error or any data inconsistency
+ // Any FileBackedVector errors
+ libtextclassifier3::Status Delete(std::string_view key);
+
+ Iterator GetIterator() const { return Iterator(this); }
+
// Flushes content to underlying files.
//
// Returns:
@@ -296,7 +351,19 @@ class PersistentHashMap {
bool empty() const { return size() == 0; }
+ int32_t num_buckets() const { return bucket_storage_->num_elements(); }
+
private:
+ struct EntryIndexPair {
+ int32_t target_entry_index;
+ int32_t prev_entry_index;
+
+ explicit EntryIndexPair(int32_t target_entry_index_in,
+ int32_t prev_entry_index_in)
+ : target_entry_index(target_entry_index_in),
+ prev_entry_index(prev_entry_index_in) {}
+ };
+
explicit PersistentHashMap(
const Filesystem& filesystem, std::string_view base_dir,
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
@@ -319,15 +386,18 @@ class PersistentHashMap {
std::string_view base_dir, int32_t value_type_size,
int32_t max_load_factor_percent);
- // Find the index of the key entry from a bucket (specified by bucket index).
- // The caller should specify the desired bucket index.
+ // Find the index of the target entry (that contains the key) from a bucket
+ // (specified by bucket index). Also return the previous entry index, since
+ // Delete() needs it to update the linked list and head entry index. The
+ // caller should specify the desired bucket index.
//
// Returns:
- // int32_t: on success, the index of the entry, or Entry::kInvalidIndex if
- // not found
+ // std::pair<int32_t, int32_t>: target entry index and previous entry index
+ // on success. If not found, then target entry
+ // index will be Entry::kInvalidIndex
// INTERNAL_ERROR if any content inconsistency
// Any FileBackedVector errors
- libtextclassifier3::StatusOr<int32_t> FindEntryIndexByKey(
+ libtextclassifier3::StatusOr<EntryIndexPair> FindEntryIndexByKey(
int32_t bucket_idx, std::string_view key) const;
// Copy the hash map value of the entry into value buffer.
diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc
index fb15175..138320c 100644
--- a/icing/file/persistent-hash-map_test.cc
+++ b/icing/file/persistent-hash-map_test.cc
@@ -15,6 +15,9 @@
#include "icing/file/persistent-hash-map.h"
#include <cstring>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
@@ -34,12 +37,16 @@ namespace {
static constexpr int32_t kCorruptedValueOffset = 3;
+using ::testing::Contains;
using ::testing::Eq;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
+using ::testing::Key;
using ::testing::Not;
+using ::testing::Pair;
using ::testing::Pointee;
using ::testing::SizeIs;
+using ::testing::UnorderedElementsAre;
using Bucket = PersistentHashMap::Bucket;
using Crcs = PersistentHashMap::Crcs;
@@ -69,6 +76,18 @@ class PersistentHashMapTest : public ::testing::Test {
return val;
}
+ std::unordered_map<std::string, int> GetAllKeyValuePairs(
+ PersistentHashMap::Iterator&& iter) {
+ std::unordered_map<std::string, int> kvps;
+
+ while (iter.Advance()) {
+ int val;
+ memcpy(&val, iter.GetValue(), sizeof(val));
+ kvps.emplace(iter.GetKey(), val);
+ }
+ return kvps;
+ }
+
Filesystem filesystem_;
std::string base_dir_;
};
@@ -148,7 +167,10 @@ TEST_F(PersistentHashMapTest,
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
- // TODO(b/193919210): call Delete() to change PersistentHashMap header
+ ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
+ // Call Delete() to change PersistentHashMap metadata info
+ // (num_deleted_entries)
+ ICING_ASSERT_OK(persistent_hash_map->Delete("c"));
ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
@@ -178,7 +200,10 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsWithPersistToDisk) {
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map1->Put("b", Serialize(2).data()));
- // TODO(b/193919210): call Delete() to change PersistentHashMap header
+ ICING_ASSERT_OK(persistent_hash_map1->Put("c", Serialize(3).data()));
+ // Call Delete() to change PersistentHashMap metadata info
+ // (num_deleted_entries)
+ ICING_ASSERT_OK(persistent_hash_map1->Delete("c"));
ASSERT_THAT(persistent_hash_map1, Pointee(SizeIs(2)));
ASSERT_THAT(GetValueByKey(persistent_hash_map1.get(), "a"), IsOkAndHolds(1));
@@ -214,7 +239,10 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsAfterDestruction) {
/*max_load_factor_percent=*/1000));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
- // TODO(b/193919210): call Delete() to change PersistentHashMap header
+ ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
+ // Call Delete() to change PersistentHashMap metadata info
+ // (num_deleted_entries)
+ ICING_ASSERT_OK(persistent_hash_map->Delete("c"));
ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
@@ -637,6 +665,218 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) {
IsOkAndHolds(1));
}
+TEST_F(PersistentHashMapTest, Delete) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+  // Deleting a non-existent key should return a NOT_FOUND error
+ EXPECT_THAT(persistent_hash_map->Delete("default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(100).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-youtube.com", Serialize(50).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+
+ // Delete an existing key should succeed
+ ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com"));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ // The deleted key should not be found.
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // Other key should remain unchanged and available
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"),
+ IsOkAndHolds(50));
+
+ // Insert back the deleted key. Should get new value
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(200).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ IsOkAndHolds(200));
+
+ // Delete again
+ ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com"));
+ EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  // The other key should remain unchanged and available
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"),
+ IsOkAndHolds(50));
+}
+
+TEST_F(PersistentHashMapTest, DeleteMultiple) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+ std::unordered_map<std::string, int> existing_keys;
+ std::unordered_set<std::string> deleted_keys;
+ // Insert 100 key value pairs
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ ICING_ASSERT_OK(persistent_hash_map->Put(key, &i));
+ existing_keys[key] = i;
+ }
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(existing_keys.size())));
+
+ // Delete several keys.
+ // Simulate with std::unordered_map and verify.
+ std::vector<int> delete_target_ids{3, 4, 6, 9, 13, 18, 24, 31, 39, 48, 58};
+ for (const int delete_target_id : delete_target_ids) {
+ std::string key = "default-google.com-" + std::to_string(delete_target_id);
+ ASSERT_THAT(existing_keys, Contains(Key(key)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), key),
+ IsOkAndHolds(existing_keys[key]));
+ ICING_EXPECT_OK(persistent_hash_map->Delete(key));
+
+ existing_keys.erase(key);
+ deleted_keys.insert(key);
+ }
+
+ // Deleted keys should not be found.
+ for (const std::string& deleted_key : deleted_keys) {
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), deleted_key),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ }
+ // Other keys should remain unchanged and available
+ for (const auto& [existing_key, existing_value] : existing_keys) {
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), existing_key),
+ IsOkAndHolds(existing_value));
+ }
+ // Verify by iterator as well
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ Eq(existing_keys));
+}
+
+TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ /*max_load_factor_percent=*/1000));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1));
+
+ // Delete the head element of the bucket. Note that in our implementation, the
+ // last added element will become the head element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2"));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"),
+ IsOkAndHolds(0));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"),
+ IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ /*max_load_factor_percent=*/1000));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1));
+
+ // Delete any intermediate element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1"));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"),
+ IsOkAndHolds(0));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"),
+ IsOkAndHolds(2));
+}
+
+TEST_F(PersistentHashMapTest, DeleteBucketTailElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ /*max_load_factor_percent=*/1000));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1));
+
+ // Delete the last element of the bucket. Note that in our implementation, the
+ // first added element will become the tail element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0"));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"),
+ IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"),
+ IsOkAndHolds(2));
+}
+
+TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
+ // Create new persistent hash map
+ // Set max_load_factor_percent as 1000. Load factor percent is calculated as
+ // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
+ // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
+ // 1000, we would allow the insertion of up to 10 keys before rehashing.
+ // Preventing rehashing makes it much easier to test collisions.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ /*max_load_factor_percent=*/1000));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com", Serialize(100).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(1)));
+
+ // Delete the only single element of the bucket.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com"));
+ ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty()));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
@@ -654,6 +894,127 @@ TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(persistent_hash_map->Get(invalid_key_view, &val),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(persistent_hash_map->Delete(invalid_key_view),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PersistentHashMapTest, EmptyHashMapIterator) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+ EXPECT_FALSE(persistent_hash_map->GetIterator().Advance());
+}
+
+TEST_F(PersistentHashMapTest, Iterator) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+ std::unordered_map<std::string, int> kvps;
+ // Insert 100 key value pairs
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ ICING_ASSERT_OK(persistent_hash_map->Put(key, &i));
+ kvps.emplace(key, i);
+ }
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(kvps.size())));
+
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ Eq(kvps));
+}
+
+TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete the first key value pair.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0"));
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ UnorderedElementsAre(Pair("default-google.com-1", 1),
+ Pair("default-google.com-2", 2)));
+}
+
+TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete any intermediate key value pair.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1"));
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ UnorderedElementsAre(Pair("default-google.com-0", 0),
+ Pair("default-google.com-2", 2)));
+}
+
+TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete the last key value pair.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2"));
+ EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()),
+ UnorderedElementsAre(Pair("default-google.com-0", 0),
+ Pair("default-google.com-1", 1)));
+}
+
+TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int)));
+
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-1", Serialize(1).data()));
+ ICING_ASSERT_OK(
+ persistent_hash_map->Put("default-google.com-2", Serialize(2).data()));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+
+ // Delete all key value pairs.
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0"));
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1"));
+ ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2"));
+ ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty()));
+ EXPECT_FALSE(persistent_hash_map->GetIterator().Advance());
}
} // namespace