diff options
Diffstat (limited to 'icing/file/persistent-hash-map_test.cc')
-rw-r--r-- | icing/file/persistent-hash-map_test.cc | 367 |
1 files changed, 364 insertions, 3 deletions
diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc index fb15175..138320c 100644 --- a/icing/file/persistent-hash-map_test.cc +++ b/icing/file/persistent-hash-map_test.cc @@ -15,6 +15,9 @@ #include "icing/file/persistent-hash-map.h" #include <cstring> +#include <string_view> +#include <unordered_map> +#include <unordered_set> #include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" @@ -34,12 +37,16 @@ namespace { static constexpr int32_t kCorruptedValueOffset = 3; +using ::testing::Contains; using ::testing::Eq; using ::testing::HasSubstr; using ::testing::IsEmpty; +using ::testing::Key; using ::testing::Not; +using ::testing::Pair; using ::testing::Pointee; using ::testing::SizeIs; +using ::testing::UnorderedElementsAre; using Bucket = PersistentHashMap::Bucket; using Crcs = PersistentHashMap::Crcs; @@ -69,6 +76,18 @@ class PersistentHashMapTest : public ::testing::Test { return val; } + std::unordered_map<std::string, int> GetAllKeyValuePairs( + PersistentHashMap::Iterator&& iter) { + std::unordered_map<std::string, int> kvps; + + while (iter.Advance()) { + int val; + memcpy(&val, iter.GetValue(), sizeof(val)); + kvps.emplace(iter.GetKey(), val); + } + return kvps; + } + Filesystem filesystem_; std::string base_dir_; }; @@ -148,7 +167,10 @@ TEST_F(PersistentHashMapTest, // Put some key value pairs. ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); - // TODO(b/193919210): call Delete() to change PersistentHashMap header + ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data())); + // Call Delete() to change PersistentHashMap metadata info + // (num_deleted_entries) + ICING_ASSERT_OK(persistent_hash_map->Delete("c")); ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1)); @@ -178,7 +200,10 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsWithPersistToDisk) { // Put some key value pairs. ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map1->Put("b", Serialize(2).data())); - // TODO(b/193919210): call Delete() to change PersistentHashMap header + ICING_ASSERT_OK(persistent_hash_map1->Put("c", Serialize(3).data())); + // Call Delete() to change PersistentHashMap metadata info + // (num_deleted_entries) + ICING_ASSERT_OK(persistent_hash_map1->Delete("c")); ASSERT_THAT(persistent_hash_map1, Pointee(SizeIs(2))); ASSERT_THAT(GetValueByKey(persistent_hash_map1.get(), "a"), IsOkAndHolds(1)); @@ -214,7 +239,10 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsAfterDestruction) { /*max_load_factor_percent=*/1000)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data())); - // TODO(b/193919210): call Delete() to change PersistentHashMap header + ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data())); + // Call Delete() to change PersistentHashMap metadata info + // (num_deleted_entries) + ICING_ASSERT_OK(persistent_hash_map->Delete("c")); ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1)); @@ -637,6 +665,218 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) { IsOkAndHolds(1)); } +TEST_F(PersistentHashMapTest, Delete) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + // Delete a non-existing key should get NOT_FOUND error + EXPECT_THAT(persistent_hash_map->Delete("default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com", Serialize(100).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-youtube.com", Serialize(50).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); + + // Delete an existing key should succeed + ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com")); + EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1))); + // The deleted key should not be found. + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // Other key should remain unchanged and available + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"), + IsOkAndHolds(50)); + + // Insert back the deleted key. Should get new value + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com", Serialize(200).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(2))); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + IsOkAndHolds(200)); + + // Delete again + ICING_EXPECT_OK(persistent_hash_map->Delete("default-google.com")); + EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(1))); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // Other keys should remain unchanged and available + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-youtube.com"), + IsOkAndHolds(50)); +} + +TEST_F(PersistentHashMapTest, DeleteMultiple) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + std::unordered_map<std::string, int> existing_keys; + std::unordered_set<std::string> deleted_keys; + // Insert 100 key value pairs + for (int i = 0; i < 100; ++i) { + std::string key = "default-google.com-" + std::to_string(i); + ICING_ASSERT_OK(persistent_hash_map->Put(key, &i)); + existing_keys[key] = i; + } + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(existing_keys.size()))); + + // Delete several keys. + // Simulate with std::unordered_map and verify. + std::vector<int> delete_target_ids{3, 4, 6, 9, 13, 18, 24, 31, 39, 48, 58}; + for (const int delete_target_id : delete_target_ids) { + std::string key = "default-google.com-" + std::to_string(delete_target_id); + ASSERT_THAT(existing_keys, Contains(Key(key))); + ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), key), + IsOkAndHolds(existing_keys[key])); + ICING_EXPECT_OK(persistent_hash_map->Delete(key)); + + existing_keys.erase(key); + deleted_keys.insert(key); + } + + // Deleted keys should not be found. + for (const std::string& deleted_key : deleted_keys) { + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), deleted_key), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + } + // Other keys should remain unchanged and available + for (const auto& [existing_key, existing_value] : existing_keys) { + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), existing_key), + IsOkAndHolds(existing_value)); + } + // Verify by iterator as well + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + Eq(existing_keys)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1)); + + // Delete the head element of the bucket. Note that in our implementation, the + // last added element will become the head element of the bucket. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2")); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"), + IsOkAndHolds(0)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"), + IsOkAndHolds(1)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1)); + + // Delete any intermediate element of the bucket. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1")); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"), + IsOkAndHolds(0)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"), + IsOkAndHolds(2)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketTailElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + ASSERT_THAT(persistent_hash_map->num_buckets(), Eq(1)); + + // Delete the last element of the bucket. Note that in our implementation, the + // first added element will become the tail element of the bucket. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0")); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-0"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-1"), + IsOkAndHolds(1)); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com-2"), + IsOkAndHolds(2)); +} + +TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) { + // Create new persistent hash map + // Set max_load_factor_percent as 1000. Load factor percent is calculated as + // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of + // buckets in an empty PersistentHashMap) and a max_load_factor_percent of + // 1000, we would allow the insertion of up to 10 keys before rehashing. + // Preventing rehashing makes it much easier to test collisions. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int), + /*max_load_factor_percent=*/1000)); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com", Serialize(100).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(1))); + + // Delete the only single element of the bucket. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com")); + ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty())); + EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( @@ -654,6 +894,127 @@ TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); EXPECT_THAT(persistent_hash_map->Get(invalid_key_view, &val), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(persistent_hash_map->Delete(invalid_key_view), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(PersistentHashMapTest, EmptyHashMapIterator) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + EXPECT_FALSE(persistent_hash_map->GetIterator().Advance()); +} + +TEST_F(PersistentHashMapTest, Iterator) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + std::unordered_map<std::string, int> kvps; + // Insert 100 key value pairs + for (int i = 0; i < 100; ++i) { + std::string key = "default-google.com-" + std::to_string(i); + ICING_ASSERT_OK(persistent_hash_map->Put(key, &i)); + kvps.emplace(key, i); + } + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(kvps.size()))); + + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + Eq(kvps)); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete the first key value pair. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0")); + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + UnorderedElementsAre(Pair("default-google.com-1", 1), + Pair("default-google.com-2", 2))); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete any intermediate key value pair. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1")); + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + UnorderedElementsAre(Pair("default-google.com-0", 0), + Pair("default-google.com-2", 2))); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete the last key value pair. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2")); + EXPECT_THAT(GetAllKeyValuePairs(persistent_hash_map->GetIterator()), + UnorderedElementsAre(Pair("default-google.com-0", 0), + Pair("default-google.com-1", 1))); +} + +TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) { + // Create new persistent hash map + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<PersistentHashMap> persistent_hash_map, + PersistentHashMap::Create(filesystem_, base_dir_, + /*value_type_size=*/sizeof(int))); + + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-1", Serialize(1).data())); + ICING_ASSERT_OK( + persistent_hash_map->Put("default-google.com-2", Serialize(2).data())); + ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3))); + + // Delete all key value pairs. + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-0")); + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-1")); + ICING_ASSERT_OK(persistent_hash_map->Delete("default-google.com-2")); + ASSERT_THAT(persistent_hash_map, Pointee(IsEmpty())); + EXPECT_FALSE(persistent_hash_map->GetIterator().Advance()); } } // namespace |