aboutsummaryrefslogtreecommitdiff
path: root/icing/file
diff options
context:
space:
mode:
authorTerry Wang <tytytyww@google.com>2022-09-30 12:54:54 -0700
committerTerry Wang <tytytyww@google.com>2022-09-30 15:58:03 -0700
commita570eb457c8cf5d677a2616aff0b7a7dcf72e665 (patch)
tree4f8778d18291397ace1f86cdad2fd8ca19dcdddc /icing/file
parentb02eecda6a12241798cdbaaa7069d19f2fc5f41f (diff)
downloadicing-a570eb457c8cf5d677a2616aff0b7a7dcf72e665.tar.gz
Update Icing from upstream.
Descriptions: ====================================================================== Integrate ANTLR-based advanced query prototype with query processor. ====================================================================== [PersistentHashMap][6.1/x] Wrap the return value of KeyMapper::ComputeChecksum by StatusOr ====================================================================== [PersistentHashMap][6.0/x] Replace GetValuesToKeys with iterator for KeyMapper ====================================================================== [PersistentHashMap][5.1/x] Allow client to specify initial num buckets ====================================================================== [PersistentHashMap][5/x] Implement rehashing ====================================================================== Add SchemaType filter and Document Id filter in Search Suggestion API. ====================================================================== Follow up to cl/463377778 ====================================================================== Add Document Id filters in Search Suggestion API. ====================================================================== Cleanup LSC: Replace inclusion of *_proto_portable.pb.h files coming from portable_proto_library() with *(!_proto_portable).pb.h coming from cc_proto_library(). ====================================================================== Cleanup LSC: Replace inclusion of *_proto_portable.pb.h files coming from portable_proto_library() with *(!_proto_portable).pb.h coming from cc_proto_library(). ====================================================================== Cloned from CL 464902284 by 'g4 patch'. ====================================================================== Cleanup LSC: Replace inclusion of *_proto_portable.pb.h files coming from portable_proto_library() with *(!_proto_portable).pb.h coming from cc_proto_library(). 
====================================================================== Cleanup Remove unused visibility specs (last referenced in codebase over 132 days ago). ====================================================================== Cleanup LSC: Replace inclusion of *_proto_portable.pb.h files coming from portable_proto_library() with *(!_proto_portable).pb.h coming from cc_proto_library(). ====================================================================== Cleanup LSC: Replace inclusion of *_proto_portable.pb.h files coming from portable_proto_library() with *(!_proto_portable).pb.h coming from cc_proto_library(). ====================================================================== Remove dsaadati@ from third_party/icing OWNERS ====================================================================== Cleanup Move package level default_copts attribute to copts. ====================================================================== Refactor QueryProcessor and QueryProcessTest in preparation for adding ANTLR prototype to parse queries with search_type EXPERIMENTAL_ICING_ADVANCED_QUERY. ====================================================================== Bug: 208654892 Bug: 230553264 Bug: 237324702 Bug: 193919210 Change-Id: I2f0a612747ccb754502489a9b168406532cffaee
Diffstat (limited to 'icing/file')
-rw-r--r-- icing/file/file-backed-vector.h      | 12
-rw-r--r-- icing/file/persistent-hash-map.cc    | 74
-rw-r--r-- icing/file/persistent-hash-map.h     | 23
-rw-r--r-- icing/file/persistent-hash-map_test.cc | 255
4 files changed, 297 insertions(+), 67 deletions(-)
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 7916666..165597d 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -350,6 +350,12 @@ class FileBackedVector {
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+ // Updates checksum of the vector contents and returns it.
+ //
+ // Returns:
+ // INTERNAL_ERROR if the vector's internal state is inconsistent
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
// Accessors.
const T* array() const {
return reinterpret_cast<const T*>(mmapped_file_->region());
@@ -357,12 +363,6 @@ class FileBackedVector {
int32_t num_elements() const { return header_->num_elements; }
- // Updates checksum of the vector contents and returns it.
- //
- // Returns:
- // INTERNAL_ERROR if the vector's internal state is inconsistent
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
-
public:
class MutableArrayView {
public:
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index 43530dd..8101c89 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <string_view>
+#include <utility>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -184,7 +185,8 @@ std::string GetKeyValueStorageFilePath(std::string_view base_dir) {
/* static */ libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
PersistentHashMap::Create(const Filesystem& filesystem,
std::string_view base_dir, int32_t value_type_size,
- int32_t max_load_factor_percent) {
+ int32_t max_load_factor_percent,
+ int32_t init_num_buckets) {
if (!filesystem.FileExists(
GetMetadataFilePath(base_dir, kSubDirectory).c_str()) ||
!filesystem.FileExists(
@@ -194,7 +196,7 @@ PersistentHashMap::Create(const Filesystem& filesystem,
!filesystem.FileExists(GetKeyValueStorageFilePath(base_dir).c_str())) {
// TODO: erase all files if missing any.
return InitializeNewFiles(filesystem, base_dir, value_type_size,
- max_load_factor_percent);
+ max_load_factor_percent, init_num_buckets);
}
return InitializeExistingFiles(filesystem, base_dir, value_type_size,
max_load_factor_percent);
@@ -397,7 +399,8 @@ libtextclassifier3::StatusOr<Crc32> PersistentHashMap::ComputeChecksum() {
PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
std::string_view base_dir,
int32_t value_type_size,
- int32_t max_load_factor_percent) {
+ int32_t max_load_factor_percent,
+ int32_t init_num_buckets) {
// Create directory.
const std::string dir_path = absl_ports::StrCat(base_dir, "/", kSubDirectory);
if (!filesystem.CreateDirectoryRecursively(dir_path.c_str())) {
@@ -421,8 +424,9 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
filesystem, GetKeyValueStorageFilePath(base_dir),
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- // Initialize one bucket.
- ICING_RETURN_IF_ERROR(bucket_storage->Append(Bucket()));
+ // Initialize buckets.
+ ICING_RETURN_IF_ERROR(
+ bucket_storage->Set(/*idx=*/0, /*len=*/init_num_buckets, Bucket()));
ICING_RETURN_IF_ERROR(bucket_storage->PersistToDisk());
// Create and initialize new info
@@ -511,13 +515,16 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get();
crcs_ptr->all_crc = crcs_ptr->component_crcs.ComputeChecksum().Get();
ICING_RETURN_IF_ERROR(metadata_mmapped_file->PersistToDisk());
- // TODO(b/193919210): rehash if needed
}
- return std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
- filesystem, base_dir, std::move(metadata_mmapped_file),
- std::move(bucket_storage), std::move(entry_storage),
- std::move(kv_storage)));
+ auto persistent_hash_map =
+ std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
+ filesystem, base_dir, std::move(metadata_mmapped_file),
+ std::move(bucket_storage), std::move(entry_storage),
+ std::move(kv_storage)));
+ ICING_RETURN_IF_ERROR(
+ persistent_hash_map->RehashIfNecessary(/*force_rehash=*/false));
+ return persistent_hash_map;
}
libtextclassifier3::StatusOr<PersistentHashMap::EntryIndexPair>
@@ -594,7 +601,52 @@ libtextclassifier3::Status PersistentHashMap::Insert(int32_t bucket_idx,
Entry(new_kv_idx, mutable_bucket.Get().head_entry_index())));
mutable_bucket.Get().set_head_entry_index(new_entry_idx);
- // TODO: rehash if needed
+ return RehashIfNecessary(/*force_rehash=*/false);
+}
+
+libtextclassifier3::Status PersistentHashMap::RehashIfNecessary(
+ bool force_rehash) {
+ int32_t new_num_bucket = bucket_storage_->num_elements();
+ while (new_num_bucket <= Bucket::kMaxNumBuckets / 2 &&
+ size() > static_cast<int64_t>(new_num_bucket) *
+ info()->max_load_factor_percent / 100) {
+ new_num_bucket *= 2;
+ }
+
+ if (!force_rehash && new_num_bucket == bucket_storage_->num_elements()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ // Resize and reset buckets.
+ ICING_RETURN_IF_ERROR(
+ bucket_storage_->Set(0, new_num_bucket, Bucket(Entry::kInvalidIndex)));
+
+ // Iterate all key value pairs in kv_storage, rehash and insert.
+ Iterator iter = GetIterator();
+ int32_t entry_idx = 0;
+ while (iter.Advance()) {
+ ICING_ASSIGN_OR_RETURN(int32_t bucket_idx,
+ HashKeyToBucketIndex(iter.GetKey(), new_num_bucket));
+ ICING_ASSIGN_OR_RETURN(FileBackedVector<Bucket>::MutableView mutable_bucket,
+ bucket_storage_->GetMutable(bucket_idx));
+
+ // Update entry and bucket.
+ ICING_RETURN_IF_ERROR(entry_storage_->Set(
+ entry_idx,
+ Entry(iter.GetIndex(), mutable_bucket.Get().head_entry_index())));
+ mutable_bucket.Get().set_head_entry_index(entry_idx);
+
+ ++entry_idx;
+ }
+
+ // Since there will be some deleted entries, after rehashing entry_storage_
+ // # of vector elements may be greater than the actual # of entries.
+ // Therefore, we have to truncate entry_storage_ to the correct size.
+ if (entry_idx < entry_storage_->num_elements()) {
+ entry_storage_->TruncateTo(entry_idx);
+ }
+
+ info()->num_deleted_entries = 0;
return libtextclassifier3::Status::OK;
}
diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h
index a1ca25d..9a9ca8d 100644
--- a/icing/file/persistent-hash-map.h
+++ b/icing/file/persistent-hash-map.h
@@ -49,6 +49,8 @@ class PersistentHashMap {
// True on success, otherwise false.
bool Advance();
+ int32_t GetIndex() const { return curr_kv_idx_; }
+
// Get the key.
//
// REQUIRES: The preceding call for Advance() is true.
@@ -226,7 +228,8 @@ class PersistentHashMap {
"type size");
static constexpr int32_t kVersion = 1;
- static constexpr int32_t kDefaultMaxLoadFactorPercent = 75;
+ static constexpr int32_t kDefaultMaxLoadFactorPercent = 100;
+ static constexpr int32_t kDefaultInitNumBuckets = 8192;
static constexpr std::string_view kFilePrefix = "persistent_hash_map";
// Only metadata, bucket, entry files are stored under this sub-directory, for
@@ -248,6 +251,9 @@ class PersistentHashMap {
// invoked (and # of buckets will be doubled).
// Note that load_factor_percent exceeding 100 is
// considered valid.
+ // init_num_buckets: initial # of buckets for the persistent hash map. It is
+ // used when creating new persistent hash map and ignored
+ // when creating the instance from existing files.
//
// Returns:
// FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
@@ -257,7 +263,8 @@ class PersistentHashMap {
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
Create(const Filesystem& filesystem, std::string_view base_dir,
int32_t value_type_size,
- int32_t max_load_factor_percent = kDefaultMaxLoadFactorPercent);
+ int32_t max_load_factor_percent = kDefaultMaxLoadFactorPercent,
+ int32_t init_num_buckets = kDefaultInitNumBuckets);
~PersistentHashMap();
@@ -379,7 +386,8 @@ class PersistentHashMap {
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
InitializeNewFiles(const Filesystem& filesystem, std::string_view base_dir,
- int32_t value_type_size, int32_t max_load_factor_percent);
+ int32_t value_type_size, int32_t max_load_factor_percent,
+ int32_t init_num_buckets);
static libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>>
InitializeExistingFiles(const Filesystem& filesystem,
@@ -421,6 +429,15 @@ class PersistentHashMap {
libtextclassifier3::Status Insert(int32_t bucket_idx, std::string_view key,
const void* value);
+ // Rehash function. If force_rehash is true or the hash map loading is greater
+ // than max_load_factor, then it will rehash all keys.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on I/O error or any data inconsistency
+ // Any FileBackedVector errors
+ libtextclassifier3::Status RehashIfNecessary(bool force_rehash);
+
Crcs* crcs() {
return reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
Crcs::kFileOffset);
diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc
index 138320c..8024388 100644
--- a/icing/file/persistent-hash-map_test.cc
+++ b/icing/file/persistent-hash-map_test.cc
@@ -36,12 +36,15 @@ namespace lib {
namespace {
static constexpr int32_t kCorruptedValueOffset = 3;
+static constexpr int32_t kTestInitNumBuckets = 1;
using ::testing::Contains;
using ::testing::Eq;
+using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
using ::testing::Key;
+using ::testing::Lt;
using ::testing::Not;
using ::testing::Pair;
using ::testing::Pointee;
@@ -150,19 +153,52 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) {
.Get()));
}
+TEST_F(PersistentHashMapTest, InitializeNewFilesWithCustomInitBucketSize) {
+ // Create new persistent hash map
+ int custom_init_bucket_size = 123;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ custom_init_bucket_size));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(custom_init_bucket_size));
+}
+
+TEST_F(PersistentHashMapTest, InitBucketSizeShouldNotAffectExistingFiles) {
+ int init_bucket_size1 = 4;
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ init_bucket_size1));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(init_bucket_size1));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ int init_bucket_size2 = 8;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ init_bucket_size2));
+ // # of buckets should still be the original value.
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(init_bucket_size1));
+}
+
TEST_F(PersistentHashMapTest,
TestInitializationFailsWithoutPersistToDiskOrDestruction) {
// Create new persistent hash map
- // Set max_load_factor_percent as 1000. Load factor percent is calculated as
- // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
- // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
- // 1000, we would allow the insertion of up to 10 keys before rehashing, to
- // avoid PersistToDisk being called implicitly by rehashing.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*value_type_size=*/sizeof(int)));
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
@@ -179,23 +215,16 @@ TEST_F(PersistentHashMapTest,
// Without calling PersistToDisk, checksums will not be recomputed or synced
// to disk, so initializing another instance on the same files should fail.
EXPECT_THAT(PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000),
+ /*value_type_size=*/sizeof(int)),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(PersistentHashMapTest, TestInitializationSucceedsWithPersistToDisk) {
// Create new persistent hash map
- // Set max_load_factor_percent as 1000. Load factor percent is calculated as
- // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
- // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
- // 1000, we would allow the insertion of up to 10 keys before rehashing, to
- // avoid PersistToDisk being called implicitly by rehashing.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map1,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*value_type_size=*/sizeof(int)));
// Put some key value pairs.
ICING_ASSERT_OK(persistent_hash_map1->Put("a", Serialize(1).data()));
@@ -217,8 +246,7 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsWithPersistToDisk) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map2,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*value_type_size=*/sizeof(int)));
EXPECT_THAT(persistent_hash_map2, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "a"), IsOkAndHolds(1));
EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "b"), IsOkAndHolds(2));
@@ -227,16 +255,10 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsWithPersistToDisk) {
TEST_F(PersistentHashMapTest, TestInitializationSucceedsAfterDestruction) {
{
// Create new persistent hash map
- // Set max_load_factor_percent as 1000. Load factor percent is calculated as
- // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of
- // buckets in an empty PersistentHashMap) and a max_load_factor_percent of
- // 1000, we would allow the insertion of up to 10 keys before rehashing, to
- // avoid PersistToDisk being called implicitly by rehashing.
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*value_type_size=*/sizeof(int)));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
@@ -257,8 +279,7 @@ TEST_F(PersistentHashMapTest, TestInitializationSucceedsAfterDestruction) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*value_type_size=*/sizeof(int)));
EXPECT_THAT(persistent_hash_map, Pointee(SizeIs(2)));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
@@ -513,8 +534,11 @@ TEST_F(PersistentHashMapTest,
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
- PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ PersistentHashMap::Create(
+ filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
@@ -525,7 +549,7 @@ TEST_F(PersistentHashMapTest,
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
- int32_t new_max_load_factor_percent = 100;
+ int32_t new_max_load_factor_percent = 200;
{
ASSERT_THAT(new_max_load_factor_percent,
Not(Eq(PersistentHashMap::kDefaultMaxLoadFactorPercent)));
@@ -563,7 +587,85 @@ TEST_F(PersistentHashMapTest,
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
/*value_type_size=*/sizeof(int),
- new_max_load_factor_percent));
+ new_max_load_factor_percent,
+ kTestInitNumBuckets));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+}
+
+TEST_F(PersistentHashMapTest,
+ InitializeExistingFilesWithDifferentMaxLoadFactorPercentShouldRehash) {
+ double prev_loading_percent;
+ int prev_num_buckets;
+ {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(
+ filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
+ ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("b", Serialize(2).data()));
+ ICING_ASSERT_OK(persistent_hash_map->Put("c", Serialize(3).data()));
+
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(3)));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+ ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "c"), IsOkAndHolds(3));
+
+ prev_loading_percent = persistent_hash_map->size() * 100.0 /
+ persistent_hash_map->num_buckets();
+ prev_num_buckets = persistent_hash_map->num_buckets();
+ ASSERT_THAT(prev_loading_percent,
+ Not(Gt(PersistentHashMap::kDefaultMaxLoadFactorPercent)));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ int32_t greater_max_load_factor_percent = 150;
+ {
+ ASSERT_THAT(greater_max_load_factor_percent, Gt(prev_loading_percent));
+ // Attempt to create the persistent hash map with max load factor greater
+ // than previous loading. There should be no rehashing and # of buckets
+ // should remain the same.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ greater_max_load_factor_percent,
+ kTestInitNumBuckets));
+
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(prev_num_buckets));
+
+ ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
+ }
+
+ int32_t smaller_max_load_factor_percent = 25;
+ {
+ ASSERT_THAT(smaller_max_load_factor_percent, Lt(prev_loading_percent));
+ // Attempt to create the persistent hash map with max load factor smaller
+ // than previous loading. There should be rehashing since the loading
+ // exceeds the limit.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ smaller_max_load_factor_percent,
+ kTestInitNumBuckets));
+
+ // After changing max_load_factor_percent, there should be rehashing and the
+ // new loading should not be greater than the new max load factor.
+ EXPECT_THAT(persistent_hash_map->size() * 100.0 /
+ persistent_hash_map->num_buckets(),
+ Not(Gt(smaller_max_load_factor_percent)));
+ EXPECT_THAT(persistent_hash_map->num_buckets(), Not(Eq(prev_num_buckets)));
+
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2));
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "c"), IsOkAndHolds(3));
ICING_ASSERT_OK(persistent_hash_map->PersistToDisk());
}
@@ -574,7 +676,9 @@ TEST_F(PersistentHashMapTest, PutAndGet) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty()));
EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
@@ -603,7 +707,9 @@ TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com", Serialize(100).data()));
@@ -624,12 +730,45 @@ TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) {
IsOkAndHolds(300));
}
+TEST_F(PersistentHashMapTest, ShouldRehash) {
+ // Create new persistent hash map
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<PersistentHashMap> persistent_hash_map,
+ PersistentHashMap::Create(filesystem_, base_dir_,
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
+
+ int original_num_buckets = persistent_hash_map->num_buckets();
+ // Insert 100 key value pairs. There should be rehashing so the loading of
+ // hash map doesn't exceed max_load_factor.
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ ICING_ASSERT_OK(persistent_hash_map->Put(key, &i));
+ ASSERT_THAT(persistent_hash_map, Pointee(SizeIs(i + 1)));
+
+ EXPECT_THAT(persistent_hash_map->size() * 100.0 /
+ persistent_hash_map->num_buckets(),
+ Not(Gt(PersistentHashMap::kDefaultMaxLoadFactorPercent)));
+ }
+ EXPECT_THAT(persistent_hash_map->num_buckets(),
+ Not(Eq(original_num_buckets)));
+
+ // After rehashing, we should still be able to get all inserted entries.
+ for (int i = 0; i < 100; ++i) {
+ std::string key = "default-google.com-" + std::to_string(i);
+ EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), key), IsOkAndHolds(i));
+ }
+}
+
TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) {
// Create new persistent hash map
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -648,7 +787,9 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ASSERT_THAT(
persistent_hash_map->Put("default-google.com", Serialize(1).data()),
@@ -670,7 +811,9 @@ TEST_F(PersistentHashMapTest, Delete) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
// Delete a non-existing key should get NOT_FOUND error
EXPECT_THAT(persistent_hash_map->Delete("default-google.com"),
@@ -714,7 +857,9 @@ TEST_F(PersistentHashMapTest, DeleteMultiple) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
std::unordered_map<std::string, int> existing_keys;
std::unordered_set<std::string> deleted_keys;
@@ -766,7 +911,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) {
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
/*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*max_load_factor_percent=*/1000,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -799,7 +945,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) {
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
/*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*max_load_factor_percent=*/1000,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -831,7 +978,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketTailElement) {
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
/*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*max_load_factor_percent=*/1000,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -864,7 +1012,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) {
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
/*value_type_size=*/sizeof(int),
- /*max_load_factor_percent=*/1000));
+ /*max_load_factor_percent=*/1000,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com", Serialize(100).data()));
@@ -903,7 +1052,9 @@ TEST_F(PersistentHashMapTest, EmptyHashMapIterator) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
EXPECT_FALSE(persistent_hash_map->GetIterator().Advance());
}
@@ -913,7 +1064,9 @@ TEST_F(PersistentHashMapTest, Iterator) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
std::unordered_map<std::string, int> kvps;
// Insert 100 key value pairs
@@ -933,7 +1086,9 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -955,7 +1110,9 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -977,7 +1134,9 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));
@@ -999,7 +1158,9 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PersistentHashMap> persistent_hash_map,
PersistentHashMap::Create(filesystem_, base_dir_,
- /*value_type_size=*/sizeof(int)));
+ /*value_type_size=*/sizeof(int),
+ PersistentHashMap::kDefaultMaxLoadFactorPercent,
+ kTestInitNumBuckets));
ICING_ASSERT_OK(
persistent_hash_map->Put("default-google.com-0", Serialize(0).data()));