diff options
32 files changed, 1164 insertions, 537 deletions
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc index ce8310b..729b09a 100644 --- a/icing/file/persistent-hash-map.cc +++ b/icing/file/persistent-hash-map.cc @@ -351,17 +351,18 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem, FileBackedVector<Bucket>::Create( filesystem, GetBucketStorageFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, - pre_mapping_mmap_size)); + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize entry_storage pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries; max_file_size = pre_mapping_mmap_size + FileBackedVector<Entry>::Header::kHeaderSize; - ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<Entry>> entry_storage, - FileBackedVector<Entry>::Create( - filesystem, GetEntryStorageFilePath(working_path), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - max_file_size, pre_mapping_mmap_size)); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<FileBackedVector<Entry>> entry_storage, + FileBackedVector<Entry>::Create( + filesystem, GetEntryStorageFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize kv_storage pre_mapping_mmap_size = @@ -373,7 +374,7 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem, FileBackedVector<char>::Create( filesystem, GetKeyValueStorageFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, - pre_mapping_mmap_size)); + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize buckets. ICING_RETURN_IF_ERROR(bucket_storage->Set( @@ -441,17 +442,18 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, FileBackedVector<Bucket>::Create( filesystem, GetBucketStorageFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, - pre_mapping_mmap_size)); + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize entry_storage pre_mapping_mmap_size = sizeof(Entry) * options.max_num_entries; max_file_size = pre_mapping_mmap_size + FileBackedVector<Entry>::Header::kHeaderSize; - ICING_ASSIGN_OR_RETURN(std::unique_ptr<FileBackedVector<Entry>> entry_storage, - FileBackedVector<Entry>::Create( - filesystem, GetEntryStorageFilePath(working_path), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - max_file_size, pre_mapping_mmap_size)); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<FileBackedVector<Entry>> entry_storage, + FileBackedVector<Entry>::Create( + filesystem, GetEntryStorageFilePath(working_path), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize kv_storage pre_mapping_mmap_size = @@ -463,7 +465,7 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, FileBackedVector<char>::Create( filesystem, GetKeyValueStorageFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, - pre_mapping_mmap_size)); + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Create instance. auto persistent_hash_map = diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h index a6d14bb..845b22a 100644 --- a/icing/file/persistent-hash-map.h +++ b/icing/file/persistent-hash-map.h @@ -211,12 +211,14 @@ class PersistentHashMap : public PersistentStorage { int32_t max_num_entries_in = Entry::kMaxNumEntries, int32_t max_load_factor_percent_in = kDefaultMaxLoadFactorPercent, int32_t average_kv_byte_size_in = kDefaultAverageKVByteSize, - int32_t init_num_buckets_in = kDefaultInitNumBuckets) + int32_t init_num_buckets_in = kDefaultInitNumBuckets, + bool pre_mapping_fbv_in = false) : value_type_size(value_type_size_in), max_num_entries(max_num_entries_in), max_load_factor_percent(max_load_factor_percent_in), average_kv_byte_size(average_kv_byte_size_in), - init_num_buckets(init_num_buckets_in) {} + init_num_buckets(init_num_buckets_in), + pre_mapping_fbv(pre_mapping_fbv_in) {} bool IsValid() const; @@ -242,6 +244,10 @@ class PersistentHashMap : public PersistentStorage { // It is used when creating new persistent hash map and ignored when // creating the instance from existing files. int32_t init_num_buckets; + + // Flag indicating whether memory map max possible file size for underlying + // FileBackedVector before growing the actual file size. + bool pre_mapping_fbv; }; static constexpr WorkingPathType kWorkingPathType = diff --git a/icing/file/persistent-hash-map_test.cc b/icing/file/persistent-hash-map_test.cc index 6e9a41b..5535629 100644 --- a/icing/file/persistent-hash-map_test.cc +++ b/icing/file/persistent-hash-map_test.cc @@ -39,6 +39,7 @@ using ::testing::IsEmpty; using ::testing::IsTrue; using ::testing::Key; using ::testing::Lt; +using ::testing::Ne; using ::testing::Not; using ::testing::Pair; using ::testing::Pointee; @@ -59,7 +60,7 @@ using Options = PersistentHashMap::Options; static constexpr int32_t kCorruptedValueOffset = 3; static constexpr int32_t kTestInitNumBuckets = 1; -class PersistentHashMapTest : public ::testing::Test { +class PersistentHashMapTest : public ::testing::TestWithParam<bool> { protected: void SetUp() override { base_dir_ = GetTestTempDir() + "/icing"; @@ -103,7 +104,7 @@ class PersistentHashMapTest : public ::testing::Test { std::string working_path_; }; -TEST_F(PersistentHashMapTest, OptionsInvalidValueTypeSize) { +TEST_P(PersistentHashMapTest, OptionsInvalidValueTypeSize) { Options options(/*value_type_size_in=*/sizeof(int)); ASSERT_TRUE(options.IsValid()); @@ -117,7 +118,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidValueTypeSize) { EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, OptionsInvalidMaxNumEntries) { +TEST_P(PersistentHashMapTest, OptionsInvalidMaxNumEntries) { Options options(/*value_type_size_in=*/sizeof(int)); ASSERT_TRUE(options.IsValid()); @@ -131,7 +132,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidMaxNumEntries) { EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, OptionsInvalidMaxLoadFactorPercent) { +TEST_P(PersistentHashMapTest, OptionsInvalidMaxLoadFactorPercent) { Options options(/*value_type_size_in=*/sizeof(int)); ASSERT_TRUE(options.IsValid()); @@ -142,7 +143,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidMaxLoadFactorPercent) { EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, OptionsInvalidAverageKVByteSize) { +TEST_P(PersistentHashMapTest, OptionsInvalidAverageKVByteSize) { Options options(/*value_type_size_in=*/sizeof(int)); ASSERT_TRUE(options.IsValid()); @@ -153,7 +154,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidAverageKVByteSize) { EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, OptionsInvalidInitNumBuckets) { +TEST_P(PersistentHashMapTest, OptionsInvalidInitNumBuckets) { Options options(/*value_type_size_in=*/sizeof(int)); ASSERT_TRUE(options.IsValid()); @@ -171,7 +172,7 @@ TEST_F(PersistentHashMapTest, OptionsInvalidInitNumBuckets) { EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, OptionsNumBucketsRequiredExceedsMaxNumBuckets) { +TEST_P(PersistentHashMapTest, OptionsNumBucketsRequiredExceedsMaxNumBuckets) { Options options(/*value_type_size_in=*/sizeof(int)); ASSERT_TRUE(options.IsValid()); @@ -180,7 +181,7 @@ TEST_F(PersistentHashMapTest, OptionsNumBucketsRequiredExceedsMaxNumBuckets) { EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, OptionsEstimatedNumKeyValuePairExceedsStorageMaxSize) { Options options(/*value_type_size_in=*/sizeof(int)); ASSERT_TRUE(options.IsValid()); @@ -193,15 +194,16 @@ TEST_F(PersistentHashMapTest, EXPECT_FALSE(options.IsValid()); } -TEST_F(PersistentHashMapTest, InvalidWorkingPath) { +TEST_P(PersistentHashMapTest, InvalidWorkingPath) { EXPECT_THAT(PersistentHashMap::Create( filesystem_, "/dev/null/persistent_hash_map_test", Options(/*value_type_size_in=*/sizeof(int))), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } -TEST_F(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) { +TEST_P(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) { Options invalid_options(/*value_type_size_in=*/-1); + invalid_options.pre_mapping_fbv = GetParam(); ASSERT_FALSE(invalid_options.IsValid()); EXPECT_THAT( @@ -209,13 +211,16 @@ TEST_F(PersistentHashMapTest, CreateWithInvalidOptionsShouldFail) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(PersistentHashMapTest, InitializeNewFiles) { +TEST_P(PersistentHashMapTest, InitializeNewFiles) { { ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str())); + + Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, PersistentHashMap::Create(filesystem_, working_path_, - Options(/*value_type_size_in=*/sizeof(int)))); + std::move(options))); EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty())); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); @@ -245,7 +250,7 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) { PersistentHashMap::kCrcsMetadataFileOffset)); // # of elements in bucket_storage should be 1, so it should have non-zero // all storages crc value. - EXPECT_THAT(crcs.component_crcs.storages_crc, Not(Eq(0))); + EXPECT_THAT(crcs.component_crcs.storages_crc, Ne(0)); EXPECT_THAT(crcs.component_crcs.info_crc, Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info), sizeof(Info))) @@ -257,10 +262,9 @@ TEST_F(PersistentHashMapTest, InitializeNewFiles) { .Get())); } -TEST_F(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) { +TEST_P(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) { int custom_init_num_buckets = 128; - // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, PersistentHashMap::Create( @@ -271,11 +275,12 @@ TEST_F(PersistentHashMapTest, InitializeNewFilesWithCustomInitNumBuckets) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/custom_init_num_buckets))); + /*init_num_buckets_in=*/custom_init_num_buckets, + /*pre_mapping_fbv=*/GetParam()))); EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(custom_init_num_buckets)); } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeNewFilesWithInitNumBucketsSmallerThanNumBucketsRequired) { int init_num_buckets = 65536; @@ -290,12 +295,14 @@ TEST_F(PersistentHashMapTest, /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/init_num_buckets))); + /*init_num_buckets_in=*/init_num_buckets, + /*pre_mapping_fbv=*/GetParam()))); EXPECT_THAT(persistent_hash_map->num_buckets(), Eq(init_num_buckets)); } -TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) { +TEST_P(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); int original_init_num_buckets = 4; { @@ -324,9 +331,10 @@ TEST_F(PersistentHashMapTest, InitNumBucketsShouldNotAffectExistingFiles) { Eq(original_init_num_buckets)); } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializationShouldFailWithoutPersistToDiskOrDestruction) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( @@ -351,8 +359,9 @@ TEST_F(PersistentHashMapTest, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } -TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) { +TEST_P(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( @@ -384,8 +393,9 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedWithPersistToDisk) { EXPECT_THAT(GetValueByKey(persistent_hash_map2.get(), "b"), IsOkAndHolds(2)); } -TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) { +TEST_P(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); { // Create new persistent hash map @@ -418,9 +428,10 @@ TEST_F(PersistentHashMapTest, InitializationShouldSucceedAfterDestruction) { } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithDifferentMagicShouldFail) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); { // Create new persistent hash map @@ -472,14 +483,15 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithDifferentValueTypeSizeShouldFail) { { // Create new persistent hash map + Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, - PersistentHashMap::Create(filesystem_, working_path_, - Options(/*value_type_size_in=*/sizeof(int)))); + PersistentHashMap::Create(filesystem_, working_path_, options)); ICING_ASSERT_OK(persistent_hash_map->Put("a", Serialize(1).data())); ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); @@ -488,11 +500,13 @@ TEST_F(PersistentHashMapTest, { // Attempt to create the persistent hash map with different value type size. // This should fail. - ASSERT_THAT(sizeof(char), Not(Eq(sizeof(int)))); + ASSERT_THAT(sizeof(char), Ne(sizeof(int))); + + Options options(/*value_type_size_in=*/sizeof(char)); + options.pre_mapping_fbv = GetParam(); libtextclassifier3::StatusOr<std::unique_ptr<PersistentHashMap>> - persistent_hash_map_or = PersistentHashMap::Create( - filesystem_, working_path_, - Options(/*value_type_size_in=*/sizeof(char))); + persistent_hash_map_or = + PersistentHashMap::Create(filesystem_, working_path_, options); EXPECT_THAT(persistent_hash_map_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(persistent_hash_map_or.status().error_message(), @@ -500,9 +514,10 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithMaxNumEntriesSmallerThanSizeShouldFail) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( @@ -541,8 +556,9 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) { +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); { // Create new persistent hash map @@ -583,9 +599,10 @@ TEST_F(PersistentHashMapTest, InitializeExistingFilesWithWrongAllCrc) { } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithCorruptedInfoShouldFail) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); { // Create new persistent hash map @@ -625,9 +642,10 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithCorruptedBucketStorage) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); { // Create new persistent hash map @@ -670,9 +688,10 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithCorruptedEntryStorage) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); { // Create new persistent hash map @@ -714,9 +733,10 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithCorruptedKeyValueStorage) { Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); { // Create new persistent hash map @@ -757,14 +777,15 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesAllowDifferentMaxLoadFactorPercent) { Options options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, /*max_load_factor_percent_in=*/Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv=*/GetParam()); { // Create new persistent hash map @@ -786,7 +807,7 @@ TEST_F(PersistentHashMapTest, options.max_load_factor_percent = 200; ASSERT_TRUE(options.IsValid()); ASSERT_THAT(options.max_load_factor_percent, - Not(Eq(Options::kDefaultMaxLoadFactorPercent))); + Ne(Options::kDefaultMaxLoadFactorPercent)); // Attempt to create the persistent hash map with different max load factor // percent. This should succeed and metadata should be modified correctly. @@ -824,14 +845,15 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, +TEST_P(PersistentHashMapTest, InitializeExistingFilesWithDifferentMaxLoadFactorPercentShouldRehash) { Options options( /*value_type_size_in=*/sizeof(int), /*max_num_entries_in=*/Entry::kMaxNumEntries, /*max_load_factor_percent_in=*/Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv=*/GetParam()); double prev_loading_percent; int prev_num_buckets; @@ -894,7 +916,7 @@ TEST_F(PersistentHashMapTest, EXPECT_THAT(persistent_hash_map->size() * 100.0 / persistent_hash_map->num_buckets(), Not(Gt(options.max_load_factor_percent))); - EXPECT_THAT(persistent_hash_map->num_buckets(), Not(Eq(prev_num_buckets))); + EXPECT_THAT(persistent_hash_map->num_buckets(), Ne(prev_num_buckets)); EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "a"), IsOkAndHolds(1)); EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "b"), IsOkAndHolds(2)); @@ -904,7 +926,7 @@ TEST_F(PersistentHashMapTest, } } -TEST_F(PersistentHashMapTest, PutAndGet) { +TEST_P(PersistentHashMapTest, PutAndGet) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -916,7 +938,8 @@ TEST_F(PersistentHashMapTest, PutAndGet) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv=*/GetParam()))); EXPECT_THAT(persistent_hash_map, Pointee(IsEmpty())); EXPECT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), @@ -940,7 +963,7 @@ TEST_F(PersistentHashMapTest, PutAndGet) { ICING_ASSERT_OK(persistent_hash_map->PersistToDisk()); } -TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) { +TEST_P(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -952,7 +975,8 @@ TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com", Serialize(100).data())); @@ -973,7 +997,7 @@ TEST_F(PersistentHashMapTest, PutShouldOverwriteValueIfKeyExists) { IsOkAndHolds(300)); } -TEST_F(PersistentHashMapTest, ShouldRehash) { +TEST_P(PersistentHashMapTest, ShouldRehash) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -985,7 +1009,8 @@ TEST_F(PersistentHashMapTest, ShouldRehash) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv=*/GetParam()))); int original_num_buckets = persistent_hash_map->num_buckets(); // Insert 100 key value pairs. There should be rehashing so the loading of @@ -999,8 +1024,7 @@ TEST_F(PersistentHashMapTest, ShouldRehash) { persistent_hash_map->num_buckets(), Not(Gt(Options::kDefaultMaxLoadFactorPercent))); } - EXPECT_THAT(persistent_hash_map->num_buckets(), - Not(Eq(original_num_buckets))); + EXPECT_THAT(persistent_hash_map->num_buckets(), Ne(original_num_buckets)); // After rehashing, we should still be able to get all inserted entries. for (int i = 0; i < 100; ++i) { @@ -1009,7 +1033,7 @@ TEST_F(PersistentHashMapTest, ShouldRehash) { } } -TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) { +TEST_P(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1021,7 +1045,8 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv=*/GetParam()))); ASSERT_THAT(GetValueByKey(persistent_hash_map.get(), "default-google.com"), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -1035,7 +1060,7 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldPutIfKeyDoesNotExist) { IsOkAndHolds(1)); } -TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) { +TEST_P(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1047,7 +1072,8 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ASSERT_THAT( persistent_hash_map->Put("default-google.com", Serialize(1).data()), @@ -1064,7 +1090,7 @@ TEST_F(PersistentHashMapTest, GetOrPutShouldGetIfKeyExists) { IsOkAndHolds(1)); } -TEST_F(PersistentHashMapTest, Delete) { +TEST_P(PersistentHashMapTest, Delete) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1076,7 +1102,8 @@ TEST_F(PersistentHashMapTest, Delete) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); // Delete a non-existing key should get NOT_FOUND error EXPECT_THAT(persistent_hash_map->Delete("default-google.com"), @@ -1115,7 +1142,7 @@ TEST_F(PersistentHashMapTest, Delete) { IsOkAndHolds(50)); } -TEST_F(PersistentHashMapTest, DeleteMultiple) { +TEST_P(PersistentHashMapTest, DeleteMultiple) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1127,7 +1154,8 @@ TEST_F(PersistentHashMapTest, DeleteMultiple) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); std::unordered_map<std::string, int> existing_keys; std::unordered_set<std::string> deleted_keys; @@ -1168,7 +1196,7 @@ TEST_F(PersistentHashMapTest, DeleteMultiple) { Eq(existing_keys)); } -TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) { +TEST_P(PersistentHashMapTest, DeleteBucketHeadElement) { // Create new persistent hash map // Set max_load_factor_percent as 1000. Load factor percent is calculated as // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of @@ -1184,7 +1212,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) { /*max_num_entries_in=*/Entry::kMaxNumEntries, /*max_load_factor_percent_in=*/1000, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); @@ -1206,7 +1235,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketHeadElement) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) { +TEST_P(PersistentHashMapTest, DeleteBucketIntermediateElement) { // Create new persistent hash map // Set max_load_factor_percent as 1000. Load factor percent is calculated as // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of @@ -1222,7 +1251,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) { /*max_num_entries_in=*/Entry::kMaxNumEntries, /*max_load_factor_percent_in=*/1000, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); @@ -1243,7 +1273,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketIntermediateElement) { IsOkAndHolds(2)); } -TEST_F(PersistentHashMapTest, DeleteBucketTailElement) { +TEST_P(PersistentHashMapTest, DeleteBucketTailElement) { // Create new persistent hash map // Set max_load_factor_percent as 1000. Load factor percent is calculated as // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of @@ -1259,7 +1289,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketTailElement) { /*max_num_entries_in=*/Entry::kMaxNumEntries, /*max_load_factor_percent_in=*/1000, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); @@ -1281,7 +1312,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketTailElement) { IsOkAndHolds(2)); } -TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) { +TEST_P(PersistentHashMapTest, DeleteBucketOnlySingleElement) { // Create new persistent hash map // Set max_load_factor_percent as 1000. Load factor percent is calculated as // 100 * num_keys / num_buckets. Therefore, with 1 bucket (the initial # of @@ -1297,7 +1328,8 @@ TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) { /*max_num_entries_in=*/Entry::kMaxNumEntries, /*max_load_factor_percent_in=*/1000, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com", Serialize(100).data())); @@ -1310,7 +1342,7 @@ TEST_F(PersistentHashMapTest, DeleteBucketOnlySingleElement) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) { +TEST_P(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1322,7 +1354,8 @@ TEST_F(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/1))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com", Serialize(100).data())); @@ -1346,12 +1379,13 @@ TEST_F(PersistentHashMapTest, OperationsWhenReachingMaxNumEntries) { StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); } -TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { +TEST_P(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { // Create new persistent hash map + Options options(/*value_type_size_in=*/sizeof(int)); + options.pre_mapping_fbv = GetParam(); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, - PersistentHashMap::Create(filesystem_, working_path_, - Options(/*value_type_size_in=*/sizeof(int)))); + PersistentHashMap::Create(filesystem_, working_path_, options)); const char invalid_key[] = "a\0bc"; std::string_view invalid_key_view(invalid_key, 4); @@ -1367,7 +1401,7 @@ TEST_F(PersistentHashMapTest, ShouldFailIfKeyContainsTerminationCharacter) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(PersistentHashMapTest, EmptyHashMapIterator) { +TEST_P(PersistentHashMapTest, EmptyHashMapIterator) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1379,12 +1413,13 @@ TEST_F(PersistentHashMapTest, EmptyHashMapIterator) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); EXPECT_FALSE(persistent_hash_map->GetIterator().Advance()); } -TEST_F(PersistentHashMapTest, Iterator) { +TEST_P(PersistentHashMapTest, Iterator) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1396,7 +1431,8 @@ TEST_F(PersistentHashMapTest, Iterator) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); std::unordered_map<std::string, int> kvps; // Insert 100 key value pairs @@ -1411,7 +1447,7 @@ TEST_F(PersistentHashMapTest, Iterator) { Eq(kvps)); } -TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) { +TEST_P(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1423,7 +1459,8 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); @@ -1440,7 +1477,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingFirstKeyValuePair) { Pair("default-google.com-2", 2))); } -TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) { +TEST_P(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1452,7 +1489,8 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); @@ -1469,7 +1507,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingIntermediateKeyValuePair) { Pair("default-google.com-2", 2))); } -TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) { +TEST_P(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1481,7 +1519,8 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); @@ -1498,7 +1537,7 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingLastKeyValuePair) { Pair("default-google.com-1", 1))); } -TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) { +TEST_P(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) { // Create new persistent hash map ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -1510,7 +1549,8 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) { /*max_load_factor_percent_in=*/ Options::kDefaultMaxLoadFactorPercent, /*average_kv_byte_size_in=*/Options::kDefaultAverageKVByteSize, - /*init_num_buckets_in=*/kTestInitNumBuckets))); + /*init_num_buckets_in=*/kTestInitNumBuckets, + /*pre_mapping_fbv_in=*/GetParam()))); ICING_ASSERT_OK( persistent_hash_map->Put("default-google.com-0", Serialize(0).data())); @@ -1528,6 +1568,9 @@ TEST_F(PersistentHashMapTest, IteratorAfterDeletingAllKeyValuePairs) { EXPECT_FALSE(persistent_hash_map->GetIterator().Advance()); } +INSTANTIATE_TEST_SUITE_P(PersistentHashMapTest, PersistentHashMapTest, + testing::Values(true, false)); + } // namespace } // namespace lib diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 2dc58a2..d6e25a2 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -660,7 +660,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( IntegerIndex::Discard(*filesystem_, integer_index_dir)); ICING_ASSIGN_OR_RETURN( integer_index_, - IntegerIndex::Create(*filesystem_, std::move(integer_index_dir))); + IntegerIndex::Create(*filesystem_, std::move(integer_index_dir), + options_.pre_mapping_fbv())); // Discard qualified id join index directory and instantiate a new one. std::string qualified_id_join_index_dir = @@ -670,7 +671,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( ICING_ASSIGN_OR_RETURN( qualified_id_join_index_, QualifiedIdTypeJoinableIndex::Create( - *filesystem_, std::move(qualified_id_join_index_dir))); + *filesystem_, std::move(qualified_id_join_index_dir), + options_.pre_mapping_fbv(), options_.use_persistent_hash_map())); std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer(); IndexRestorationResult restore_result = RestoreIndexIfNeeded(); @@ -815,7 +817,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( std::string integer_index_dir = MakeIntegerIndexWorkingPath(options_.base_dir()); InitializeStatsProto::RecoveryCause integer_index_recovery_cause; - auto integer_index_or = IntegerIndex::Create(*filesystem_, integer_index_dir); + auto integer_index_or = IntegerIndex::Create(*filesystem_, integer_index_dir, + options_.pre_mapping_fbv()); if (!integer_index_or.ok()) { ICING_RETURN_IF_ERROR( IntegerIndex::Discard(*filesystem_, integer_index_dir)); @@ -825,7 +828,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( // Try recreating it from scratch and re-indexing everything. ICING_ASSIGN_OR_RETURN( integer_index_, - IntegerIndex::Create(*filesystem_, std::move(integer_index_dir))); + IntegerIndex::Create(*filesystem_, std::move(integer_index_dir), + options_.pre_mapping_fbv())); } else { // Integer index was created fine. integer_index_ = std::move(integer_index_or).ValueOrDie(); @@ -840,7 +844,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()); InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause; auto qualified_id_join_index_or = QualifiedIdTypeJoinableIndex::Create( - *filesystem_, qualified_id_join_index_dir); + *filesystem_, qualified_id_join_index_dir, options_.pre_mapping_fbv(), + options_.use_persistent_hash_map()); if (!qualified_id_join_index_or.ok()) { ICING_RETURN_IF_ERROR(QualifiedIdTypeJoinableIndex::Discard( *filesystem_, qualified_id_join_index_dir)); @@ -851,7 +856,8 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex( ICING_ASSIGN_OR_RETURN( qualified_id_join_index_, QualifiedIdTypeJoinableIndex::Create( - *filesystem_, std::move(qualified_id_join_index_dir))); + *filesystem_, std::move(qualified_id_join_index_dir), + options_.pre_mapping_fbv(), options_.use_persistent_hash_map())); } else { // Qualified id join index was created fine. qualified_id_join_index_ = diff --git a/icing/icing-search-engine_initialization_test.cc b/icing/icing-search-engine_initialization_test.cc index 13a2dc3..74cc78f 100644 --- a/icing/icing-search-engine_initialization_test.cc +++ b/icing/icing-search-engine_initialization_test.cc @@ -3196,7 +3196,8 @@ TEST_F(IcingSearchEngineInitializationTest, Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem, GetIntegerIndexDir())); + IntegerIndex::Create(filesystem, GetIntegerIndexDir(), + /*pre_mapping_fbv=*/false)); // Add hits for document 0. ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId); integer_index->set_last_added_document_id(0); @@ -3374,7 +3375,8 @@ TEST_F(IcingSearchEngineInitializationTest, Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem, GetIntegerIndexDir())); + IntegerIndex::Create(filesystem, GetIntegerIndexDir(), + /*pre_mapping_fbv=*/false)); // Add hits for document 4. DocumentId original_last_added_doc_id = integer_index->last_added_document_id(); @@ -3570,8 +3572,9 @@ TEST_F(IcingSearchEngineInitializationTest, Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index, - QualifiedIdTypeJoinableIndex::Create(filesystem, - GetQualifiedIdJoinIndexDir())); + QualifiedIdTypeJoinableIndex::Create( + filesystem, GetQualifiedIdJoinIndexDir(), /*pre_mapping_fbv=*/false, + /*use_persistent_hash_map=*/false)); // Add data for document 0. ASSERT_THAT(qualified_id_join_index->last_added_document_id(), kInvalidDocumentId); @@ -3639,8 +3642,9 @@ TEST_F(IcingSearchEngineInitializationTest, Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index, - QualifiedIdTypeJoinableIndex::Create(filesystem, - GetQualifiedIdJoinIndexDir())); + QualifiedIdTypeJoinableIndex::Create( + filesystem, GetQualifiedIdJoinIndexDir(), /*pre_mapping_fbv=*/false, + /*use_persistent_hash_map=*/false)); EXPECT_THAT(qualified_id_join_index->Get( DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0)), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -3739,8 +3743,9 @@ TEST_F(IcingSearchEngineInitializationTest, Filesystem filesystem; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index, - QualifiedIdTypeJoinableIndex::Create(filesystem, - GetQualifiedIdJoinIndexDir())); + QualifiedIdTypeJoinableIndex::Create( + filesystem, GetQualifiedIdJoinIndexDir(), /*pre_mapping_fbv=*/false, + /*use_persistent_hash_map=*/false)); // Add data for document 4. DocumentId original_last_added_doc_id = qualified_id_join_index->last_added_document_id(); @@ -5137,12 +5142,14 @@ TEST_P(IcingSearchEngineInitializationVersionChangeTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(*filesystem(), GetIntegerIndexDir())); + IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(), + /*pre_mapping_fbv=*/false)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index, - QualifiedIdTypeJoinableIndex::Create(*filesystem(), - GetQualifiedIdJoinIndexDir())); + QualifiedIdTypeJoinableIndex::Create( + *filesystem(), GetQualifiedIdJoinIndexDir(), + /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<StringSectionIndexingHandler> diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index 1cbe00d..b6d3c29 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -226,7 +226,8 @@ void BM_IndexDocumentWithOneProperty(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - IntegerIndex::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir, + /*pre_mapping_fbv=*/true)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); @@ -300,7 +301,8 @@ void BM_IndexDocumentWithTenProperties(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - IntegerIndex::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir, + /*pre_mapping_fbv=*/true)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); @@ -375,7 +377,8 @@ void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - IntegerIndex::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir, + /*pre_mapping_fbv=*/true)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); @@ -450,7 +453,8 @@ void BM_IndexDocumentWithHiragana(benchmark::State& state) { CreateIndex(icing_filesystem, filesystem, index_dir); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumericIndex<int64_t>> integer_index, - IntegerIndex::Create(filesystem, integer_index_dir)); + IntegerIndex::Create(filesystem, integer_index_dir, + /*pre_mapping_fbv=*/true)); language_segmenter_factory::SegmenterOptions options(ULOC_US); std::unique_ptr<LanguageSegmenter> language_segmenter = language_segmenter_factory::Create(std::move(options)).ValueOrDie(); diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index ed9e856..581645a 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -172,11 +172,14 @@ class IndexProcessorTest : public Test { index_, Index::Create(options, &filesystem_, &icing_filesystem_)); ICING_ASSERT_OK_AND_ASSIGN( - integer_index_, IntegerIndex::Create(filesystem_, integer_index_dir_)); + integer_index_, IntegerIndex::Create(filesystem_, integer_index_dir_, + /*pre_mapping_fbv=*/false)); - ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_, - QualifiedIdTypeJoinableIndex::Create( - filesystem_, qualified_id_join_index_dir_)); + ICING_ASSERT_OK_AND_ASSIGN( + qualified_id_join_index_, + QualifiedIdTypeJoinableIndex::Create( + filesystem_, qualified_id_join_index_dir_, + /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc index 706856c..389caef 100644 --- a/icing/index/integer-section-indexing-handler_test.cc +++ b/icing/index/integer-section-indexing-handler_test.cc @@ -105,7 +105,8 @@ class IntegerSectionIndexingHandlerTest : public ::testing::Test { ICING_ASSERT_OK_AND_ASSIGN( integer_index_, - IntegerIndex::Create(filesystem_, integer_index_working_path_)); + IntegerIndex::Create(filesystem_, integer_index_working_path_, + /*pre_mapping_fbv=*/false)); language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc index 5165040..fa62b19 100644 --- a/icing/index/numeric/integer-index-storage.cc +++ b/icing/index/numeric/integer-index-storage.cc @@ -731,7 +731,8 @@ IntegerIndexStorage::InitializeNewFiles( FileBackedVector<Bucket>::Create( filesystem, GetSortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size)); + FileBackedVector<Bucket>::kMaxFileSize, + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize unsorted_buckets pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold; @@ -740,7 +741,8 @@ IntegerIndexStorage::InitializeNewFiles( FileBackedVector<Bucket>::Create( filesystem, GetUnsortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size)); + FileBackedVector<Bucket>::kMaxFileSize, + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize flash_index_storage ICING_ASSIGN_OR_RETURN( @@ -834,7 +836,8 @@ IntegerIndexStorage::InitializeExistingFiles( FileBackedVector<Bucket>::Create( filesystem, GetSortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size)); + FileBackedVector<Bucket>::kMaxFileSize, + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize unsorted_buckets pre_mapping_mmap_size = sizeof(Bucket) * kUnsortedBucketsLengthThreshold; @@ -843,7 +846,8 @@ IntegerIndexStorage::InitializeExistingFiles( FileBackedVector<Bucket>::Create( filesystem, GetUnsortedBucketsFilePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, - FileBackedVector<Bucket>::kMaxFileSize, pre_mapping_mmap_size)); + FileBackedVector<Bucket>::kMaxFileSize, + options.pre_mapping_fbv ? pre_mapping_mmap_size : 0)); // Initialize flash_index_storage ICING_ASSIGN_OR_RETURN( diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h index ddd9231..9f2e58c 100644 --- a/icing/index/numeric/integer-index-storage.h +++ b/icing/index/numeric/integer-index-storage.h @@ -146,13 +146,16 @@ class IntegerIndexStorage : public PersistentStorage { "Max # of buckets cannot fit into FileBackedVector"); struct Options { - explicit Options() {} + explicit Options(bool pre_mapping_fbv_in) + : pre_mapping_fbv(pre_mapping_fbv_in) {} explicit Options(std::vector<Bucket> custom_init_sorted_buckets_in, - std::vector<Bucket> custom_init_unsorted_buckets_in) + std::vector<Bucket> custom_init_unsorted_buckets_in, + bool pre_mapping_fbv_in) : custom_init_sorted_buckets(std::move(custom_init_sorted_buckets_in)), custom_init_unsorted_buckets( - std::move(custom_init_unsorted_buckets_in)) {} + std::move(custom_init_unsorted_buckets_in)), + pre_mapping_fbv(pre_mapping_fbv_in) {} bool IsValid() const; @@ -168,6 +171,10 @@ class IntegerIndexStorage : public PersistentStorage { // should be [INT64_MIN, INT64_MAX]. std::vector<Bucket> custom_init_sorted_buckets; std::vector<Bucket> custom_init_unsorted_buckets; + + // Flag indicating whether memory map max possible file size for underlying + // FileBackedVector before growing the actual file size. + bool pre_mapping_fbv; }; // Metadata file layout: <Crcs><Info> diff --git a/icing/index/numeric/integer-index-storage_benchmark.cc b/icing/index/numeric/integer-index-storage_benchmark.cc index 27f35d9..bf5f134 100644 --- a/icing/index/numeric/integer-index-storage_benchmark.cc +++ b/icing/index/numeric/integer-index-storage_benchmark.cc @@ -68,6 +68,8 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::SizeIs; +static constexpr bool kPreMappingFbv = true; + static constexpr SectionId kDefaultSectionId = 12; static constexpr int kDefaultSeed = 12345; @@ -151,7 +153,7 @@ void BM_Index(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage, IntegerIndexStorage::Create( benchmark.filesystem, benchmark.working_path, - IntegerIndexStorage::Options(), + IntegerIndexStorage::Options(kPreMappingFbv), &benchmark.posting_list_serializer)); state.ResumeTiming(); @@ -211,7 +213,7 @@ void BM_BatchIndex(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage, IntegerIndexStorage::Create( benchmark.filesystem, benchmark.working_path, - IntegerIndexStorage::Options(), + IntegerIndexStorage::Options(kPreMappingFbv), &benchmark.posting_list_serializer)); std::vector<int64_t> keys_copy(keys); state.ResumeTiming(); @@ -262,7 +264,7 @@ void BM_ExactQuery(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path, - IntegerIndexStorage::Options(), + IntegerIndexStorage::Options(kPreMappingFbv), &benchmark.posting_list_serializer)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumberGenerator<int64_t>> generator, @@ -339,7 +341,7 @@ void BM_RangeQueryAll(benchmark::State& state) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, IntegerIndexStorage::Create(benchmark.filesystem, benchmark.working_path, - IntegerIndexStorage::Options(), + IntegerIndexStorage::Options(kPreMappingFbv), &benchmark.posting_list_serializer)); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<NumberGenerator<int64_t>> generator, diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc index ed7d5db..4d4e665 100644 --- a/icing/index/numeric/integer-index-storage_test.cc +++ b/icing/index/numeric/integer-index-storage_test.cc @@ -71,7 +71,7 @@ static constexpr int32_t kCorruptedValueOffset = 3; static constexpr DocumentId kDefaultDocumentId = 123; static constexpr SectionId kDefaultSectionId = 31; -class IntegerIndexStorageTest : public ::testing::Test { +class IntegerIndexStorageTest : public ::testing::TestWithParam<bool> { protected: void SetUp() override { base_dir_ = GetTestTempDir() + "/icing"; @@ -105,17 +105,18 @@ libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query( return hits; } -TEST_F(IntegerIndexStorageTest, OptionsEmptyCustomInitBucketsShouldBeValid) { - EXPECT_THAT(Options().IsValid(), IsTrue()); +TEST_P(IntegerIndexStorageTest, OptionsEmptyCustomInitBucketsShouldBeValid) { + EXPECT_THAT(Options(/*pre_mapping_fbv_in=*/GetParam()).IsValid(), IsTrue()); } -TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) { +TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) { // Invalid custom init sorted bucket EXPECT_THAT( Options(/*custom_init_sorted_buckets_in=*/ {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)}, /*custom_init_unsorted_buckets_in=*/ - {Bucket(10, std::numeric_limits<int64_t>::max())}) + {Bucket(10, std::numeric_limits<int64_t>::max())}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); @@ -124,12 +125,13 @@ TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) { Options(/*custom_init_sorted_buckets_in=*/ {Bucket(10, std::numeric_limits<int64_t>::max())}, /*custom_init_unsorted_buckets_in=*/ - {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)}) + {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); } -TEST_F(IntegerIndexStorageTest, +TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsPostingListIdentifier) { // Custom init buckets should contain invalid posting list identifier. PostingListIdentifier valid_posting_list_identifier(0, 0, 0); @@ -140,7 +142,8 @@ TEST_F(IntegerIndexStorageTest, {Bucket(std::numeric_limits<int64_t>::min(), std::numeric_limits<int64_t>::max(), valid_posting_list_identifier)}, - /*custom_init_unsorted_buckets_in=*/{}) + /*custom_init_unsorted_buckets_in=*/{}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); @@ -149,17 +152,19 @@ TEST_F(IntegerIndexStorageTest, /*custom_init_unsorted_buckets_in=*/ {Bucket(std::numeric_limits<int64_t>::min(), std::numeric_limits<int64_t>::max(), - valid_posting_list_identifier)}) + valid_posting_list_identifier)}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); } -TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) { +TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) { // sorted buckets overlap EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/ {Bucket(std::numeric_limits<int64_t>::min(), -100), Bucket(-100, std::numeric_limits<int64_t>::max())}, - /*custom_init_unsorted_buckets_in=*/{}) + /*custom_init_unsorted_buckets_in=*/{}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); @@ -167,7 +172,8 @@ TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) { EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{}, /*custom_init_unsorted_buckets_in=*/ {Bucket(-100, std::numeric_limits<int64_t>::max()), - Bucket(std::numeric_limits<int64_t>::min(), -100)}) + Bucket(std::numeric_limits<int64_t>::min(), -100)}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); @@ -177,18 +183,19 @@ TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) { Bucket(-99, 0)}, /*custom_init_unsorted_buckets_in=*/ {Bucket(200, std::numeric_limits<int64_t>::max()), - Bucket(0, 50), Bucket(51, 199)}) + Bucket(0, 50), Bucket(51, 199)}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); } -TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) { +TEST_P(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) { // Missing INT64_MAX EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/ {Bucket(std::numeric_limits<int64_t>::min(), -100), Bucket(-99, 0)}, /*custom_init_unsorted_buckets_in=*/ - {Bucket(1, 1000)}) + {Bucket(1, 1000)}, /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); @@ -196,7 +203,8 @@ TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) { EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/ {Bucket(-200, -100), Bucket(-99, 0)}, /*custom_init_unsorted_buckets_in=*/ - {Bucket(1, std::numeric_limits<int64_t>::max())}) + {Bucket(1, std::numeric_limits<int64_t>::max())}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); @@ -204,24 +212,27 @@ TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) { EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/ {Bucket(std::numeric_limits<int64_t>::min(), -100)}, /*custom_init_unsorted_buckets_in=*/ - {Bucket(1, std::numeric_limits<int64_t>::max())}) + {Bucket(1, std::numeric_limits<int64_t>::max())}, + /*pre_mapping_fbv_in=*/GetParam()) .IsValid(), IsFalse()); } -TEST_F(IntegerIndexStorageTest, InvalidWorkingPath) { - EXPECT_THAT(IntegerIndexStorage::Create( - filesystem_, "/dev/null/integer_index_storage_test", - Options(), serializer_.get()), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); +TEST_P(IntegerIndexStorageTest, InvalidWorkingPath) { + EXPECT_THAT( + IntegerIndexStorage::Create( + filesystem_, "/dev/null/integer_index_storage_test", + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get()), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } -TEST_F(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) { +TEST_P(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) { Options invalid_options( /*custom_init_sorted_buckets_in=*/{}, - /*custom_init_unsorted_buckets_in=*/{ - Bucket(-100, std::numeric_limits<int64_t>::max()), - Bucket(std::numeric_limits<int64_t>::min(), -100)}); + /*custom_init_unsorted_buckets_in=*/ + {Bucket(-100, std::numeric_limits<int64_t>::max()), + Bucket(std::numeric_limits<int64_t>::min(), -100)}, + /*pre_mapping_fbv_in=*/GetParam()); ASSERT_THAT(invalid_options.IsValid(), IsFalse()); EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_, @@ -229,13 +240,14 @@ TEST_F(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(IntegerIndexStorageTest, InitializeNewFiles) { +TEST_P(IntegerIndexStorageTest, InitializeNewFiles) { { // Create new integer index storage ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->PersistToDisk()); @@ -273,12 +285,13 @@ TEST_F(IntegerIndexStorageTest, InitializeNewFiles) { .Get())); } -TEST_F(IntegerIndexStorageTest, +TEST_P(IntegerIndexStorageTest, InitializationShouldFailWithoutPersistToDiskOrDestruction) { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Insert some data. @@ -291,16 +304,19 @@ TEST_F(IntegerIndexStorageTest, // Without calling PersistToDisk, checksums will not be recomputed or synced // to disk, so initializing another instance on the same files should fail. - EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, working_path_, Options(), - serializer_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT( + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get()), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } -TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) { +TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage1, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Insert some data. @@ -323,7 +339,8 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage2, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); EXPECT_THAT( Query(storage2.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(), @@ -332,13 +349,14 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) { ElementsAreArray(doc_hit_info_vec.begin(), doc_hit_info_vec.end()))); } -TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) { +TEST_P(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) { std::vector<DocHitInfo> doc_hit_info_vec; { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Insert some data. @@ -362,7 +380,8 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) { // we should be able to get the same contents. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); EXPECT_THAT( Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(), @@ -372,13 +391,14 @@ TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) { } } -TEST_F(IntegerIndexStorageTest, +TEST_P(IntegerIndexStorageTest, InitializeExistingFilesWithWrongAllCrcShouldFail) { { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -406,8 +426,9 @@ TEST_F(IntegerIndexStorageTest, // Attempt to create the integer index storage with metadata containing // corrupted all_crc. This should fail. libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>> - storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, - Options(), serializer_.get()); + storage_or = IntegerIndexStorage::Create( + filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(storage_or.status().error_message(), @@ -415,13 +436,14 @@ TEST_F(IntegerIndexStorageTest, } } -TEST_F(IntegerIndexStorageTest, +TEST_P(IntegerIndexStorageTest, InitializeExistingFilesWithCorruptedInfoShouldFail) { { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -450,8 +472,9 @@ TEST_F(IntegerIndexStorageTest, // Attempt to create the integer index storage with info that doesn't match // its checksum and confirm that it fails. libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>> - storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, - Options(), serializer_.get()); + storage_or = IntegerIndexStorage::Create( + filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(storage_or.status().error_message(), @@ -459,13 +482,14 @@ TEST_F(IntegerIndexStorageTest, } } -TEST_F(IntegerIndexStorageTest, +TEST_P(IntegerIndexStorageTest, InitializeExistingFilesWithCorruptedSortedBucketsShouldFail) { { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -496,8 +520,9 @@ TEST_F(IntegerIndexStorageTest, // Attempt to create the integer index storage with metadata containing // corrupted sorted_buckets_crc. This should fail. libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>> - storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, - Options(), serializer_.get()); + storage_or = IntegerIndexStorage::Create( + filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(storage_or.status().error_message(), @@ -505,13 +530,14 @@ TEST_F(IntegerIndexStorageTest, } } -TEST_F(IntegerIndexStorageTest, +TEST_P(IntegerIndexStorageTest, InitializeExistingFilesWithCorruptedUnsortedBucketsShouldFail) { { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId, /*new_keys=*/{0, 100, -100})); @@ -544,8 +570,9 @@ TEST_F(IntegerIndexStorageTest, // Attempt to create the integer index storage with metadata containing // corrupted unsorted_buckets_crc. This should fail. libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>> - storage_or = IntegerIndexStorage::Create(filesystem_, working_path_, - Options(), serializer_.get()); + storage_or = IntegerIndexStorage::Create( + filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get()); EXPECT_THAT(storage_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(storage_or.status().error_message(), @@ -555,18 +582,19 @@ TEST_F(IntegerIndexStorageTest, // TODO(b/259744228): add test for corrupted flash_index_storage -TEST_F(IntegerIndexStorageTest, InvalidQuery) { +TEST_P(IntegerIndexStorageTest, InvalidQuery) { // Create new integer index storage ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); EXPECT_THAT( storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/-1), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(IntegerIndexStorageTest, ExactQuerySortedBuckets) { +TEST_P(IntegerIndexStorageTest, ExactQuerySortedBuckets) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -580,7 +608,8 @@ TEST_F(IntegerIndexStorageTest, ExactQuerySortedBuckets) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into sorted buckets [(-1000,-100), (200,300)]. @@ -620,7 +649,7 @@ TEST_F(IntegerIndexStorageTest, ExactQuerySortedBuckets) { EqualsDocHitInfo(/*document_id=*/4, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) { +TEST_P(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -634,7 +663,8 @@ TEST_F(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)]. @@ -680,7 +710,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) { EqualsDocHitInfo(/*document_id=*/4, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, ExactQueryIdenticalKeys) { +TEST_P(IntegerIndexStorageTest, ExactQueryIdenticalKeys) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -694,7 +724,8 @@ TEST_F(IntegerIndexStorageTest, ExactQueryIdenticalKeys) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into buckets [(0,100), (1000,INT64_MAX)]. @@ -724,7 +755,7 @@ TEST_F(IntegerIndexStorageTest, ExactQueryIdenticalKeys) { EqualsDocHitInfo(/*document_id=*/2, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) { +TEST_P(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) { std::vector<Bucket> custom_init_sorted_buckets = { Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300), Bucket(301, 999)}; @@ -736,7 +767,8 @@ TEST_F(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); EXPECT_THAT( @@ -745,7 +777,7 @@ TEST_F(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) { IsOkAndHolds(IsEmpty())); } -TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) { +TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -759,7 +791,8 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into sorted buckets [(-1000,-100), (200,300)]. @@ -799,7 +832,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) { IsOkAndHolds(IsEmpty())); } -TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) { +TEST_P(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -813,7 +846,8 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)]. @@ -856,7 +890,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) { EqualsDocHitInfo(/*document_id=*/2, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) { +TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -870,7 +904,8 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into sorted bucket (0,100). @@ -907,7 +942,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) { IsOkAndHolds(IsEmpty())); } -TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) { +TEST_P(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -921,7 +956,8 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into unsorted buckets (-99,-1). @@ -958,7 +994,7 @@ TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) { IsOkAndHolds(IsEmpty())); } -TEST_F(IntegerIndexStorageTest, RangeQueryMultipleBuckets) { +TEST_P(IntegerIndexStorageTest, RangeQueryMultipleBuckets) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -972,7 +1008,8 @@ TEST_F(IntegerIndexStorageTest, RangeQueryMultipleBuckets) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys into buckets [(-1000,-100), (200,300), (1000,INT64_MAX), @@ -1046,7 +1083,7 @@ TEST_F(IntegerIndexStorageTest, RangeQueryMultipleBuckets) { EqualsDocHitInfo(/*document_id=*/0, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, BatchAdd) { +TEST_P(IntegerIndexStorageTest, BatchAdd) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -1060,7 +1097,8 @@ TEST_F(IntegerIndexStorageTest, BatchAdd) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Batch add the following keys (including some edge cases) to test the @@ -1085,10 +1123,11 @@ TEST_F(IntegerIndexStorageTest, BatchAdd) { } } -TEST_F(IntegerIndexStorageTest, BatchAddShouldDedupeKeys) { +TEST_P(IntegerIndexStorageTest, BatchAddShouldDedupeKeys) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); std::vector<int64_t> keys = {2, 3, 1, 2, 4, -1, -1, 100, 3}; @@ -1098,7 +1137,7 @@ TEST_F(IntegerIndexStorageTest, BatchAddShouldDedupeKeys) { EXPECT_THAT(storage->num_data(), Eq(6)); } -TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) { +TEST_P(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -1112,7 +1151,8 @@ TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys with same document id and section id. @@ -1132,7 +1172,7 @@ TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) { ElementsAre(EqualsDocHitInfo(/*document_id=*/0, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, +TEST_P(IntegerIndexStorageTest, MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. @@ -1147,7 +1187,8 @@ TEST_F(IntegerIndexStorageTest, IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Add some keys with same document id but different section ids. @@ -1194,10 +1235,11 @@ TEST_F(IntegerIndexStorageTest, EqualsDocHitInfo(kDefaultDocumentId, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, SplitBuckets) { +TEST_P(IntegerIndexStorageTest, SplitBuckets) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); uint32_t block_size = FlashIndexStorage::SelectBlockSize(); @@ -1266,10 +1308,11 @@ TEST_F(IntegerIndexStorageTest, SplitBuckets) { } } -TEST_F(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) { +TEST_P(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); uint32_t block_size = FlashIndexStorage::SelectBlockSize(); @@ -1338,7 +1381,7 @@ TEST_F(IntegerIndexStorageTest, SplitBucketsTriggerSortBuckets) { } } -TEST_F(IntegerIndexStorageTest, TransferIndex) { +TEST_P(IntegerIndexStorageTest, TransferIndex) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -1352,7 +1395,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndex) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId, @@ -1390,7 +1434,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndex) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp", - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); EXPECT_THAT( storage->TransferIndex(document_id_old_to_new, new_storage.get()), IsOk()); @@ -1401,7 +1446,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndex) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp", - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); std::vector<SectionId> expected_sections = {kDefaultSectionId}; EXPECT_THAT(new_storage->num_data(), Eq(7)); @@ -1444,10 +1490,11 @@ TEST_F(IntegerIndexStorageTest, TransferIndex) { EqualsDocHitInfo(/*document_id=*/4, expected_sections)))); } -TEST_F(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) { +TEST_P(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem_, working_path_, Options(), + IntegerIndexStorage::Create(filesystem_, working_path_, + Options(/*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId, @@ -1464,7 +1511,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp", - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()), IsOk()); @@ -1479,7 +1527,7 @@ TEST_F(IntegerIndexStorageTest, TransferIndexOutOfRangeDocumentId) { IsOkAndHolds(IsEmpty())); } -TEST_F(IntegerIndexStorageTest, TransferEmptyIndex) { +TEST_P(IntegerIndexStorageTest, TransferEmptyIndex) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -1493,7 +1541,8 @@ TEST_F(IntegerIndexStorageTest, TransferEmptyIndex) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ASSERT_THAT(storage->num_data(), Eq(0)); @@ -1504,7 +1553,8 @@ TEST_F(IntegerIndexStorageTest, TransferEmptyIndex) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp", - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); EXPECT_THAT(storage->TransferIndex(document_id_old_to_new, new_storage.get()), IsOk()); @@ -1516,7 +1566,7 @@ TEST_F(IntegerIndexStorageTest, TransferEmptyIndex) { IsOkAndHolds(IsEmpty())); } -TEST_F(IntegerIndexStorageTest, TransferIndexDeleteAll) { +TEST_P(IntegerIndexStorageTest, TransferIndexDeleteAll) { // We use predefined custom buckets to initialize new integer index storage // and create some test keys accordingly. std::vector<Bucket> custom_init_sorted_buckets = { @@ -1530,7 +1580,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexDeleteAll) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, kDefaultSectionId, @@ -1555,7 +1606,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexDeleteAll) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp", - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); EXPECT_THAT( storage->TransferIndex(document_id_old_to_new, new_storage.get()), IsOk()); @@ -1566,7 +1618,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexDeleteAll) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, working_path_ + "_temp", - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); std::vector<SectionId> expected_sections = {kDefaultSectionId}; EXPECT_THAT(new_storage->num_data(), Eq(0)); @@ -1576,7 +1629,7 @@ TEST_F(IntegerIndexStorageTest, TransferIndexDeleteAll) { IsOkAndHolds(IsEmpty())); } -TEST_F(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) { +TEST_P(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) { // This test verifies that if TransferIndex invokes bucket merging logic to // ensure sure we're able to avoid having mostly empty buckets after inserting // and deleting data for many rounds. @@ -1594,7 +1647,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, kDefaultSectionId, @@ -1630,7 +1684,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, new_storage_working_path, - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); EXPECT_THAT( storage->TransferIndex(document_id_old_to_new, new_storage.get()), IsOk()); @@ -1651,7 +1706,7 @@ TEST_F(IntegerIndexStorageTest, TransferIndexShouldInvokeMergeBuckets) { EXPECT_THAT(bk1->key_upper(), Eq(std::numeric_limits<int64_t>::max())); } -TEST_F(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) { +TEST_P(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) { // This test verifies that if TransferIndex invokes bucket merging logic and // doesn't merge buckets too aggressively to ensure we won't get a bucket with // too many data. @@ -1669,7 +1724,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) { IntegerIndexStorage::Create( filesystem_, working_path_, Options(std::move(custom_init_sorted_buckets), - std::move(custom_init_unsorted_buckets)), + std::move(custom_init_unsorted_buckets), + /*pre_mapping_fbv_in=*/GetParam()), serializer_.get())); // Insert data into 2 buckets so that total # of these 2 buckets exceed @@ -1705,7 +1761,8 @@ TEST_F(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create(filesystem_, new_storage_working_path, - Options(), serializer_.get())); + Options(/*pre_mapping_fbv_in=*/GetParam()), + serializer_.get())); EXPECT_THAT( storage->TransferIndex(document_id_old_to_new, new_storage.get()), IsOk()); @@ -1729,6 +1786,9 @@ TEST_F(IntegerIndexStorageTest, TransferIndexExceedsMergeThreshold) { EXPECT_THAT(bk2->key_upper(), Eq(std::numeric_limits<int64_t>::max())); } +INSTANTIATE_TEST_SUITE_P(IntegerIndexStorageTest, IntegerIndexStorageTest, + testing::Values(true, false)); + } // namespace } // namespace lib diff --git a/icing/index/numeric/integer-index.cc b/icing/index/numeric/integer-index.cc index e7978bf..de00b85 100644 --- a/icing/index/numeric/integer-index.cc +++ b/icing/index/numeric/integer-index.cc @@ -90,7 +90,8 @@ GetAllExistingPropertyPaths(const Filesystem& filesystem, libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType> GetPropertyIntegerIndexStorageMap( const Filesystem& filesystem, const std::string& working_path, - PostingListIntegerIndexSerializer* posting_list_serializer) { + PostingListIntegerIndexSerializer* posting_list_serializer, + bool pre_mapping_fbv) { ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths, GetAllExistingPropertyPaths(filesystem, working_path)); @@ -101,11 +102,11 @@ GetPropertyIntegerIndexStorageMap( } std::string storage_working_path = GetPropertyIndexStoragePath(working_path, property_path); - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create(filesystem, storage_working_path, - IntegerIndexStorage::Options(), - posting_list_serializer)); + ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerIndexStorage> storage, + IntegerIndexStorage::Create( + filesystem, storage_working_path, + IntegerIndexStorage::Options(pre_mapping_fbv), + posting_list_serializer)); property_to_storage_map.insert( std::make_pair(property_path, std::move(storage))); } @@ -160,7 +161,7 @@ libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && { integer_index_.filesystem_, GetPropertyIndexStoragePath(integer_index_.working_path_, kWildcardPropertyIndexFileName), - IntegerIndexStorage::Options(), + IntegerIndexStorage::Options(pre_mapping_fbv_), integer_index_.posting_list_serializer_.get())); } ICING_RETURN_IF_ERROR( @@ -174,7 +175,7 @@ libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && { integer_index_.filesystem_, GetPropertyIndexStoragePath(integer_index_.working_path_, property_path_), - IntegerIndexStorage::Options(), + IntegerIndexStorage::Options(pre_mapping_fbv_), integer_index_.posting_list_serializer_.get())); target_storage = new_storage.get(); integer_index_.property_to_storage_map_.insert( @@ -186,15 +187,18 @@ libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && { } /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> -IntegerIndex::Create(const Filesystem& filesystem, std::string working_path) { +IntegerIndex::Create(const Filesystem& filesystem, std::string working_path, + bool pre_mapping_fbv) { if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) { // Discard working_path if metadata file is missing, and reinitialize. if (filesystem.DirectoryExists(working_path.c_str())) { ICING_RETURN_IF_ERROR(Discard(filesystem, working_path)); } - return InitializeNewFiles(filesystem, std::move(working_path)); + return InitializeNewFiles(filesystem, std::move(working_path), + pre_mapping_fbv); } - return InitializeExistingFiles(filesystem, std::move(working_path)); + return InitializeExistingFiles(filesystem, std::move(working_path), + pre_mapping_fbv); } IntegerIndex::~IntegerIndex() { @@ -265,8 +269,9 @@ libtextclassifier3::Status IntegerIndex::Optimize( // Transfer all indexed data from current integer index to new integer // index. Also PersistToDisk and destruct the instance after finishing, so // we can safely swap directories later. - ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerIndex> new_integer_index, - Create(filesystem_, temp_working_path_ddir.dir())); + ICING_ASSIGN_OR_RETURN( + std::unique_ptr<IntegerIndex> new_integer_index, + Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_)); ICING_RETURN_IF_ERROR( TransferIndex(document_id_old_to_new, new_integer_index.get())); new_integer_index->set_last_added_document_id(new_last_added_document_id); @@ -316,14 +321,15 @@ libtextclassifier3::Status IntegerIndex::Optimize( filesystem_, GetPropertyIndexStoragePath(working_path_, kWildcardPropertyIndexFileName), - IntegerIndexStorage::Options(), posting_list_serializer_.get())); + IntegerIndexStorage::Options(pre_mapping_fbv_), + posting_list_serializer_.get())); } // Initialize all existing integer index storages. - ICING_ASSIGN_OR_RETURN( - property_to_storage_map_, - GetPropertyIntegerIndexStorageMap(filesystem_, working_path_, - posting_list_serializer_.get())); + ICING_ASSIGN_OR_RETURN(property_to_storage_map_, + GetPropertyIntegerIndexStorageMap( + filesystem_, working_path_, + posting_list_serializer_.get(), pre_mapping_fbv_)); return libtextclassifier3::Status::OK; } @@ -359,7 +365,8 @@ libtextclassifier3::Status IntegerIndex::Clear() { /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> IntegerIndex::InitializeNewFiles(const Filesystem& filesystem, - std::string&& working_path) { + std::string&& working_path, + bool pre_mapping_fbv) { // Create working directory. if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) { return absl_ports::InternalError( @@ -390,7 +397,8 @@ IntegerIndex::InitializeNewFiles(const Filesystem& filesystem, std::make_unique<PostingListIntegerIndexSerializer>(), std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)), /*property_to_storage_map=*/{}, std::move(wildcard_property_storage), - /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr)); + /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr, + pre_mapping_fbv)); // Initialize info content by writing mapped memory directly. Info& info_ref = new_integer_index->info(); @@ -405,7 +413,8 @@ IntegerIndex::InitializeNewFiles(const Filesystem& filesystem, /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem, - std::string&& working_path) { + std::string&& working_path, + bool pre_mapping_fbv) { // Mmap the content of the crcs and info. ICING_ASSIGN_OR_RETURN( MemoryMappedFile metadata_mmapped_file, @@ -422,10 +431,10 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem, std::make_unique<PostingListIntegerIndexSerializer>(); // Initialize all existing integer index storages. - ICING_ASSIGN_OR_RETURN( - PropertyToStorageMapType property_to_storage_map, - GetPropertyIntegerIndexStorageMap(filesystem, working_path, - posting_list_serializer.get())); + ICING_ASSIGN_OR_RETURN(PropertyToStorageMapType property_to_storage_map, + GetPropertyIntegerIndexStorageMap( + filesystem, working_path, + posting_list_serializer.get(), pre_mapping_fbv)); std::string wildcard_property_path = GetWildcardPropertyStorageFilePath(working_path); @@ -445,7 +454,8 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem, filesystem, GetPropertyIndexStoragePath(working_path, kWildcardPropertyIndexFileName), - IntegerIndexStorage::Options(), posting_list_serializer.get())); + IntegerIndexStorage::Options(pre_mapping_fbv), + posting_list_serializer.get())); } // Create instance. @@ -453,7 +463,8 @@ IntegerIndex::InitializeExistingFiles(const Filesystem& filesystem, filesystem, std::move(working_path), std::move(posting_list_serializer), std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)), std::move(property_to_storage_map), std::move(wildcard_property_storage), - std::move(wildcard_properties_set), std::move(wildcard_index_storage))); + std::move(wildcard_properties_set), std::move(wildcard_index_storage), + pre_mapping_fbv)); // Initialize existing PersistentStorage. Checksums will be validated. ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage()); @@ -476,7 +487,7 @@ IntegerIndex::TransferIntegerIndexStorage( std::unique_ptr<IntegerIndexStorage> new_storage, IntegerIndexStorage::Create( new_integer_index->filesystem_, new_storage_working_path, - IntegerIndexStorage::Options(), + IntegerIndexStorage::Options(pre_mapping_fbv_), new_integer_index->posting_list_serializer_.get())); ICING_RETURN_IF_ERROR( diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h index 303bb41..23b463f 100644 --- a/icing/index/numeric/integer-index.h +++ b/icing/index/numeric/integer-index.h @@ -90,6 +90,9 @@ class IntegerIndex : public NumericIndex<int64_t> { // related files will be stored under this directory. See // PersistentStorage for more details about the concept of // working_path. + // pre_mapping_fbv: flag indicating whether memory map max possible file size + // for underlying FileBackedVector before growing the actual + // file size. // // Returns: // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored @@ -97,7 +100,8 @@ class IntegerIndex : public NumericIndex<int64_t> { // - INTERNAL_ERROR on I/O errors. // - Any FileBackedVector/MemoryMappedFile errors. static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create( - const Filesystem& filesystem, std::string working_path); + const Filesystem& filesystem, std::string working_path, + bool pre_mapping_fbv); // Deletes IntegerIndex under working_path. // @@ -118,7 +122,7 @@ class IntegerIndex : public NumericIndex<int64_t> { std::string_view property_path, DocumentId document_id, SectionId section_id) override { return std::make_unique<Editor>(property_path, document_id, section_id, - *this); + *this, pre_mapping_fbv_); } // Returns a DocHitInfoIterator for iterating through all docs which have the @@ -184,9 +188,11 @@ class IntegerIndex : public NumericIndex<int64_t> { class Editor : public NumericIndex<int64_t>::Editor { public: explicit Editor(std::string_view property_path, DocumentId document_id, - SectionId section_id, IntegerIndex& integer_index) + SectionId section_id, IntegerIndex& integer_index, + bool pre_mapping_fbv) : NumericIndex<int64_t>::Editor(property_path, document_id, section_id), - integer_index_(integer_index) {} + integer_index_(integer_index), + pre_mapping_fbv_(pre_mapping_fbv) {} ~Editor() override = default; @@ -204,6 +210,10 @@ class IntegerIndex : public NumericIndex<int64_t> { std::vector<int64_t> seen_keys_; IntegerIndex& integer_index_; // Does not own. + + // Flag indicating whether memory map max possible file size for underlying + // FileBackedVector before growing the actual file size. + bool pre_mapping_fbv_; }; explicit IntegerIndex( @@ -215,7 +225,8 @@ class IntegerIndex : public NumericIndex<int64_t> { std::unique_ptr<FileBackedProto<WildcardPropertyStorage>> wildcard_property_storage, std::unordered_set<std::string> wildcard_properties_set, - std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage) + std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage, + bool pre_mapping_fbv) : NumericIndex<int64_t>(filesystem, std::move(working_path), kWorkingPathType), posting_list_serializer_(std::move(posting_list_serializer)), @@ -223,14 +234,16 @@ class IntegerIndex : public NumericIndex<int64_t> { property_to_storage_map_(std::move(property_to_storage_map)), wildcard_property_storage_(std::move(wildcard_property_storage)), wildcard_properties_set_(std::move(wildcard_properties_set)), - wildcard_index_storage_(std::move(wildcard_index_storage)) {} + wildcard_index_storage_(std::move(wildcard_index_storage)), + pre_mapping_fbv_(pre_mapping_fbv) {} static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> - InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path); + InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path, + bool pre_mapping_fbv); static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> InitializeExistingFiles(const Filesystem& filesystem, - std::string&& working_path); + std::string&& working_path, bool pre_mapping_fbv); // Adds the property path to the list of properties using wildcard storage. // This will both update the in-memory list (wildcard_properties_set_) and @@ -346,6 +359,10 @@ class IntegerIndex : public NumericIndex<int64_t> { // The index storage that is used once we have already created // kMaxPropertyStorages in property_to_storage_map. std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_; + + // Flag indicating whether memory map max possible file size for underlying + // FileBackedVector before growing the actual file size. + bool pre_mapping_fbv_; }; } // namespace lib diff --git a/icing/index/numeric/integer-index_test.cc b/icing/index/numeric/integer-index_test.cc index 92433e1..d3901cc 100644 --- a/icing/index/numeric/integer-index_test.cc +++ b/icing/index/numeric/integer-index_test.cc @@ -114,7 +114,8 @@ class NumericIndexIntegerTest : public ::testing::Test { template <> libtextclassifier3::StatusOr<std::unique_ptr<NumericIndex<int64_t>>> CreateIntegerIndex<IntegerIndex>() { - return IntegerIndex::Create(filesystem_, working_path_); + return IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/false); } template <typename NotIntegerIndexType> @@ -1125,19 +1126,22 @@ TYPED_TEST(NumericIndexIntegerTest, Clear) { } // Tests for persistent integer index only -class IntegerIndexTest : public NumericIndexIntegerTest<IntegerIndex> {}; +class IntegerIndexTest : public NumericIndexIntegerTest<IntegerIndex>, + public ::testing::WithParamInterface<bool> {}; -TEST_F(IntegerIndexTest, InvalidWorkingPath) { - EXPECT_THAT(IntegerIndex::Create(filesystem_, "/dev/null/integer_index_test"), +TEST_P(IntegerIndexTest, InvalidWorkingPath) { + EXPECT_THAT(IntegerIndex::Create(filesystem_, "/dev/null/integer_index_test", + /*pre_mapping_fbv=*/GetParam()), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } -TEST_F(IntegerIndexTest, InitializeNewFiles) { +TEST_P(IntegerIndexTest, InitializeNewFiles) { { ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); ICING_ASSERT_OK(integer_index->PersistToDisk()); } @@ -1173,10 +1177,12 @@ TEST_F(IntegerIndexTest, InitializeNewFiles) { .Get())); } -TEST_F(IntegerIndexTest, +TEST_P(IntegerIndexTest, InitializationShouldFailWithoutPersistToDiskOrDestruction) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndex> integer_index, + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Insert some data. Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0, @@ -1188,13 +1194,16 @@ TEST_F(IntegerIndexTest, // Without calling PersistToDisk, checksums will not be recomputed or synced // to disk, so initializing another instance on the same files should fail. - EXPECT_THAT(IntegerIndex::Create(filesystem_, working_path_), + EXPECT_THAT(IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam()), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); } -TEST_F(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) { - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index1, - IntegerIndex::Create(filesystem_, working_path_)); +TEST_P(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndex> integer_index1, + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Insert some data. Index(integer_index1.get(), kDefaultTestPropertyPath, /*document_id=*/0, @@ -1215,8 +1224,10 @@ TEST_F(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) { // should succeed, and we should be able to get the same contents. ICING_EXPECT_OK(integer_index1->PersistToDisk()); - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index2, - IntegerIndex::Create(filesystem_, working_path_)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndex> integer_index2, + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); EXPECT_THAT(integer_index2->last_added_document_id(), Eq(2)); EXPECT_THAT(Query(integer_index2.get(), kDefaultTestPropertyPath, /*key_lower=*/std::numeric_limits<int64_t>::min(), @@ -1225,12 +1236,13 @@ TEST_F(IntegerIndexTest, InitializationShouldSucceedWithPersistToDisk) { doc_hit_info_vec.end()))); } -TEST_F(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) { +TEST_P(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) { std::vector<DocHitInfo> doc_hit_info_vec; { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Insert some data. Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0, @@ -1254,7 +1266,8 @@ TEST_F(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) { // we should be able to get the same contents. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); EXPECT_THAT(integer_index->last_added_document_id(), Eq(2)); EXPECT_THAT(Query(integer_index.get(), kDefaultTestPropertyPath, /*key_lower=*/std::numeric_limits<int64_t>::min(), @@ -1264,11 +1277,12 @@ TEST_F(IntegerIndexTest, InitializationShouldSucceedAfterDestruction) { } } -TEST_F(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) { +TEST_P(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Insert some data. Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0, /*section_id=*/20, /*keys=*/{0, 100, -100}); @@ -1300,7 +1314,8 @@ TEST_F(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) { // Attempt to create the integer index with metadata containing corrupted // all_crc. This should fail. libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> - integer_index_or = IntegerIndex::Create(filesystem_, working_path_); + integer_index_or = IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam()); EXPECT_THAT(integer_index_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(integer_index_or.status().error_message(), @@ -1308,11 +1323,12 @@ TEST_F(IntegerIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) { } } -TEST_F(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) { +TEST_P(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Insert some data. Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0, /*section_id=*/20, /*keys=*/{0, 100, -100}); @@ -1345,7 +1361,8 @@ TEST_F(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) { // Attempt to create the integer index with info that doesn't match its // checksum and confirm that it fails. libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> - integer_index_or = IntegerIndex::Create(filesystem_, working_path_); + integer_index_or = IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam()); EXPECT_THAT(integer_index_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(integer_index_or.status().error_message(), @@ -1353,12 +1370,13 @@ TEST_F(IntegerIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) { } } -TEST_F(IntegerIndexTest, +TEST_P(IntegerIndexTest, InitializeExistingFilesWithCorruptedStoragesShouldFail) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Insert some data. Index(integer_index.get(), kDefaultTestPropertyPath, /*document_id=*/0, /*section_id=*/20, /*keys=*/{0, 100, -100}); @@ -1377,11 +1395,12 @@ TEST_F(IntegerIndexTest, absl_ports::StrCat(working_path_, "/", kDefaultTestPropertyPath); ASSERT_TRUE(filesystem_.DirectoryExists(storage_working_path.c_str())); - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndexStorage> storage, - IntegerIndexStorage::Create( - filesystem_, std::move(storage_working_path), - IntegerIndexStorage::Options(), - &posting_list_integer_index_serializer)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndexStorage> storage, + IntegerIndexStorage::Create( + filesystem_, std::move(storage_working_path), + IntegerIndexStorage::Options(/*pre_mapping_fbv=*/GetParam()), + &posting_list_integer_index_serializer)); ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/3, /*section_id=*/4, /*new_keys=*/{3, 4, 5})); @@ -1392,7 +1411,8 @@ TEST_F(IntegerIndexTest, // Attempt to create the integer index with corrupted storages. This should // fail. libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> - integer_index_or = IntegerIndex::Create(filesystem_, working_path_); + integer_index_or = IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam()); EXPECT_THAT(integer_index_or, StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); EXPECT_THAT(integer_index_or.status().error_message(), @@ -1400,7 +1420,7 @@ TEST_F(IntegerIndexTest, } } -TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) { +TEST_P(IntegerIndexTest, WildcardStoragePersistenceQuery) { // This test sets its schema assuming that max property storages == 32. ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32)); @@ -1564,7 +1584,8 @@ TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Index numeric content for other properties to force our property into the // wildcard storage. @@ -1626,8 +1647,10 @@ TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) { typeb_desired_prop_id, /*keys=*/{2}); } - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndex> integer_index, + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); EXPECT_THAT(integer_index->num_property_indices(), Eq(33)); @@ -1656,7 +1679,7 @@ TEST_F(IntegerIndexTest, WildcardStoragePersistenceQuery) { EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea)))); } -TEST_F(IntegerIndexTest, +TEST_P(IntegerIndexTest, IntegerIndexShouldWorkAfterOptimizeAndReinitialization) { constexpr std::string_view kPropertyPath1 = "prop1"; constexpr SectionId kSectionId1 = 0; @@ -1666,7 +1689,8 @@ TEST_F(IntegerIndexTest, { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Doc id = 1: insert 2 data for "prop1", "prop2" Index(integer_index.get(), kPropertyPath2, /*document_id=*/1, kSectionId2, @@ -1716,7 +1740,8 @@ TEST_F(IntegerIndexTest, // normally. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Key = 1 EXPECT_THAT(Query(integer_index.get(), kPropertyPath1, /*key_lower=*/1, @@ -1773,7 +1798,7 @@ TEST_F(IntegerIndexTest, } } -TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) { +TEST_P(IntegerIndexTest, WildcardStorageWorksAfterOptimize) { // This test sets its schema assuming that max property storages == 32. ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32)); @@ -1941,7 +1966,8 @@ TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Index numeric content for other properties to force our property into the // wildcard storage. @@ -2035,8 +2061,10 @@ TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) { EqualsDocHitInfo(/*document_id=*/0, expected_sections_typea)))); } - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndex> integer_index, + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); EXPECT_THAT(integer_index->num_property_indices(), Eq(33)); @@ -2064,8 +2092,8 @@ TEST_F(IntegerIndexTest, WildcardStorageWorksAfterOptimize) { // the individual index storages (because they don't have any hits anymore). // In this case, any properties that added content to the wildcard storage (even // if all of their content was also deleted) should still be placed in the -// wilcard storage. -TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) { +// wildcard storage. +TEST_P(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) { // This test sets its schema assuming that max property storages == 32. ASSERT_THAT(IntegerIndex::kMaxPropertyStorages, Eq(32)); @@ -2204,7 +2232,8 @@ TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) { { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); // Index numeric content for other properties to force our property into the // wildcard storage. @@ -2276,8 +2305,10 @@ TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) { EqualsDocHitInfo(/*document_id=*/1 - 1, expected_sections_typea)))); } - ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerIndex> integer_index, - IntegerIndex::Create(filesystem_, working_path_)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerIndex> integer_index, + IntegerIndex::Create(filesystem_, working_path_, + /*pre_mapping_fbv=*/GetParam())); EXPECT_THAT(integer_index->num_property_indices(), Eq(1)); @@ -2323,6 +2354,9 @@ TEST_F(IntegerIndexTest, WildcardStorageAvailableIndicesAfterOptimize) { /*document_id=*/7, expected_sections_typea)))); } +INSTANTIATE_TEST_SUITE_P(IntegerIndexTest, IntegerIndexTest, + testing::Values(true, false)); + } // namespace } // namespace lib diff --git a/icing/join/join-processor_test.cc b/icing/join/join-processor_test.cc index ec92349..0d3e57c 100644 --- a/icing/join/join-processor_test.cc +++ b/icing/join/join-processor_test.cc @@ -127,9 +127,11 @@ class JoinProcessorTest : public ::testing::Test { /*initialize_stats=*/nullptr)); doc_store_ = std::move(create_result.document_store); - ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_, - QualifiedIdTypeJoinableIndex::Create( - filesystem_, qualified_id_join_index_dir_)); + ICING_ASSERT_OK_AND_ASSIGN( + qualified_id_join_index_, + QualifiedIdTypeJoinableIndex::Create( + filesystem_, qualified_id_join_index_dir_, + /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); } void TearDown() override { diff --git a/icing/join/qualified-id-join-indexing-handler_test.cc b/icing/join/qualified-id-join-indexing-handler_test.cc index daddc4c..e48dc33 100644 --- a/icing/join/qualified-id-join-indexing-handler_test.cc +++ b/icing/join/qualified-id-join-indexing-handler_test.cc @@ -90,9 +90,11 @@ class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test { qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index"; schema_store_dir_ = base_dir_ + "/schema_store"; - ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_, - QualifiedIdTypeJoinableIndex::Create( - filesystem_, qualified_id_join_index_dir_)); + ICING_ASSERT_OK_AND_ASSIGN( + qualified_id_join_index_, + QualifiedIdTypeJoinableIndex::Create( + filesystem_, qualified_id_join_index_dir_, + /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); ICING_ASSERT_OK_AND_ASSIGN( diff --git a/icing/join/qualified-id-type-joinable-index.cc b/icing/join/qualified-id-type-joinable-index.cc index 9c25e62..a1df3d0 100644 --- a/icing/join/qualified-id-type-joinable-index.cc +++ b/icing/join/qualified-id-type-joinable-index.cc @@ -30,6 +30,7 @@ #include "icing/file/memory-mapped-file.h" #include "icing/join/doc-join-info.h" #include "icing/store/document-id.h" +#include "icing/store/dynamic-trie-key-mapper.h" #include "icing/store/key-mapper.h" #include "icing/store/persistent-hash-map-key-mapper.h" #include "icing/util/crc32.h" @@ -42,6 +43,9 @@ namespace lib { namespace { +static constexpr int32_t kDocJoinInfoMapperDynamicTrieMaxSize = + 128 * 1024 * 1024; // 128 MiB + DocumentId GetNewDocumentId( const std::vector<DocumentId>& document_id_old_to_new, DocumentId old_document_id) { @@ -68,7 +72,9 @@ std::string GetQualifiedIdStoragePath(std::string_view working_path) { /* static */ libtextclassifier3::StatusOr< std::unique_ptr<QualifiedIdTypeJoinableIndex>> QualifiedIdTypeJoinableIndex::Create(const Filesystem& filesystem, - std::string working_path) { + std::string working_path, + bool pre_mapping_fbv, + bool use_persistent_hash_map) { if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str()) || !filesystem.DirectoryExists( GetDocJoinInfoMapperPath(working_path).c_str()) || @@ -77,9 +83,11 @@ QualifiedIdTypeJoinableIndex::Create(const Filesystem& filesystem, if (filesystem.DirectoryExists(working_path.c_str())) { ICING_RETURN_IF_ERROR(Discard(filesystem, working_path)); } - return InitializeNewFiles(filesystem, std::move(working_path)); + return InitializeNewFiles(filesystem, std::move(working_path), + pre_mapping_fbv, use_persistent_hash_map); } - return InitializeExistingFiles(filesystem, std::move(working_path)); + return InitializeExistingFiles(filesystem, std::move(working_path), + pre_mapping_fbv, use_persistent_hash_map); } QualifiedIdTypeJoinableIndex::~QualifiedIdTypeJoinableIndex() { @@ -151,7 +159,8 @@ libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Optimize( // we can safely swap directories later. ICING_ASSIGN_OR_RETURN( std::unique_ptr<QualifiedIdTypeJoinableIndex> new_index, - Create(filesystem_, temp_working_path_ddir.dir())); + Create(filesystem_, temp_working_path_ddir.dir(), pre_mapping_fbv_, + use_persistent_hash_map_)); ICING_RETURN_IF_ERROR( TransferIndex(document_id_old_to_new, new_index.get())); new_index->set_last_added_document_id(new_last_added_document_id); @@ -176,10 +185,19 @@ libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Optimize( /*offset=*/0)) { return absl_ports::InternalError("Fail to read metadata file"); } - ICING_ASSIGN_OR_RETURN( - doc_join_info_mapper_, - PersistentHashMapKeyMapper<int32_t>::Create( - filesystem_, GetDocJoinInfoMapperPath(working_path_))); + if (use_persistent_hash_map_) { + ICING_ASSIGN_OR_RETURN( + doc_join_info_mapper_, + PersistentHashMapKeyMapper<int32_t>::Create( + filesystem_, GetDocJoinInfoMapperPath(working_path_), + pre_mapping_fbv_)); + } else { + ICING_ASSIGN_OR_RETURN( + doc_join_info_mapper_, + DynamicTrieKeyMapper<int32_t>::Create( + filesystem_, GetDocJoinInfoMapperPath(working_path_), + kDocJoinInfoMapperDynamicTrieMaxSize)); + } ICING_ASSIGN_OR_RETURN( qualified_id_storage_, @@ -187,7 +205,7 @@ libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Optimize( filesystem_, GetQualifiedIdStoragePath(working_path_), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, FileBackedVector<char>::kMaxFileSize, - /*pre_mapping_mmap_size=*/1024 * 1024)); + /*pre_mapping_mmap_size=*/pre_mapping_fbv_ ? 1024 * 1024 : 0)); return libtextclassifier3::Status::OK; } @@ -197,12 +215,22 @@ libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Clear() { // Discard and reinitialize doc join info mapper. std::string doc_join_info_mapper_path = GetDocJoinInfoMapperPath(working_path_); - ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<int32_t>::Delete( - filesystem_, doc_join_info_mapper_path)); - ICING_ASSIGN_OR_RETURN( - doc_join_info_mapper_, - PersistentHashMapKeyMapper<int32_t>::Create( - filesystem_, std::move(doc_join_info_mapper_path))); + if (use_persistent_hash_map_) { + ICING_RETURN_IF_ERROR(PersistentHashMapKeyMapper<int32_t>::Delete( + filesystem_, doc_join_info_mapper_path)); + ICING_ASSIGN_OR_RETURN( + doc_join_info_mapper_, + PersistentHashMapKeyMapper<int32_t>::Create( + filesystem_, std::move(doc_join_info_mapper_path), + pre_mapping_fbv_)); + } else { + ICING_RETURN_IF_ERROR(DynamicTrieKeyMapper<int32_t>::Delete( + filesystem_, doc_join_info_mapper_path)); + ICING_ASSIGN_OR_RETURN(doc_join_info_mapper_, + DynamicTrieKeyMapper<int32_t>::Create( + filesystem_, doc_join_info_mapper_path, + kDocJoinInfoMapperDynamicTrieMaxSize)); + } // Clear qualified_id_storage_. if (qualified_id_storage_->num_elements() > 0) { @@ -218,7 +246,9 @@ libtextclassifier3::Status QualifiedIdTypeJoinableIndex::Clear() { /* static */ libtextclassifier3::StatusOr< std::unique_ptr<QualifiedIdTypeJoinableIndex>> QualifiedIdTypeJoinableIndex::InitializeNewFiles(const Filesystem& filesystem, - std::string&& working_path) { + std::string&& working_path, + bool pre_mapping_fbv, + bool use_persistent_hash_map) { // Create working directory. if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) { return absl_ports::InternalError( @@ -226,11 +256,21 @@ QualifiedIdTypeJoinableIndex::InitializeNewFiles(const Filesystem& filesystem, } // Initialize doc_join_info_mapper - // TODO(b/263890397): decide PersistentHashMapKeyMapper size - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper, - PersistentHashMapKeyMapper<int32_t>::Create( - filesystem, GetDocJoinInfoMapperPath(working_path))); + std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper; + if (use_persistent_hash_map) { + // TODO(b/263890397): decide PersistentHashMapKeyMapper size + ICING_ASSIGN_OR_RETURN( + doc_join_info_mapper, + PersistentHashMapKeyMapper<int32_t>::Create( + filesystem, GetDocJoinInfoMapperPath(working_path), + pre_mapping_fbv)); + } else { + ICING_ASSIGN_OR_RETURN( + doc_join_info_mapper, + DynamicTrieKeyMapper<int32_t>::Create( + filesystem, GetDocJoinInfoMapperPath(working_path), + kDocJoinInfoMapperDynamicTrieMaxSize)); + } // Initialize qualified_id_storage ICING_ASSIGN_OR_RETURN( @@ -239,14 +279,15 @@ QualifiedIdTypeJoinableIndex::InitializeNewFiles(const Filesystem& filesystem, filesystem, GetQualifiedIdStoragePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, FileBackedVector<char>::kMaxFileSize, - /*pre_mapping_mmap_size=*/1024 * 1024)); + /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0)); // Create instance. auto new_index = std::unique_ptr<QualifiedIdTypeJoinableIndex>( new QualifiedIdTypeJoinableIndex( filesystem, std::move(working_path), /*metadata_buffer=*/std::make_unique<uint8_t[]>(kMetadataFileSize), - std::move(doc_join_info_mapper), std::move(qualified_id_storage))); + std::move(doc_join_info_mapper), std::move(qualified_id_storage), + pre_mapping_fbv, use_persistent_hash_map)); // Initialize info content. new_index->info().magic = Info::kMagic; new_index->info().last_added_document_id = kInvalidDocumentId; @@ -260,7 +301,8 @@ QualifiedIdTypeJoinableIndex::InitializeNewFiles(const Filesystem& filesystem, /* static */ libtextclassifier3::StatusOr< std::unique_ptr<QualifiedIdTypeJoinableIndex>> QualifiedIdTypeJoinableIndex::InitializeExistingFiles( - const Filesystem& filesystem, std::string&& working_path) { + const Filesystem& filesystem, std::string&& working_path, + bool pre_mapping_fbv, bool use_persistent_hash_map) { // PRead metadata file. auto metadata_buffer = std::make_unique<uint8_t[]>(kMetadataFileSize); if (!filesystem.PRead(GetMetadataFilePath(working_path).c_str(), @@ -270,10 +312,31 @@ QualifiedIdTypeJoinableIndex::InitializeExistingFiles( } // Initialize doc_join_info_mapper - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper, - PersistentHashMapKeyMapper<int32_t>::Create( - filesystem, GetDocJoinInfoMapperPath(working_path))); + bool dynamic_trie_key_mapper_dir_exists = filesystem.DirectoryExists( + absl_ports::StrCat(GetDocJoinInfoMapperPath(working_path), + "/key_mapper_dir") + .c_str()); + if ((use_persistent_hash_map && dynamic_trie_key_mapper_dir_exists) || + (!use_persistent_hash_map && !dynamic_trie_key_mapper_dir_exists)) { + // Return a failure here so that the caller can properly delete and rebuild + // this component. + return absl_ports::FailedPreconditionError("Key mapper type mismatch"); + } + + std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper; + if (use_persistent_hash_map) { + ICING_ASSIGN_OR_RETURN( + doc_join_info_mapper, + PersistentHashMapKeyMapper<int32_t>::Create( + filesystem, GetDocJoinInfoMapperPath(working_path), + pre_mapping_fbv)); + } else { + ICING_ASSIGN_OR_RETURN( + doc_join_info_mapper, + DynamicTrieKeyMapper<int32_t>::Create( + filesystem, GetDocJoinInfoMapperPath(working_path), + kDocJoinInfoMapperDynamicTrieMaxSize)); + } // Initialize qualified_id_storage ICING_ASSIGN_OR_RETURN( @@ -282,13 +345,14 @@ QualifiedIdTypeJoinableIndex::InitializeExistingFiles( filesystem, GetQualifiedIdStoragePath(working_path), MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, FileBackedVector<char>::kMaxFileSize, - /*pre_mapping_mmap_size=*/1024 * 1024)); + /*pre_mapping_mmap_size=*/pre_mapping_fbv ? 1024 * 1024 : 0)); // Create instance. auto type_joinable_index = std::unique_ptr<QualifiedIdTypeJoinableIndex>( new QualifiedIdTypeJoinableIndex( filesystem, std::move(working_path), std::move(metadata_buffer), - std::move(doc_join_info_mapper), std::move(qualified_id_storage))); + std::move(doc_join_info_mapper), std::move(qualified_id_storage), + pre_mapping_fbv, use_persistent_hash_map)); // Initialize existing PersistentStorage. Checksums will be validated. ICING_RETURN_IF_ERROR(type_joinable_index->InitializeExistingStorage()); diff --git a/icing/join/qualified-id-type-joinable-index.h b/icing/join/qualified-id-type-joinable-index.h index 1127641..4844433 100644 --- a/icing/join/qualified-id-type-joinable-index.h +++ b/icing/join/qualified-id-type-joinable-index.h @@ -78,6 +78,12 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage { // directory of working_path_, and it is responsible for parent // directory creation/deletion. See PersistentStorage for more // details about the concept of working_path. + // pre_mapping_fbv: flag indicating whether memory map max possible file size + // for underlying FileBackedVector before growing the actual + // file size. + // use_persistent_hash_map: flag indicating whether use persistent hash map as + // the key mapper (if false, then fall back to + // dynamic trie key mapper). // // Returns: // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored @@ -86,7 +92,8 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage { // - Any KeyMapper errors static libtextclassifier3::StatusOr< std::unique_ptr<QualifiedIdTypeJoinableIndex>> - Create(const Filesystem& filesystem, std::string working_path); + Create(const Filesystem& filesystem, std::string working_path, + bool pre_mapping_fbv, bool use_persistent_hash_map); // Deletes QualifiedIdTypeJoinableIndex under working_path. // @@ -180,21 +187,26 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage { const Filesystem& filesystem, std::string&& working_path, std::unique_ptr<uint8_t[]> metadata_buffer, std::unique_ptr<KeyMapper<int32_t>> doc_join_info_mapper, - std::unique_ptr<FileBackedVector<char>> qualified_id_storage) + std::unique_ptr<FileBackedVector<char>> qualified_id_storage, + bool pre_mapping_fbv, bool use_persistent_hash_map) : PersistentStorage(filesystem, std::move(working_path), kWorkingPathType), metadata_buffer_(std::move(metadata_buffer)), doc_join_info_mapper_(std::move(doc_join_info_mapper)), - qualified_id_storage_(std::move(qualified_id_storage)) {} + qualified_id_storage_(std::move(qualified_id_storage)), + pre_mapping_fbv_(pre_mapping_fbv), + use_persistent_hash_map_(use_persistent_hash_map) {} static libtextclassifier3::StatusOr< std::unique_ptr<QualifiedIdTypeJoinableIndex>> - InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path); + InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path, + bool pre_mapping_fbv, bool use_persistent_hash_map); static libtextclassifier3::StatusOr< std::unique_ptr<QualifiedIdTypeJoinableIndex>> InitializeExistingFiles(const Filesystem& filesystem, - std::string&& working_path); + std::string&& working_path, bool pre_mapping_fbv, + bool use_persistent_hash_map); // Transfers qualified id type joinable index data from the current to // new_index and convert to new document id according to @@ -266,6 +278,14 @@ class QualifiedIdTypeJoinableIndex : public PersistentStorage { std::unique_ptr<FileBackedVector<char>> qualified_id_storage_; // TODO(b/268521214): add delete propagation storage + + // Flag indicating whether memory map max possible file size for underlying + // FileBackedVector before growing the actual file size. + bool pre_mapping_fbv_; + + // Flag indicating whether use persistent hash map as the key mapper (if + // false, then fall back to dynamic trie key mapper). + bool use_persistent_hash_map_; }; } // namespace lib diff --git a/icing/join/qualified-id-type-joinable-index_test.cc b/icing/join/qualified-id-type-joinable-index_test.cc index 745b0c1..8ef9167 100644 --- a/icing/join/qualified-id-type-joinable-index_test.cc +++ b/icing/join/qualified-id-type-joinable-index_test.cc @@ -26,6 +26,8 @@ #include "icing/file/persistent-storage.h" #include "icing/join/doc-join-info.h" #include "icing/store/document-id.h" +#include "icing/store/dynamic-trie-key-mapper.h" +#include "icing/store/key-mapper.h" #include "icing/store/persistent-hash-map-key-mapper.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" @@ -51,7 +53,18 @@ using Info = QualifiedIdTypeJoinableIndex::Info; static constexpr int32_t kCorruptedValueOffset = 3; -class QualifiedIdTypeJoinableIndexTest : public ::testing::Test { +struct QualifiedIdJoinIndexTestParam { + bool pre_mapping_fbv; + bool use_persistent_hash_map; + + explicit QualifiedIdJoinIndexTestParam(bool pre_mapping_fbv_in, + bool use_persistent_hash_map_in) + : pre_mapping_fbv(pre_mapping_fbv_in), + use_persistent_hash_map(use_persistent_hash_map_in) {} +}; + +class QualifiedIdTypeJoinableIndexTest + : public ::testing::TestWithParam<QualifiedIdJoinIndexTestParam> { protected: void SetUp() override { base_dir_ = GetTestTempDir() + "/icing"; @@ -70,20 +83,27 @@ class QualifiedIdTypeJoinableIndexTest : public ::testing::Test { std::string working_path_; }; -TEST_F(QualifiedIdTypeJoinableIndexTest, InvalidWorkingPath) { +TEST_P(QualifiedIdTypeJoinableIndexTest, InvalidWorkingPath) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + EXPECT_THAT( QualifiedIdTypeJoinableIndex::Create( - filesystem_, "/dev/null/qualified_id_type_joinable_index_test"), + filesystem_, "/dev/null/qualified_id_type_joinable_index_test", + param.pre_mapping_fbv, param.use_persistent_hash_map), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } -TEST_F(QualifiedIdTypeJoinableIndexTest, InitializeNewFiles) { +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializeNewFiles) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + { // Create new qualified id type joinable index ASSERT_FALSE(filesystem_.DirectoryExists(working_path_.c_str())); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); EXPECT_THAT(index, Pointee(IsEmpty())); ICING_ASSERT_OK(index->PersistToDisk()); @@ -126,17 +146,22 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, InitializeNewFiles) { .Get())); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializationShouldFailWithoutPersistToDiskOrDestruction) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); // Insert some data. ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20), /*ref_qualified_id_str=*/"namespace#uriA")); + ICING_ASSERT_OK(index->PersistToDisk()); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/20), /*ref_qualified_id_str=*/"namespace#uriB")); @@ -146,16 +171,24 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, // Without calling PersistToDisk, checksums will not be recomputed or synced // to disk, so initializing another instance on the same files should fail. - EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create( + filesystem_, working_path_, param.pre_mapping_fbv, + param.use_persistent_hash_map), + StatusIs(param.use_persistent_hash_map + ? libtextclassifier3::StatusCode::FAILED_PRECONDITION + : libtextclassifier3::StatusCode::INTERNAL)); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializationShouldSucceedWithPersistToDisk) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index1, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); // Insert some data. ICING_ASSERT_OK( @@ -176,7 +209,9 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index2, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); EXPECT_THAT(index2, Pointee(SizeIs(3))); EXPECT_THAT( index2->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)), @@ -189,13 +224,17 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, IsOkAndHolds(/*ref_qualified_id_str=*/"namespace#uriC")); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializationShouldSucceedAfterDestruction) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); // Insert some data. ICING_ASSERT_OK( @@ -217,7 +256,9 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, // we should be able to get the same contents. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); EXPECT_THAT(index, Pointee(SizeIs(3))); EXPECT_THAT(index->Get(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20)), @@ -231,13 +272,17 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, } } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializeExistingFilesWithDifferentMagicShouldFail) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20), /*ref_qualified_id_str=*/"namespace#uriA")); @@ -278,18 +323,24 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, // Attempt to create the qualified id type joinable index with different // magic. This should fail. - EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_), + EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create( + filesystem_, working_path_, param.pre_mapping_fbv, + param.use_persistent_hash_map), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION, HasSubstr("Incorrect magic value"))); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializeExistingFilesWithWrongAllCrcShouldFail) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20), /*ref_qualified_id_str=*/"namespace#uriA")); @@ -325,18 +376,24 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, // Attempt to create the qualified id type joinable index with metadata // containing corrupted all_crc. This should fail. - EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_), + EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create( + filesystem_, working_path_, param.pre_mapping_fbv, + param.use_persistent_hash_map), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION, HasSubstr("Invalid all crc"))); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializeExistingFilesWithCorruptedInfoShouldFail) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20), /*ref_qualified_id_str=*/"namespace#uriA")); @@ -373,18 +430,24 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, // Attempt to create the qualified id type joinable index with info that // doesn't match its checksum. This should fail. - EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_), + EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create( + filesystem_, working_path_, param.pre_mapping_fbv, + param.use_persistent_hash_map), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION, HasSubstr("Invalid info crc"))); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializeExistingFilesWithCorruptedDocJoinInfoMapperShouldFail) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20), /*ref_qualified_id_str=*/"namespace#uriA")); @@ -392,14 +455,22 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, ICING_ASSERT_OK(index->PersistToDisk()); } + // Corrupt doc_join_info_mapper manually. { - // Corrupt doc_join_info_mapper manually. std::string mapper_working_path = absl_ports::StrCat(working_path_, "/doc_join_info_mapper"); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<PersistentHashMapKeyMapper<int32_t>> mapper, - PersistentHashMapKeyMapper<int32_t>::Create( - filesystem_, std::move(mapper_working_path))); + std::unique_ptr<KeyMapper<int32_t>> mapper; + if (param.use_persistent_hash_map) { + ICING_ASSERT_OK_AND_ASSIGN( + mapper, PersistentHashMapKeyMapper<int32_t>::Create( + filesystem_, std::move(mapper_working_path), + param.pre_mapping_fbv)); + } else { + ICING_ASSERT_OK_AND_ASSIGN(mapper, + DynamicTrieKeyMapper<int32_t>::Create( + filesystem_, mapper_working_path, + /*maximum_size_bytes=*/128 * 1024 * 1024)); + } ICING_ASSERT_OK_AND_ASSIGN(Crc32 old_crc, mapper->ComputeChecksum()); ICING_ASSERT_OK(mapper->Put("foo", 12345)); ICING_ASSERT_OK(mapper->PersistToDisk()); @@ -409,18 +480,24 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, // Attempt to create the qualified id type joinable index with corrupted // doc_join_info_mapper. This should fail. - EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_), + EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create( + filesystem_, working_path_, param.pre_mapping_fbv, + param.use_persistent_hash_map), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION, HasSubstr("Invalid storages crc"))); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, InitializeExistingFilesWithCorruptedQualifiedIdStorageShouldFail) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20), /*ref_qualified_id_str=*/"namespace#uriA")); @@ -449,16 +526,22 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, // Attempt to create the qualified id type joinable index with corrupted // qualified_id_storage. This should fail. - EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_), + EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create( + filesystem_, working_path_, param.pre_mapping_fbv, + param.use_persistent_hash_map), StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION, HasSubstr("Invalid storages crc"))); } -TEST_F(QualifiedIdTypeJoinableIndexTest, InvalidPut) { +TEST_P(QualifiedIdTypeJoinableIndexTest, InvalidPut) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); DocJoinInfo default_invalid; EXPECT_THAT( @@ -466,18 +549,24 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, InvalidPut) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(QualifiedIdTypeJoinableIndexTest, InvalidGet) { +TEST_P(QualifiedIdTypeJoinableIndexTest, InvalidGet) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); DocJoinInfo default_invalid; EXPECT_THAT(index->Get(default_invalid), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(QualifiedIdTypeJoinableIndexTest, PutAndGet) { +TEST_P(QualifiedIdTypeJoinableIndexTest, PutAndGet) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20); std::string_view ref_qualified_id_str_a = "namespace#uriA"; @@ -491,7 +580,9 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, PutAndGet) { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); EXPECT_THAT(index->Put(target_info1, ref_qualified_id_str_a), IsOk()); EXPECT_THAT(index->Put(target_info2, ref_qualified_id_str_b), IsOk()); @@ -508,22 +599,28 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, PutAndGet) { // Verify we can get all of them after destructing and re-initializing. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); EXPECT_THAT(index, Pointee(SizeIs(3))); EXPECT_THAT(index->Get(target_info1), IsOkAndHolds(ref_qualified_id_str_a)); EXPECT_THAT(index->Get(target_info2), IsOkAndHolds(ref_qualified_id_str_b)); EXPECT_THAT(index->Get(target_info3), IsOkAndHolds(ref_qualified_id_str_c)); } -TEST_F(QualifiedIdTypeJoinableIndexTest, +TEST_P(QualifiedIdTypeJoinableIndexTest, GetShouldReturnNotFoundErrorIfNotExist) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + DocJoinInfo target_info(/*document_id=*/1, /*joinable_property_id=*/20); std::string_view ref_qualified_id_str = "namespace#uriA"; // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); // Verify entry is not found in the beginning. EXPECT_THAT(index->Get(target_info), @@ -539,10 +636,14 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } -TEST_F(QualifiedIdTypeJoinableIndexTest, SetLastAddedDocumentId) { +TEST_P(QualifiedIdTypeJoinableIndexTest, SetLastAddedDocumentId) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); EXPECT_THAT(index->last_added_document_id(), Eq(kInvalidDocumentId)); @@ -555,12 +656,16 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, SetLastAddedDocumentId) { EXPECT_THAT(index->last_added_document_id(), Eq(kNextDocumentId)); } -TEST_F( +TEST_P( QualifiedIdTypeJoinableIndexTest, SetLastAddedDocumentIdShouldIgnoreNewDocumentIdNotGreaterThanTheCurrent) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); constexpr DocumentId kDocumentId = 123; index->set_last_added_document_id(kDocumentId); @@ -573,10 +678,14 @@ TEST_F( EXPECT_THAT(index->last_added_document_id(), Eq(kDocumentId)); } -TEST_F(QualifiedIdTypeJoinableIndexTest, Optimize) { +TEST_P(QualifiedIdTypeJoinableIndexTest, Optimize) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10), @@ -650,10 +759,14 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Optimize) { IsOkAndHolds("namespace#uriD")); } -TEST_F(QualifiedIdTypeJoinableIndexTest, OptimizeOutOfRangeDocumentId) { +TEST_P(QualifiedIdTypeJoinableIndexTest, OptimizeOutOfRangeDocumentId) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/99, /*joinable_property_id=*/10), @@ -675,10 +788,14 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, OptimizeOutOfRangeDocumentId) { EXPECT_THAT(index, Pointee(IsEmpty())); } -TEST_F(QualifiedIdTypeJoinableIndexTest, OptimizeDeleteAll) { +TEST_P(QualifiedIdTypeJoinableIndexTest, OptimizeDeleteAll) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(DocJoinInfo(/*document_id=*/3, /*joinable_property_id=*/10), @@ -710,7 +827,9 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, OptimizeDeleteAll) { EXPECT_THAT(index, Pointee(IsEmpty())); } -TEST_F(QualifiedIdTypeJoinableIndexTest, Clear) { +TEST_P(QualifiedIdTypeJoinableIndexTest, Clear) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + DocJoinInfo target_info1(/*document_id=*/1, /*joinable_property_id=*/20); DocJoinInfo target_info2(/*document_id=*/3, /*joinable_property_id=*/5); DocJoinInfo target_info3(/*document_id=*/6, /*joinable_property_id=*/13); @@ -718,7 +837,9 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Clear) { // Create new qualified id type joinable index ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdTypeJoinableIndex> index, - QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); ICING_ASSERT_OK( index->Put(target_info1, /*ref_qualified_id_str=*/"namespace#uriA")); ICING_ASSERT_OK( @@ -755,7 +876,9 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Clear) { // Verify index after reconstructing. ICING_ASSERT_OK_AND_ASSIGN( - index, QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_)); + index, QualifiedIdTypeJoinableIndex::Create( + filesystem_, working_path_, param.pre_mapping_fbv, + param.use_persistent_hash_map)); EXPECT_THAT(index->last_added_document_id(), Eq(2)); EXPECT_THAT(index->Get(target_info1), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -766,6 +889,42 @@ TEST_F(QualifiedIdTypeJoinableIndexTest, Clear) { EXPECT_THAT(index->Get(target_info4), IsOkAndHolds("namespace#uriD")); } +TEST_P(QualifiedIdTypeJoinableIndexTest, SwitchKeyMapperTypeShouldReturnError) { + const QualifiedIdJoinIndexTestParam& param = GetParam(); + + { + // Create new qualified id type joinable index + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QualifiedIdTypeJoinableIndex> index, + QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + param.use_persistent_hash_map)); + ICING_ASSERT_OK( + index->Put(DocJoinInfo(/*document_id=*/1, /*joinable_property_id=*/20), + /*ref_qualified_id_str=*/"namespace#uriA")); + + ICING_ASSERT_OK(index->PersistToDisk()); + } + + bool switch_key_mapper_flag = !param.use_persistent_hash_map; + EXPECT_THAT(QualifiedIdTypeJoinableIndex::Create(filesystem_, working_path_, + param.pre_mapping_fbv, + switch_key_mapper_flag), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); +} + +INSTANTIATE_TEST_SUITE_P( + QualifiedIdTypeJoinableIndexTest, QualifiedIdTypeJoinableIndexTest, + testing::Values( + QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true, + /*use_persistent_hash_map_in=*/true), + QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/true, + /*use_persistent_hash_map_in=*/false), + QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false, + /*use_persistent_hash_map_in=*/true), + QualifiedIdJoinIndexTestParam(/*pre_mapping_fbv_in=*/false, + /*use_persistent_hash_map_in=*/false))); + } // namespace } // namespace lib diff --git a/icing/schema-builder.h b/icing/schema-builder.h index 9e384c5..e8be483 100644 --- a/icing/schema-builder.h +++ b/icing/schema-builder.h @@ -44,6 +44,8 @@ constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_VERBATIM = StringIndexingConfig::TokenizerType::VERBATIM; constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_RFC822 = StringIndexingConfig::TokenizerType::RFC822; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_URL = + StringIndexingConfig::TokenizerType::URL; constexpr TermMatchType::Code TERM_MATCH_UNKNOWN = TermMatchType::UNKNOWN; constexpr TermMatchType::Code TERM_MATCH_EXACT = TermMatchType::EXACT_ONLY; diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc index c85cc87..371ed00 100644 --- a/icing/schema/schema-util.cc +++ b/icing/schema/schema-util.cc @@ -53,23 +53,6 @@ bool ArePropertiesEqual(const PropertyConfigProto& old_property, new_property.document_indexing_config().index_nested_properties(); } -bool IsIndexedProperty(const PropertyConfigProto& property_config) { - switch (property_config.data_type()) { - case PropertyConfigProto::DataType::STRING: - return property_config.string_indexing_config().term_match_type() != - TermMatchType::UNKNOWN; - case PropertyConfigProto::DataType::INT64: - return property_config.integer_indexing_config().numeric_match_type() != - IntegerIndexingConfig::NumericMatchType::UNKNOWN; - case PropertyConfigProto::DataType::UNKNOWN: - case PropertyConfigProto::DataType::DOUBLE: - case PropertyConfigProto::DataType::BOOLEAN: - case PropertyConfigProto::DataType::BYTES: - case PropertyConfigProto::DataType::DOCUMENT: - return false; - } -} - bool IsCardinalityCompatible(const PropertyConfigProto& old_property, const PropertyConfigProto& new_property) { if (old_property.cardinality() < new_property.cardinality()) { @@ -768,6 +751,26 @@ libtextclassifier3::Status SchemaUtil::ValidateJoinableConfig( return libtextclassifier3::Status::OK; } +/* static */ bool SchemaUtil::IsIndexedProperty( + const PropertyConfigProto& property_config) { + switch (property_config.data_type()) { + case PropertyConfigProto::DataType::STRING: + return property_config.string_indexing_config().term_match_type() != + TermMatchType::UNKNOWN && + property_config.string_indexing_config().tokenizer_type() != + StringIndexingConfig::TokenizerType::NONE; + case PropertyConfigProto::DataType::INT64: + return property_config.integer_indexing_config().numeric_match_type() != + IntegerIndexingConfig::NumericMatchType::UNKNOWN; + case PropertyConfigProto::DataType::UNKNOWN: + case PropertyConfigProto::DataType::DOUBLE: + case PropertyConfigProto::DataType::BOOLEAN: + case PropertyConfigProto::DataType::BYTES: + case PropertyConfigProto::DataType::DOCUMENT: + return false; + } +} + bool SchemaUtil::IsParent(const SchemaUtil::InheritanceMap& inheritance_map, std::string_view parent_type, std::string_view child_type) { diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h index 445affd..e707758 100644 --- a/icing/schema/schema-util.h +++ b/icing/schema/schema-util.h @@ -258,6 +258,8 @@ class SchemaUtil { static libtextclassifier3::Status ValidatePropertyName( std::string_view property_name, std::string_view schema_type = ""); + static bool IsIndexedProperty(const PropertyConfigProto& property_config); + private: // Validates the 'schema_type' field // diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index 3ea855c..40e30b0 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -4246,6 +4246,149 @@ INSTANTIATE_TEST_SUITE_P( SchemaUtilTest, SchemaUtilTest, testing::Values(/*allow_circular_schema_definitions=*/true, false)); +struct IsIndexedPropertyTestParam { + PropertyConfigProto property_config; + bool expected_result; + + explicit IsIndexedPropertyTestParam(PropertyConfigProto property_config_in, + bool expected_result_in) + : property_config(std::move(property_config_in)), + expected_result(expected_result_in) {} +}; + +class SchemaUtilIsIndexedPropertyTest + : public ::testing::TestWithParam<IsIndexedPropertyTestParam> {}; + +TEST_P(SchemaUtilIsIndexedPropertyTest, IsIndexedProperty) { + const IsIndexedPropertyTestParam& test_param = GetParam(); + EXPECT_THAT(SchemaUtil::IsIndexedProperty(test_param.property_config), + Eq(test_param.expected_result)); +} + +INSTANTIATE_TEST_SUITE_P( + SchemaUtilIsIndexedPropertyTest, SchemaUtilIsIndexedPropertyTest, + testing::Values( + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_UNKNOWN, + TOKENIZER_NONE) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_UNKNOWN, + TOKENIZER_PLAIN) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_UNKNOWN, + TOKENIZER_VERBATIM) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_UNKNOWN, + TOKENIZER_RFC822) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_UNKNOWN, + TOKENIZER_URL) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_NONE) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_PLAIN) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_VERBATIM) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_RFC822) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_URL) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_NONE) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_VERBATIM) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_RFC822) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_URL) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .Build(), + true), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataType(TYPE_DOUBLE) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataType(TYPE_BOOLEAN) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataType(TYPE_BYTES) + .Build(), + false), + IsIndexedPropertyTestParam(PropertyConfigBuilder() + .SetName("property") + .SetDataType(TYPE_DOCUMENT) + .Build(), + false))); + } // namespace } // namespace lib diff --git a/icing/schema/section-manager-builder_test.cc b/icing/schema/section-manager-builder_test.cc index ef4b077..60dd507 100644 --- a/icing/schema/section-manager-builder_test.cc +++ b/icing/schema/section-manager-builder_test.cc @@ -276,7 +276,7 @@ TEST_P(NonIndexableSectionManagerBuilderTest, Build) { } // The following types are considered non-indexable: -// - String with TERM_MATCH_UNKNOWN, TOKENIZER_NONE +// - String with TERM_MATCH_UNKNOWN or TOKENIZER_NONE // - Int64 with NUMERIC_MATCH_UNKNOWN // - Double // - Boolean @@ -292,6 +292,16 @@ INSTANTIATE_TEST_SUITE_P( .Build(), PropertyConfigBuilder() .SetName("property") + .SetDataTypeString(TERM_MATCH_UNKNOWN, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL) + .Build(), + PropertyConfigBuilder() + .SetName("property") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_NONE) + .SetCardinality(CARDINALITY_OPTIONAL) + .Build(), + PropertyConfigBuilder() + .SetName("property") .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN) .SetCardinality(CARDINALITY_OPTIONAL) .Build(), diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc index c3cd1cd..38042d0 100644 --- a/icing/schema/section-manager.cc +++ b/icing/schema/section-manager.cc @@ -35,6 +35,7 @@ #include "icing/proto/schema.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/property-util.h" +#include "icing/schema/schema-util.h" #include "icing/schema/section.h" #include "icing/store/document-filter-data.h" #include "icing/store/key-mapper.h" @@ -49,10 +50,7 @@ namespace { libtextclassifier3::Status AppendNewSectionMetadata( std::vector<SectionMetadata>* metadata_list, std::string&& concatenated_path, - PropertyConfigProto::DataType::Code data_type, - StringIndexingConfig::TokenizerType::Code string_tokenizer_type, - TermMatchType::Code term_match_type, - IntegerIndexingConfig::NumericMatchType::Code numeric_match_type) { + const PropertyConfigProto& property_config) { // Validates next section id, makes sure that section id is the same as the // list index so that we could find any section metadata by id in O(1) later. SectionId new_section_id = static_cast<SectionId>(metadata_list->size()); @@ -66,8 +64,11 @@ libtextclassifier3::Status AppendNewSectionMetadata( // Creates section metadata metadata_list->push_back(SectionMetadata( - new_section_id, data_type, string_tokenizer_type, term_match_type, - numeric_match_type, std::move(concatenated_path))); + new_section_id, property_config.data_type(), + property_config.string_indexing_config().tokenizer_type(), + property_config.string_indexing_config().term_match_type(), + property_config.integer_indexing_config().numeric_match_type(), + std::move(concatenated_path))); return libtextclassifier3::Status::OK; } @@ -98,35 +99,12 @@ SectionManager::Builder::ProcessSchemaTypePropertyConfig( return absl_ports::InvalidArgumentError("Invalid schema type id"); } - switch (property_config.data_type()) { - case PropertyConfigProto::DataType::STRING: { - if (property_config.string_indexing_config().term_match_type() != - TermMatchType::UNKNOWN) { - ICING_RETURN_IF_ERROR(AppendNewSectionMetadata( - §ion_metadata_cache_[schema_type_id], std::move(property_path), - PropertyConfigProto::DataType::STRING, - property_config.string_indexing_config().tokenizer_type(), - property_config.string_indexing_config().term_match_type(), - IntegerIndexingConfig::NumericMatchType::UNKNOWN)); - } - break; - } - case PropertyConfigProto::DataType::INT64: { - if (property_config.integer_indexing_config().numeric_match_type() != - IntegerIndexingConfig::NumericMatchType::UNKNOWN) { - ICING_RETURN_IF_ERROR(AppendNewSectionMetadata( - §ion_metadata_cache_[schema_type_id], std::move(property_path), - PropertyConfigProto::DataType::INT64, - StringIndexingConfig::TokenizerType::NONE, TermMatchType::UNKNOWN, - property_config.integer_indexing_config().numeric_match_type())); - } - break; - } - default: { - // Skip other data types. - break; - } + if (SchemaUtil::IsIndexedProperty(property_config)) { + ICING_RETURN_IF_ERROR( + AppendNewSectionMetadata(§ion_metadata_cache_[schema_type_id], + std::move(property_path), property_config)); } + return libtextclassifier3::Status::OK; } diff --git a/icing/store/key-mapper_benchmark.cc b/icing/store/key-mapper_benchmark.cc index 1ce54c7..c25fe30 100644 --- a/icing/store/key-mapper_benchmark.cc +++ b/icing/store/key-mapper_benchmark.cc @@ -82,8 +82,8 @@ class KeyMapperBenchmark { std::string working_path = absl_ports::StrCat(base_dir, "/", "key_mapper_dir"); return PersistentHashMapKeyMapper<int>::Create( - filesystem, std::move(working_path), max_num_entries, - /*average_kv_byte_size=*/kKeyLength + 1 + sizeof(int), + filesystem, std::move(working_path), /*pre_mapping_fbv=*/true, + max_num_entries, /*average_kv_byte_size=*/kKeyLength + 1 + sizeof(int), /*max_load_factor_percent=*/100); } diff --git a/icing/store/key-mapper_test.cc b/icing/store/key-mapper_test.cc index 1367c2d..fa7d1e8 100644 --- a/icing/store/key-mapper_test.cc +++ b/icing/store/key-mapper_test.cc @@ -16,14 +16,12 @@ #include <memory> #include <string> -#include <type_traits> #include <unordered_map> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "icing/absl_ports/canonical_errors.h" #include "icing/file/filesystem.h" #include "icing/store/document-id.h" #include "icing/store/dynamic-trie-key-mapper.h" @@ -43,11 +41,23 @@ namespace { constexpr int kMaxDynamicTrieKeyMapperSize = 3 * 1024 * 1024; // 3 MiB -template <typename T> -class KeyMapperTest : public ::testing::Test { - protected: - using KeyMapperType = T; +enum class KeyMapperType { + kDynamicTrie, + kPersistentHashMap, +}; + +struct KeyMapperTestParam { + KeyMapperType key_mapper_type; + bool pre_mapping_fbv; + explicit KeyMapperTestParam(KeyMapperType key_mapper_type_in, + bool pre_mapping_fbv_in) + : key_mapper_type(key_mapper_type_in), + pre_mapping_fbv(pre_mapping_fbv_in) {} +}; + +class KeyMapperTest : public ::testing::TestWithParam<KeyMapperTestParam> { + protected: void SetUp() override { base_dir_ = GetTestTempDir() + "/icing"; ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()), @@ -60,24 +70,29 @@ class KeyMapperTest : public ::testing::Test { filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()); } - template <typename UnknownKeyMapperType> libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>> CreateKeyMapper() { - return absl_ports::InvalidArgumentError("Unknown type"); + const KeyMapperTestParam& param = GetParam(); + switch (param.key_mapper_type) { + case KeyMapperType::kDynamicTrie: + return DynamicTrieKeyMapper<DocumentId>::Create( + filesystem_, working_dir_, kMaxDynamicTrieKeyMapperSize); + case KeyMapperType::kPersistentHashMap: + return PersistentHashMapKeyMapper<DocumentId>::Create( + filesystem_, working_dir_, param.pre_mapping_fbv); + } } - template <> - libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>> - CreateKeyMapper<DynamicTrieKeyMapper<DocumentId>>() { - return DynamicTrieKeyMapper<DocumentId>::Create( - filesystem_, working_dir_, kMaxDynamicTrieKeyMapperSize); - } - - template <> - libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<DocumentId>>> - CreateKeyMapper<PersistentHashMapKeyMapper<DocumentId>>() { - return PersistentHashMapKeyMapper<DocumentId>::Create(filesystem_, - working_dir_); + libtextclassifier3::Status DeleteKeyMapper() { + const KeyMapperTestParam& param = GetParam(); + switch (param.key_mapper_type) { + case KeyMapperType::kDynamicTrie: + return DynamicTrieKeyMapper<DocumentId>::Delete(filesystem_, + working_dir_); + case KeyMapperType::kPersistentHashMap: + return PersistentHashMapKeyMapper<DocumentId>::Delete(filesystem_, + working_dir_); + } } std::string base_dir_; @@ -85,10 +100,6 @@ class KeyMapperTest : public ::testing::Test { Filesystem filesystem_; }; -using TestTypes = ::testing::Types<DynamicTrieKeyMapper<DocumentId>, - PersistentHashMapKeyMapper<DocumentId>>; -TYPED_TEST_SUITE(KeyMapperTest, TestTypes); - std::unordered_map<std::string, DocumentId> GetAllKeyValuePairs( const KeyMapper<DocumentId>* key_mapper) { std::unordered_map<std::string, DocumentId> ret; @@ -101,15 +112,15 @@ std::unordered_map<std::string, DocumentId> GetAllKeyValuePairs( return ret; } -TYPED_TEST(KeyMapperTest, CreateNewKeyMapper) { +TEST_P(KeyMapperTest, CreateNewKeyMapper) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - this->template CreateKeyMapper<TypeParam>()); + CreateKeyMapper()); EXPECT_THAT(key_mapper->num_keys(), 0); } -TYPED_TEST(KeyMapperTest, CanUpdateSameKeyMultipleTimes) { +TEST_P(KeyMapperTest, CanUpdateSameKeyMultipleTimes) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - this->template CreateKeyMapper<TypeParam>()); + CreateKeyMapper()); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 50)); @@ -125,9 +136,9 @@ TYPED_TEST(KeyMapperTest, CanUpdateSameKeyMultipleTimes) { EXPECT_THAT(key_mapper->num_keys(), 2); } -TYPED_TEST(KeyMapperTest, GetOrPutOk) { +TEST_P(KeyMapperTest, GetOrPutOk) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - this->template CreateKeyMapper<TypeParam>()); + CreateKeyMapper()); EXPECT_THAT(key_mapper->Get("foo"), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -135,9 +146,9 @@ TYPED_TEST(KeyMapperTest, GetOrPutOk) { EXPECT_THAT(key_mapper->Get("foo"), IsOkAndHolds(1)); } -TYPED_TEST(KeyMapperTest, CanPersistToDiskRegularly) { +TEST_P(KeyMapperTest, CanPersistToDiskRegularly) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - this->template CreateKeyMapper<TypeParam>()); + CreateKeyMapper()); // Can persist an empty DynamicTrieKeyMapper. ICING_EXPECT_OK(key_mapper->PersistToDisk()); @@ -160,16 +171,15 @@ TYPED_TEST(KeyMapperTest, CanPersistToDiskRegularly) { EXPECT_THAT(key_mapper->num_keys(), 2); } -TYPED_TEST(KeyMapperTest, CanUseAcrossMultipleInstances) { +TEST_P(KeyMapperTest, CanUseAcrossMultipleInstances) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - this->template CreateKeyMapper<TypeParam>()); + CreateKeyMapper()); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->PersistToDisk()); key_mapper.reset(); - ICING_ASSERT_OK_AND_ASSIGN(key_mapper, - this->template CreateKeyMapper<TypeParam>()); + ICING_ASSERT_OK_AND_ASSIGN(key_mapper, CreateKeyMapper()); EXPECT_THAT(key_mapper->num_keys(), 1); EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100)); @@ -181,29 +191,26 @@ TYPED_TEST(KeyMapperTest, CanUseAcrossMultipleInstances) { EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300)); } -TYPED_TEST(KeyMapperTest, CanDeleteAndRestartKeyMapping) { +TEST_P(KeyMapperTest, CanDeleteAndRestartKeyMapping) { // Can delete even if there's nothing there - ICING_EXPECT_OK(TestFixture::KeyMapperType::Delete(this->filesystem_, - this->working_dir_)); + ICING_EXPECT_OK(DeleteKeyMapper()); ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - this->template CreateKeyMapper<TypeParam>()); + CreateKeyMapper()); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); ICING_EXPECT_OK(key_mapper->PersistToDisk()); - ICING_EXPECT_OK(TestFixture::KeyMapperType::Delete(this->filesystem_, - this->working_dir_)); + ICING_EXPECT_OK(DeleteKeyMapper()); key_mapper.reset(); - ICING_ASSERT_OK_AND_ASSIGN(key_mapper, - this->template CreateKeyMapper<TypeParam>()); + ICING_ASSERT_OK_AND_ASSIGN(key_mapper, CreateKeyMapper()); EXPECT_THAT(key_mapper->num_keys(), 0); ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100)); EXPECT_THAT(key_mapper->num_keys(), 1); } -TYPED_TEST(KeyMapperTest, Iterator) { +TEST_P(KeyMapperTest, Iterator) { ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyMapper<DocumentId>> key_mapper, - this->template CreateKeyMapper<TypeParam>()); + CreateKeyMapper()); EXPECT_THAT(GetAllKeyValuePairs(key_mapper.get()), IsEmpty()); ICING_EXPECT_OK(key_mapper->Put("foo", /*value=*/1)); @@ -217,6 +224,15 @@ TYPED_TEST(KeyMapperTest, Iterator) { UnorderedElementsAre(Pair("foo", 1), Pair("bar", 2), Pair("baz", 3))); } +INSTANTIATE_TEST_SUITE_P( + KeyMapperTest, KeyMapperTest, + testing::Values(KeyMapperTestParam(KeyMapperType::kDynamicTrie, + /*pre_mapping_fbv_in=*/true), + KeyMapperTestParam(KeyMapperType::kPersistentHashMap, + /*pre_mapping_fbv_in=*/true), + KeyMapperTestParam(KeyMapperType::kPersistentHashMap, + /*pre_mapping_fbv_in=*/false))); + } // namespace } // namespace lib diff --git a/icing/store/persistent-hash-map-key-mapper.h b/icing/store/persistent-hash-map-key-mapper.h index 5f83e6f..0596fe3 100644 --- a/icing/store/persistent-hash-map-key-mapper.h +++ b/icing/store/persistent-hash-map-key-mapper.h @@ -38,6 +38,13 @@ namespace lib { template <typename T, typename Formatter = absl_ports::DefaultFormatter> class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> { public: + static constexpr int32_t kDefaultMaxNumEntries = + PersistentHashMap::Entry::kMaxNumEntries; + static constexpr int32_t kDefaultAverageKVByteSize = + PersistentHashMap::Options::kDefaultAverageKVByteSize; + static constexpr int32_t kDefaultMaxLoadFactorPercent = + PersistentHashMap::Options::kDefaultMaxLoadFactorPercent; + // Returns an initialized instance of PersistentHashMapKeyMapper that can // immediately handle read/write operations. // Returns any encountered IO errors. @@ -50,6 +57,9 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> { // PersistentHashMapKeyMapper would be created. See // PersistentStorage for more details about the concept of // working_path. + // pre_mapping_fbv: flag indicating whether memory map max possible file size + // for underlying FileBackedVector before growing the actual + // file size. // max_num_entries: max # of kvps. It will be used to compute 3 storages size. // average_kv_byte_size: average byte size of a single key + serialized value. // It will be used to compute kv_storage size. @@ -63,11 +73,9 @@ class PersistentHashMapKeyMapper : public KeyMapper<T, Formatter> { static libtextclassifier3::StatusOr< std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>> Create(const Filesystem& filesystem, std::string working_path, - int32_t max_num_entries = PersistentHashMap::Entry::kMaxNumEntries, - int32_t average_kv_byte_size = - PersistentHashMap::Options::kDefaultAverageKVByteSize, - int32_t max_load_factor_percent = - PersistentHashMap::Options::kDefaultMaxLoadFactorPercent); + bool pre_mapping_fbv, int32_t max_num_entries = kDefaultMaxNumEntries, + int32_t average_kv_byte_size = kDefaultAverageKVByteSize, + int32_t max_load_factor_percent = kDefaultMaxLoadFactorPercent); // Deletes working_path (and all the files under it recursively) associated // with the PersistentHashMapKeyMapper. @@ -166,7 +174,7 @@ template <typename T, typename Formatter> std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>> PersistentHashMapKeyMapper<T, Formatter>::Create( const Filesystem& filesystem, std::string working_path, - int32_t max_num_entries, int32_t average_kv_byte_size, + bool pre_mapping_fbv, int32_t max_num_entries, int32_t average_kv_byte_size, int32_t max_load_factor_percent) { ICING_ASSIGN_OR_RETURN( std::unique_ptr<PersistentHashMap> persistent_hash_map, @@ -176,7 +184,10 @@ PersistentHashMapKeyMapper<T, Formatter>::Create( /*value_type_size_in=*/sizeof(T), /*max_num_entries_in=*/max_num_entries, /*max_load_factor_percent_in=*/max_load_factor_percent, - /*average_kv_byte_size_in=*/average_kv_byte_size))); + /*average_kv_byte_size_in=*/average_kv_byte_size, + /*init_num_buckets_in=*/ + PersistentHashMap::Options::kDefaultInitNumBuckets, + /*pre_mapping_fbv_in=*/pre_mapping_fbv))); return std::unique_ptr<PersistentHashMapKeyMapper<T, Formatter>>( new PersistentHashMapKeyMapper<T, Formatter>( std::move(persistent_hash_map))); diff --git a/icing/store/persistent-hash-map-key-mapper_test.cc b/icing/store/persistent-hash-map-key-mapper_test.cc index c937c43..0d610e9 100644 --- a/icing/store/persistent-hash-map-key-mapper_test.cc +++ b/icing/store/persistent-hash-map-key-mapper_test.cc @@ -41,9 +41,9 @@ class PersistentHashMapKeyMapperTest : public testing::Test { }; TEST_F(PersistentHashMapKeyMapperTest, InvalidBaseDir) { - EXPECT_THAT( - PersistentHashMapKeyMapper<DocumentId>::Create(filesystem_, "/dev/null"), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + EXPECT_THAT(PersistentHashMapKeyMapper<DocumentId>::Create( + filesystem_, "/dev/null", /*pre_mapping_fbv=*/false), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } } // namespace diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto index 18884c6..d4b1aee 100644 --- a/proto/icing/proto/initialize.proto +++ b/proto/icing/proto/initialize.proto @@ -23,7 +23,7 @@ option java_package = "com.google.android.icing.proto"; option java_multiple_files = true; option objc_class_prefix = "ICNG"; -// Next tag: 9 +// Next tag: 11 message IcingSearchEngineOptions { // Directory to persist files for Icing. Required. // If Icing was previously initialized with this directory, it will reload @@ -96,6 +96,14 @@ message IcingSearchEngineOptions { // The default value is false. optional bool allow_circular_schema_definitions = 8; + // Whether memory map max possible file size for FileBackedVector before + // growing the actual file size. + optional bool pre_mapping_fbv = 9; + + // Whether use persistent hash map as the key mapper (if false, then fall back + // to dynamic trie key mapper). + optional bool use_persistent_hash_map = 10; + reserved 2; } diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 8807fa7..8e176f8 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=532167936) +set(synced_AOSP_CL_number=533319030) |