aboutsummaryrefslogtreecommitdiff
path: root/icing/index/numeric/integer-index.h
diff options
context:
space:
mode:
author Yen-Chieh Sung <sungyc@google.com> 2023-07-06 18:01:13 -0700
committer Yen-Chieh Sung <sungyc@google.com> 2023-07-06 19:39:11 -0700
commit 4ac48569941d4f48b1a9bffdaf4abb2a3e8e9269 (patch)
tree 076d8d961026f2800a6ca41106aa11d54527a29b /icing/index/numeric/integer-index.h
parent f1f564329ad560f580c03f9e928057ad4e4fadc9 (diff)
download icing-4ac48569941d4f48b1a9bffdaf4abb2a3e8e9269.tar.gz
Update Icing from upstream.
Descriptions:
========================================================================
[Icing] Extend document store uri mapper
========================================================================
Implement dirty flag in PersistentStorage
========================================================================
[Icing][version 2][1/x] Implement ShouldRebuildDerivedFiles
========================================================================
[Icing][version 2][2/x] Bump kVersion to 2
========================================================================
[NumericSearch][optimization] Change numeric index bucket splitting threshold
========================================================================
[NumericSearch][optimization] Flag guard integer index bucket splitting threshold
========================================================================
Bug: 193919210
Bug: 280094535
Bug: 288969109
Bug: 259743562
NO_IFTTT="Path is only valid in G3."
Change-Id: Ie1e6f5969549f6c945ea32d03c724850ff5600a9
Diffstat (limited to 'icing/index/numeric/integer-index.h')
-rw-r--r-- icing/index/numeric/integer-index.h 62
1 files changed, 50 insertions, 12 deletions
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
index 30f9852..e7a3127 100644
--- a/icing/index/numeric/integer-index.h
+++ b/icing/index/numeric/integer-index.h
@@ -55,25 +55,29 @@ class IntegerIndex : public NumericIndex<int64_t> {
// 'wildcard' storage.
static constexpr int kMaxPropertyStorages = 32;
+ static constexpr int32_t kDefaultNumDataThresholdForBucketSplit =
+ IntegerIndexStorage::kDefaultNumDataThresholdForBucketSplit;
+
struct Info {
- static constexpr int32_t kMagic = 0x238a3dcb;
+ static constexpr int32_t kMagic = 0x5d8a1e8a;
int32_t magic;
DocumentId last_added_document_id;
+ int32_t num_data_threshold_for_bucket_split;
Crc32 ComputeChecksum() const {
return Crc32(
std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
}
} __attribute__((packed));
- static_assert(sizeof(Info) == 8, "");
+ static_assert(sizeof(Info) == 12, "");
// Metadata file layout: <Crcs><Info>
static constexpr int32_t kCrcsMetadataFileOffset = 0;
static constexpr int32_t kInfoMetadataFileOffset =
static_cast<int32_t>(sizeof(Crcs));
static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
- static_assert(kMetadataFileSize == 20, "");
+ static_assert(kMetadataFileSize == 24, "");
static constexpr WorkingPathType kWorkingPathType =
WorkingPathType::kDirectory;
@@ -90,6 +94,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
// related files will be stored under this directory. See
// PersistentStorage for more details about the concept of
// working_path.
+ // num_data_threshold_for_bucket_split: see IntegerIndexStorage::Options for
+ // more details.
// pre_mapping_fbv: flag indicating whether to memory map the max possible
// file size for the underlying FileBackedVector before
// growing the actual file size.
@@ -101,7 +107,7 @@ class IntegerIndex : public NumericIndex<int64_t> {
// - Any FileBackedVector/MemoryMappedFile errors.
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create(
const Filesystem& filesystem, std::string working_path,
- bool pre_mapping_fbv);
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv);
// Deletes IntegerIndex under working_path.
//
@@ -122,7 +128,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
std::string_view property_path, DocumentId document_id,
SectionId section_id) override {
return std::make_unique<Editor>(property_path, document_id, section_id,
- *this, pre_mapping_fbv_);
+ *this, num_data_threshold_for_bucket_split_,
+ pre_mapping_fbv_);
}
// Returns a DocHitInfoIterator for iterating through all docs which have the
@@ -172,6 +179,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
}
void set_last_added_document_id(DocumentId document_id) override {
+ SetInfoDirty();
+
Info& info_ref = info();
if (info_ref.last_added_document_id == kInvalidDocumentId ||
document_id > info_ref.last_added_document_id) {
@@ -189,9 +198,12 @@ class IntegerIndex : public NumericIndex<int64_t> {
public:
explicit Editor(std::string_view property_path, DocumentId document_id,
SectionId section_id, IntegerIndex& integer_index,
+ int32_t num_data_threshold_for_bucket_split,
bool pre_mapping_fbv)
: NumericIndex<int64_t>::Editor(property_path, document_id, section_id),
integer_index_(integer_index),
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
pre_mapping_fbv_(pre_mapping_fbv) {}
~Editor() override = default;
@@ -211,6 +223,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
IntegerIndex& integer_index_; // Does not own.
+ int32_t num_data_threshold_for_bucket_split_;
+
// Flag indicating whether to memory map the max possible file size for the
// underlying FileBackedVector before growing the actual file size.
bool pre_mapping_fbv_;
@@ -226,7 +240,7 @@ class IntegerIndex : public NumericIndex<int64_t> {
wildcard_property_storage,
std::unordered_set<std::string> wildcard_properties_set,
std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage,
- bool pre_mapping_fbv)
+ int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv)
: NumericIndex<int64_t>(filesystem, std::move(working_path),
kWorkingPathType),
posting_list_serializer_(std::move(posting_list_serializer)),
@@ -235,15 +249,22 @@ class IntegerIndex : public NumericIndex<int64_t> {
wildcard_property_storage_(std::move(wildcard_property_storage)),
wildcard_properties_set_(std::move(wildcard_properties_set)),
wildcard_index_storage_(std::move(wildcard_index_storage)),
- pre_mapping_fbv_(pre_mapping_fbv) {}
+ num_data_threshold_for_bucket_split_(
+ num_data_threshold_for_bucket_split),
+ pre_mapping_fbv_(pre_mapping_fbv),
+ is_info_dirty_(false),
+ is_storage_dirty_(false) {}
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
bool pre_mapping_fbv);
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeExistingFiles(const Filesystem& filesystem,
- std::string&& working_path, bool pre_mapping_fbv);
+ std::string&& working_path,
+ int32_t num_data_threshold_for_bucket_split,
+ bool pre_mapping_fbv);
// Adds the property path to the list of properties using wildcard storage.
// This will both update the in-memory list (wildcard_properties_set_) and
@@ -296,20 +317,20 @@ class IntegerIndex : public NumericIndex<int64_t> {
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistStoragesToDisk() override;
+ libtextclassifier3::Status PersistStoragesToDisk(bool force) override;
// Flushes contents of metadata file.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status PersistMetadataToDisk() override;
+ libtextclassifier3::Status PersistMetadataToDisk(bool force) override;
// Computes and returns Info checksum.
//
// Returns:
// - Crc of the Info on success
- libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(bool force) override;
// Computes and returns all storages checksum. Checksums of (storage_crc,
// property_path) for all existing property paths will be combined together by
@@ -318,7 +339,8 @@ class IntegerIndex : public NumericIndex<int64_t> {
// Returns:
// - Crc of all storages on success
// - INTERNAL_ERROR if any data inconsistency
- libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() override;
+ libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
+ bool force) override;
Crcs& crcs() override {
return *reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
@@ -340,6 +362,17 @@ class IntegerIndex : public NumericIndex<int64_t> {
kInfoMetadataFileOffset);
}
+ void SetInfoDirty() { is_info_dirty_ = true; }
+ // When storage is dirty, we have to set info dirty as well. So just expose
+ // SetDirty to set both.
+ void SetDirty() {
+ is_info_dirty_ = true;
+ is_storage_dirty_ = true;
+ }
+
+ bool is_info_dirty() const { return is_info_dirty_; }
+ bool is_storage_dirty() const { return is_storage_dirty_; }
+
std::unique_ptr<PostingListIntegerIndexSerializer> posting_list_serializer_;
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
@@ -360,9 +393,14 @@ class IntegerIndex : public NumericIndex<int64_t> {
// kMaxPropertyStorages in property_to_storage_map.
std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_;
+ int32_t num_data_threshold_for_bucket_split_;
+
// Flag indicating whether to memory map the max possible file size for the
// underlying FileBackedVector before growing the actual file size.
bool pre_mapping_fbv_;
+
+ bool is_info_dirty_;
+ bool is_storage_dirty_;
};
} // namespace lib