aboutsummaryrefslogtreecommitdiff
path: root/icing/index/numeric/integer-index.h
diff options
context:
space:
mode:
authorTerry Wang <tytytyww@google.com>2023-03-01 00:41:52 -0800
committerTerry Wang <tytytyww@google.com>2023-03-01 00:41:52 -0800
commite103b8ea56212b2a5abc082ce888843f19c7d567 (patch)
tree6621759619d3adf3d7285e110969ee082d7d2e0e /icing/index/numeric/integer-index.h
parent5a41ca990be33387b0d5d15836a465bbe5ff5a28 (diff)
downloadicing-e103b8ea56212b2a5abc082ce888843f19c7d567.tar.gz
Update Icing from upstream.
Descriptions: ====================================================================== Add PropertyUtil for all property name/path related operations ====================================================================== [JoinableCache][2.0/x] Create SchemaPropertyIterator ====================================================================== [JoinableCache][2.1/x] Handle nested indexable flag ====================================================================== [JoinableCache][2.2/x] Add schema cycle dependency detection for SchemaPropertyIterator ====================================================================== [JoinableCache][3.0/x] Refactor SectionManager ====================================================================== [JoinableCache][3.1/x] Add unit tests for SectionManager::Builder and SchemaTypeManager ====================================================================== [NumericSearch][Storage][12/x] Implement Edit and GetIterator for IntegerIndex ====================================================================== [NumericSearch][Storage][13.0/x] Rename numeric-index_test as integer-index_test ====================================================================== [NumericSearch][Storage][13.1/x] Add IntegerIndexTest ====================================================================== Support the "len", "sum" and "avg" functions in advanced scoring. ====================================================================== Support the "this.childrenScores()" function to allow expressing children scores of joins in advanced scoring. 
====================================================================== Create an integration test for Join with advanced scoring ====================================================================== Rename the word "children" to "args" for function related ScoreExpression ====================================================================== Improve IndexBlock by PRead/PWrite instead of repeating mmap/msync/unmap ====================================================================== Refactor QueryVisitor to prepare for support for function calls. ====================================================================== Add support for function calls. ====================================================================== Fix breakage in score-and-rank_benchmark. ====================================================================== [NumericSearch][Storage][adhoc][ez] Fix comment for IntegerIndex ====================================================================== [NumericSearch][Storage][14/x] Create first IntegerIndexStorage benchmark ====================================================================== Rename Icing schema related terminology to prepare for polymorphism support ====================================================================== [JoinableCache][4.0/x] Move common methods from SectionManager to PropertyUtil ====================================================================== [JoinableCache][4.1/x] Retire GetSectionContent ====================================================================== [JoinableCache][4.2/x] Polish SectionManagerTest ====================================================================== Modify QueryVisitor to do: ====================================================================== [NumericSearch][Storage][15/x] Implement TransferIndex for IntegerIndexStorage ====================================================================== [NumericSearch][Storage][16/x] Implement Optimize and last added document id for IntegerIndex 
====================================================================== [NumericSearch][rollout][1/x] Include indexable int64 into SchemaDelta and backward compatibility ====================================================================== Add backwards compatibility test for Icing schema storage migration. ====================================================================== Implement trim the right-most node from the doc-hit-info-iterator. ====================================================================== Add TrimmedNode structure into doc-hit-info-iterator. ====================================================================== [JoinableCache][5/x] Implement JoinableProperty and JoinablePropertyManager ====================================================================== [JoinableCache][6/x] Add JoinablePropertyManager into SchemaTypeManager ====================================================================== [JoinableCache][7/x] Implement ExtractJoinableProperties ====================================================================== [JoinableCache][8/x] Create class QualifiedIdTypeJoinableCache ====================================================================== [JoinableCache][9/x] Implement factory method for QualifiedIdTypeJoinableCache ====================================================================== [JoinableCache][10/x] Implement Get and Put for QualifiedIdTypeJoinableCache ====================================================================== [JoinableCache][11/x] Add unit tests for QualifiedIdTypeJoinableCache ====================================================================== Modify DocHitInfoIteratorSectionRestrict to allow multi-property restricts ====================================================================== Fix the definition of LiteIndex::WantsMerge. 
====================================================================== [NumericSearch][rollout][2.0/x] Rollout persistent IntegerIndex ====================================================================== [NumericSearch][rollout][2.1/x] Add more tests for integer index restoration and optimization ====================================================================== [JoinableCache][adhoc][ez] Remove qualified id type joinable cache size info from document storage info ====================================================================== Integrate trim right node into suggestion processor. Bug: 208654892 Bug: 228240987 Bug: 249829533 Bug: 256081830 Bug: 259744228 Bug: 261474063 Bug: 263890397 Bug: 266103594 Bug: 268738297 Bug: 269295094 Change-Id: I5f1b3f3ed0b5d6933dc8c2ab3279904f7706b23e
Diffstat (limited to 'icing/index/numeric/integer-index.h')
-rw-r--r--icing/index/numeric/integer-index.h119
1 files changed, 107 insertions, 12 deletions
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
index a00d339..98c26ef 100644
--- a/icing/index/numeric/integer-index.h
+++ b/icing/index/numeric/integer-index.h
@@ -90,25 +90,107 @@ class IntegerIndex : public NumericIndex<int64_t> {
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> Create(
const Filesystem& filesystem, std::string working_path);
+ // Deletes IntegerIndex under working_path.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ static libtextclassifier3::Status Discard(const Filesystem& filesystem,
+ const std::string& working_path) {
+ return PersistentStorage::Discard(filesystem, working_path,
+ kWorkingPathType);
+ }
+
~IntegerIndex() override;
- // TODO(b/249829533): implement these functions and add comments.
+ // Returns an Editor instance for adding new records into integer index for a
+ // given property, DocumentId and SectionId. See Editor for more details.
std::unique_ptr<typename NumericIndex<int64_t>::Editor> Edit(
std::string_view property_path, DocumentId document_id,
- SectionId section_id) override;
+ SectionId section_id) override {
+ return std::make_unique<Editor>(property_path, document_id, section_id,
+ *this);
+ }
+ // Returns a DocHitInfoIterator for iterating through all docs which have the
+ // specified (integer) property contents in range [key_lower,
+ // key_upper].
+ // When iterating through all relevant doc hits, it:
+ // - Merges multiple SectionIds of doc hits with same DocumentId into a single
+ // SectionIdMask and constructs DocHitInfo.
+ // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // Returns:
+ // - On success: a DocHitInfoIterator instance
+ // - NOT_FOUND_ERROR if the given property_path doesn't exist
+ // - Any IntegerIndexStorage errors
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
std::string_view property_path, int64_t key_lower,
int64_t key_upper) const override;
- // Clears all integer index data.
+ // Reduces internal file sizes by reclaiming space and ids of deleted
+ // documents. Integer index will convert all data (hits) to the new document
+ // ids and regenerate all index files. If all data in a property path are
+ // completely deleted, then the underlying storage will be discarded as well.
+ //
+ // - document_id_old_to_new: a map for converting old document id to new
+ // document id.
+ // - new_last_added_document_id: will be used to update the last added
+ // document id in the integer index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status Optimize(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ DocumentId new_last_added_document_id) override;
+
+ // Clears all integer index data by discarding all existing storages, and sets
+ // last_added_document_id to kInvalidDocumentId.
//
// Returns:
// - OK on success
// - INTERNAL_ERROR on I/O error
- libtextclassifier3::Status Reset() override;
+ libtextclassifier3::Status Clear() override;
+
+ DocumentId last_added_document_id() const override {
+ return info().last_added_document_id;
+ }
+
+ void set_last_added_document_id(DocumentId document_id) override {
+ Info& info_ref = info();
+ if (info_ref.last_added_document_id == kInvalidDocumentId ||
+ document_id > info_ref.last_added_document_id) {
+ info_ref.last_added_document_id = document_id;
+ }
+ }
private:
+ class Editor : public NumericIndex<int64_t>::Editor {
+ public:
+ explicit Editor(std::string_view property_path, DocumentId document_id,
+ SectionId section_id, IntegerIndex& integer_index)
+ : NumericIndex<int64_t>::Editor(property_path, document_id, section_id),
+ integer_index_(integer_index) {}
+
+ ~Editor() override = default;
+
+ libtextclassifier3::Status BufferKey(int64_t key) override {
+ seen_keys_.push_back(key);
+ return libtextclassifier3::Status::OK;
+ }
+
+ libtextclassifier3::Status IndexAllBufferedKeys() && override;
+
+ private:
+ // Vector for caching all seen keys. Since IntegerIndexStorage::AddKeys
+ // sorts and dedupes keys, we can simply use a vector here and move it into
+ // AddKeys().
+ std::vector<int64_t> seen_keys_;
+
+ IntegerIndex& integer_index_; // Does not own.
+ };
+
explicit IntegerIndex(const Filesystem& filesystem,
std::string&& working_path,
std::unique_ptr<PostingListIntegerIndexSerializer>
@@ -128,6 +210,18 @@ class IntegerIndex : public NumericIndex<int64_t> {
InitializeExistingFiles(const Filesystem& filesystem,
std::string&& working_path);
+ // Transfers integer index data from the current integer index to
+ // new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::Status TransferIndex(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ IntegerIndex* new_integer_index) const;
+
// Flushes contents of all storages to underlying files.
//
// Returns:
@@ -148,8 +242,9 @@ class IntegerIndex : public NumericIndex<int64_t> {
// - Crc of the Info on success
libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() override;
- // Computes and returns all storages checksum. Checksums of bucket_storage_,
- // entry_storage_ and kv_storage_ will be combined together by XOR.
+ // Computes and returns all storages checksum. Checksums of (storage_crc,
+ // property_path) for all existing property paths will be combined together by
+ // XOR.
//
// Returns:
// - Crc of all storages on success
@@ -166,14 +261,14 @@ class IntegerIndex : public NumericIndex<int64_t> {
kCrcsMetadataFileOffset);
}
- Info* info() {
- return reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
- kInfoMetadataFileOffset);
+ Info& info() {
+ return *reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+ kInfoMetadataFileOffset);
}
- const Info* info() const {
- return reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
- kInfoMetadataFileOffset);
+ const Info& info() const {
+ return *reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+ kInfoMetadataFileOffset);
}
std::unique_ptr<PostingListIntegerIndexSerializer> posting_list_serializer_;