diff options
author | Tim Barron <tjbarron@google.com> | 2023-03-13 17:00:29 -0700 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2023-03-14 08:41:33 -0700 |
commit | d5c81ae0c41ae9c1aefb3601f3836570b9f686c7 (patch) | |
tree | 75040182238304328ac85e570c0baae90b7fe53e /icing/index/numeric/integer-index.h | |
parent | 3fe6aa4251989fb27863fdbf51e18d8c1f9e42dd (diff) | |
download | icing-d5c81ae0c41ae9c1aefb3601f3836570b9f686c7.tar.gz |
Update Icing from upstream.
Descriptions:
========================================================================
Cache an instance of UBreakIterator to reduce unnecessary creations.
========================================================================
Cap number of individual IntegerIndexStorages that IntegerIndex creates.
========================================================================
Change error in trimRightMostNode from Unimplemented to InvalidArgument.
========================================================================
Add detection for new language features of List Filters Query Language.
========================================================================
Add option to control threshold to rebuild index during optimize by flag.
========================================================================
Add option to control use of namespace id to build urimapper by flag.
========================================================================
Enforce schema validation for joinable config.
========================================================================
Adopt bucket splitting for IntegerIndexStorage.
========================================================================
Implement bucket splitting function.
========================================================================
Add Icing initialization unit tests for QualifiedIdTypeJoinableIndex.
========================================================================
Add Icing schema change unit tests for QualifiedIdTypeJoinableIndex.
========================================================================
Add Icing optimization unit tests for QualifiedIdTypeJoinableIndex.
========================================================================
Integrate QualifiedIdTypeJoinableIndex into IcingSearchEngine.
========================================================================
Implement QualifiedIdJoinablePropertyIndexingHandler.
========================================================================
Change QualifiedIdTypeJoinableIndex to store raw qualified id string.
========================================================================
Pass info about unnormalized query terms through lexer/parser/visitor.
========================================================================
Bug: 208654892
Bug: 263890397
Bug: 259743562
Bug: 272145329
Bug: 227356108
Change-Id: I438a390ddda5673cf2b5781af502f2b7cfeaee74
Diffstat (limited to 'icing/index/numeric/integer-index.h')
-rw-r--r-- | icing/index/numeric/integer-index.h | 87 |
1 files changed, 78 insertions, 9 deletions
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h index 050a143..303bb41 100644 --- a/icing/index/numeric/integer-index.h +++ b/icing/index/numeric/integer-index.h @@ -23,12 +23,16 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" #include "icing/index/numeric/integer-index-storage.h" #include "icing/index/numeric/numeric-index.h" #include "icing/index/numeric/posting-list-integer-index-serializer.h" +#include "icing/index/numeric/wildcard-property-storage.pb.h" +#include "icing/schema/schema-store.h" #include "icing/store/document-id.h" +#include "icing/store/document-store.h" #include "icing/util/crc32.h" namespace icing { @@ -46,6 +50,11 @@ class IntegerIndex : public NumericIndex<int64_t> { using PropertyToStorageMapType = std::unordered_map<std::string, std::unique_ptr<IntegerIndexStorage>>; + // Maximum number of individual property storages that this index will allow + // before falling back to placing hits for any new properties into the + // 'wildcard' storage. + static constexpr int kMaxPropertyStorages = 32; + struct Info { static constexpr int32_t kMagic = 0x238a3dcb; @@ -125,8 +134,9 @@ class IntegerIndex : public NumericIndex<int64_t> { // - NOT_FOUND_ERROR if the given property_path doesn't exist // - Any IntegerIndexStorage errors libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator( - std::string_view property_path, int64_t key_lower, - int64_t key_upper) const override; + std::string_view property_path, int64_t key_lower, int64_t key_upper, + const DocumentStore& document_store, + const SchemaStore& schema_store) const override; // Reduces internal file sizes by reclaiming space and ids of deleted // documents. 
Integer index will convert all data (hits) to the new document @@ -165,6 +175,11 @@ class IntegerIndex : public NumericIndex<int64_t> { } } + int num_property_indices() const override { + return property_to_storage_map_.size() + + ((wildcard_index_storage_ == nullptr) ? 0 : 1); + } + private: class Editor : public NumericIndex<int64_t>::Editor { public: @@ -191,17 +206,24 @@ class IntegerIndex : public NumericIndex<int64_t> { IntegerIndex& integer_index_; // Does not own. }; - explicit IntegerIndex(const Filesystem& filesystem, - std::string&& working_path, - std::unique_ptr<PostingListIntegerIndexSerializer> - posting_list_serializer, - std::unique_ptr<MemoryMappedFile> metadata_mmapped_file, - PropertyToStorageMapType&& property_to_storage_map) + explicit IntegerIndex( + const Filesystem& filesystem, std::string&& working_path, + std::unique_ptr<PostingListIntegerIndexSerializer> + posting_list_serializer, + std::unique_ptr<MemoryMappedFile> metadata_mmapped_file, + PropertyToStorageMapType&& property_to_storage_map, + std::unique_ptr<FileBackedProto<WildcardPropertyStorage>> + wildcard_property_storage, + std::unordered_set<std::string> wildcard_properties_set, + std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage) : NumericIndex<int64_t>(filesystem, std::move(working_path), kWorkingPathType), posting_list_serializer_(std::move(posting_list_serializer)), metadata_mmapped_file_(std::move(metadata_mmapped_file)), - property_to_storage_map_(std::move(property_to_storage_map)) {} + property_to_storage_map_(std::move(property_to_storage_map)), + wildcard_property_storage_(std::move(wildcard_property_storage)), + wildcard_properties_set_(std::move(wildcard_properties_set)), + wildcard_index_storage_(std::move(wildcard_index_storage)) {} static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>> InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path); @@ -210,6 +232,17 @@ class IntegerIndex : public NumericIndex<int64_t> 
{ InitializeExistingFiles(const Filesystem& filesystem, std::string&& working_path); + // Adds the property path to the list of properties using wildcard storage. + // This will both update the in-memory list (wildcard_properties_set_) and + // the persistent list (wildcard_property_storage_). + // + // RETURNS: + // - OK on success + // - INTERNAL_ERROR if unable to successfully persist updated properties + // list in wildcard_property_storage_. + libtextclassifier3::Status AddPropertyToWildcardStorage( + const std::string& property_path); + // Transfers integer index data from the current integer index to // new_integer_index. // @@ -222,6 +255,29 @@ class IntegerIndex : public NumericIndex<int64_t> { const std::vector<DocumentId>& document_id_old_to_new, IntegerIndex* new_integer_index) const; + // Transfers integer index data from old_storage to new_integer_index. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error. This could potentially leave the storages + // in an invalid state and the caller should handle it properly (e.g. + // discard and rebuild) + libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>> + TransferIntegerIndexStorage( + const std::vector<DocumentId>& document_id_old_to_new, + const IntegerIndexStorage* old_storage, const std::string& property_path, + IntegerIndex* new_integer_index) const; + + // Transfers the persistent and in-memory list of properties using the + // wildcard storage from the current integer index to new_integer_index. + // + // RETURNS: + // - OK on success + // - INTERNAL_ERROR if unable to successfully persist updated properties + // list in new_integer_index. + libtextclassifier3::Status TransferWildcardStorage( + IntegerIndex* new_integer_index) const; + // Flushes contents of all storages to underlying files. // // Returns: @@ -277,6 +333,19 @@ class IntegerIndex : public NumericIndex<int64_t> { // Property path to integer index storage map. 
PropertyToStorageMapType property_to_storage_map_; + + // Persistent list of properties that have added content to + // wildcard_index_storage_. + std::unique_ptr<FileBackedProto<WildcardPropertyStorage>> + wildcard_property_storage_; + + // In-memory list of properties that have added content to + // wildcard_index_storage_. + std::unordered_set<std::string> wildcard_properties_set_; + + // The index storage that is used once we have already created + // kMaxPropertyStorages in property_to_storage_map_. + std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_; }; } // namespace lib |