aboutsummaryrefslogtreecommitdiff
path: root/icing/index/numeric/integer-index.h
diff options
context:
space:
mode:
authorTim Barron <tjbarron@google.com>2023-03-13 17:00:29 -0700
committerTim Barron <tjbarron@google.com>2023-03-14 08:41:33 -0700
commitd5c81ae0c41ae9c1aefb3601f3836570b9f686c7 (patch)
tree75040182238304328ac85e570c0baae90b7fe53e /icing/index/numeric/integer-index.h
parent3fe6aa4251989fb27863fdbf51e18d8c1f9e42dd (diff)
downloadicing-d5c81ae0c41ae9c1aefb3601f3836570b9f686c7.tar.gz
Update Icing from upstream.
Descriptions: ======================================================================== Cache an instance of UBreakIterator to reduce unnecessary creations. ======================================================================== Cap number of individual IntegerIndexStorages that IntegerIndex creates. ======================================================================== Change error in trimRightMostNode from Unimplemented to InvalidArgument. ======================================================================== Add detection for new language features of List Filters Query Language. ======================================================================== Add option to control threshold to rebuild index during optimize by flag ======================================================================== Add option to control use of namespace id to build urimapper by flag. ======================================================================== Enforce schema validation for joinable config. ======================================================================== Adopt bucket splitting for IntegerIndexStorage. ======================================================================== Implement bucket splitting function. ======================================================================== Add Icing initialization unit tests for QualifiedIdTypeJoinableIndex. ======================================================================== Add Icing schema change unit tests for QualifiedIdTypeJoinableIndex. ======================================================================== Add Icing optimization unit tests for QualifiedIdTypeJoinableIndex. ======================================================================== Integrate QualifiedIdTypeJoinableIndex into IcingSearchEngine. ======================================================================== Implement QualifiedIdJoinablePropertyIndexingHandler. 
======================================================================== Change QualifiedIdTypeJoinableIndex to store raw qualified id string. ======================================================================== Pass info about unnormalized query terms through lexer/parser/visitor. ======================================================================== Bug: 208654892 Bug: 263890397 Bug: 259743562 Bug: 272145329 Bug: 227356108 Change-Id: I438a390ddda5673cf2b5781af502f2b7cfeaee74
Diffstat (limited to 'icing/index/numeric/integer-index.h')
-rw-r--r--icing/index/numeric/integer-index.h87
1 files changed, 78 insertions, 9 deletions
diff --git a/icing/index/numeric/integer-index.h b/icing/index/numeric/integer-index.h
index 050a143..303bb41 100644
--- a/icing/index/numeric/integer-index.h
+++ b/icing/index/numeric/integer-index.h
@@ -23,12 +23,16 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/index/numeric/integer-index-storage.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/index/numeric/wildcard-property-storage.pb.h"
+#include "icing/schema/schema-store.h"
#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -46,6 +50,11 @@ class IntegerIndex : public NumericIndex<int64_t> {
using PropertyToStorageMapType =
std::unordered_map<std::string, std::unique_ptr<IntegerIndexStorage>>;
+ // Maximum number of individual property storages that this index will allow
+ // before falling back to placing hits for any new properties into the
+ // 'wildcard' storage.
+ static constexpr int kMaxPropertyStorages = 32;
+
struct Info {
static constexpr int32_t kMagic = 0x238a3dcb;
@@ -125,8 +134,9 @@ class IntegerIndex : public NumericIndex<int64_t> {
// - NOT_FOUND_ERROR if the given property_path doesn't exist
// - Any IntegerIndexStorage errors
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
- std::string_view property_path, int64_t key_lower,
- int64_t key_upper) const override;
+ std::string_view property_path, int64_t key_lower, int64_t key_upper,
+ const DocumentStore& document_store,
+ const SchemaStore& schema_store) const override;
// Reduces internal file sizes by reclaiming space and ids of deleted
// documents. Integer index will convert all data (hits) to the new document
@@ -165,6 +175,11 @@ class IntegerIndex : public NumericIndex<int64_t> {
}
}
+ int num_property_indices() const override {
+ return property_to_storage_map_.size() +
+ ((wildcard_index_storage_ == nullptr) ? 0 : 1);
+ }
+
private:
class Editor : public NumericIndex<int64_t>::Editor {
public:
@@ -191,17 +206,24 @@ class IntegerIndex : public NumericIndex<int64_t> {
IntegerIndex& integer_index_; // Does not own.
};
- explicit IntegerIndex(const Filesystem& filesystem,
- std::string&& working_path,
- std::unique_ptr<PostingListIntegerIndexSerializer>
- posting_list_serializer,
- std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
- PropertyToStorageMapType&& property_to_storage_map)
+ explicit IntegerIndex(
+ const Filesystem& filesystem, std::string&& working_path,
+ std::unique_ptr<PostingListIntegerIndexSerializer>
+ posting_list_serializer,
+ std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
+ PropertyToStorageMapType&& property_to_storage_map,
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage,
+ std::unordered_set<std::string> wildcard_properties_set,
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage)
: NumericIndex<int64_t>(filesystem, std::move(working_path),
kWorkingPathType),
posting_list_serializer_(std::move(posting_list_serializer)),
metadata_mmapped_file_(std::move(metadata_mmapped_file)),
- property_to_storage_map_(std::move(property_to_storage_map)) {}
+ property_to_storage_map_(std::move(property_to_storage_map)),
+ wildcard_property_storage_(std::move(wildcard_property_storage)),
+ wildcard_properties_set_(std::move(wildcard_properties_set)),
+ wildcard_index_storage_(std::move(wildcard_index_storage)) {}
static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem& filesystem, std::string&& working_path);
@@ -210,6 +232,17 @@ class IntegerIndex : public NumericIndex<int64_t> {
InitializeExistingFiles(const Filesystem& filesystem,
std::string&& working_path);
+ // Adds the property path to the list of properties using wildcard storage.
+ // This will both update the in-memory list (wildcard_properties_set_) and
+ // the persistent list (wildcard_property_storage_).
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in wildcard_property_storage_.
+ libtextclassifier3::Status AddPropertyToWildcardStorage(
+ const std::string& property_path);
+
// Transfers integer index data from the current integer index to
// new_integer_index.
//
@@ -222,6 +255,29 @@ class IntegerIndex : public NumericIndex<int64_t> {
const std::vector<DocumentId>& document_id_old_to_new,
IntegerIndex* new_integer_index) const;
+ // Transfers integer index data from old_storage to new_integer_index.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error. This could potentially leave the storages
+ // in an invalid state and the caller should handle it properly (e.g.
+ // discard and rebuild)
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ TransferIntegerIndexStorage(
+ const std::vector<DocumentId>& document_id_old_to_new,
+ const IntegerIndexStorage* old_storage, const std::string& property_path,
+ IntegerIndex* new_integer_index) const;
+
+ // Transfers the persistent and in-memory list of properties using the
+ // wildcard storage from this index to new_integer_index.
+ //
+ // RETURNS:
+ // - OK on success
+ // - INTERNAL_ERROR if unable to successfully persist updated properties
+ // list in new_integer_index.
+ libtextclassifier3::Status TransferWildcardStorage(
+ IntegerIndex* new_integer_index) const;
+
// Flushes contents of all storages to underlying files.
//
// Returns:
@@ -277,6 +333,19 @@ class IntegerIndex : public NumericIndex<int64_t> {
// Property path to integer index storage map.
PropertyToStorageMapType property_to_storage_map_;
+
+ // Persistent list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unique_ptr<FileBackedProto<WildcardPropertyStorage>>
+ wildcard_property_storage_;
+
+ // In-memory list of properties that have added content to
+ // wildcard_index_storage_.
+ std::unordered_set<std::string> wildcard_properties_set_;
+
+ // The index storage that is used once we have already created
+ // kMaxPropertyStorages in property_to_storage_map_.
+ std::unique_ptr<icing::lib::IntegerIndexStorage> wildcard_index_storage_;
};
} // namespace lib