aboutsummaryrefslogtreecommitdiff
path: root/icing/index/numeric
diff options
context:
space:
mode:
authorTerry Wang <tytytyww@google.com>2023-01-23 14:54:45 -0800
committerTerry Wang <tytytyww@google.com>2023-01-23 14:54:45 -0800
commitcccafab8dfcae94d7072eb49ea971e3c688bdfc4 (patch)
treec5658335e3a0287ab3a7b25a3f18585920515f75 /icing/index/numeric
parentec29b14d5a908748d6e0699df7c85842a95a7b3c (diff)
downloadicing-cccafab8dfcae94d7072eb49ea971e3c688bdfc4.tar.gz
Update Icing from upstream.
Descriptions: ====================================================================== Fix time complexity regression for snippet retriever ====================================================================== Add support for QueryTerms SectionRestrict Map in Advanced Query. ====================================================================== [NumericSearch][Storage][9/x] Create unit tests for AddKeys and GetIterator ====================================================================== [NumericSearch][Storage][8/x] Add options ====================================================================== Fix the stack overflow issue for icing advanced scoring/query parsers/visitors by limiting the number of tokens allowed ====================================================================== Improves the has joinspec check ====================================================================== Fix the bug of DocHitInfoIteratorTermMain found by monkey test ====================================================================== Rename tests defined in DestructibleDirectoryTest to include "directory" instead of "file". ====================================================================== Support projection in icing monkey test ====================================================================== [NumericSearch][Storage][7/x] Implement iterator for query ====================================================================== [NumericSearch][Storage][6/x] Implement AddKeys ====================================================================== Add support for relevance scoring by enabling term_frequency retrieval in DocHitInfoIterators and by populating the QueryTermIteratorsMap. 
====================================================================== [JoinableCache][1/x] Compute SchemaDelta for joinable properties ====================================================================== [NumericSearch][Storage][5/x] PersistToDisk ====================================================================== [NumericSearch][Storage][4.1/x] Create unit tests for initialize ====================================================================== [NumericSearch][Storage][4.0/x] Implement Create and Initialize functions ====================================================================== Rename PostingListUsedSerializer related classes Bug: 193244409 Bug: 208654892 Bug: 246984163 Bug: 249829533 Bug: 256022027 Bug: 265258364 Bug: 265834832 Change-Id: Ibc7011a793ef5ed09eace0fb05d168adf5b4faca
Diffstat (limited to 'icing/index/numeric')
-rw-r--r--icing/index/numeric/integer-index-storage.cc872
-rw-r--r--icing/index/numeric/integer-index-storage.h214
-rw-r--r--icing/index/numeric/integer-index-storage_test.cc1147
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor.cc (renamed from icing/index/numeric/posting-list-integer-index-data-accessor.cc)30
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor.h (renamed from icing/index/numeric/posting-list-integer-index-data-accessor.h)38
-rw-r--r--icing/index/numeric/posting-list-integer-index-accessor_test.cc (renamed from icing/index/numeric/posting-list-integer-index-data-accessor_test.cc)94
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer.cc (renamed from icing/index/numeric/posting-list-used-integer-index-data-serializer.cc)45
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer.h (renamed from icing/index/numeric/posting-list-used-integer-index-data-serializer.h)9
-rw-r--r--icing/index/numeric/posting-list-integer-index-serializer_test.cc (renamed from icing/index/numeric/posting-list-used-integer-index-data-serializer_test.cc)54
9 files changed, 2355 insertions, 148 deletions
diff --git a/icing/index/numeric/integer-index-storage.cc b/icing/index/numeric/integer-index-storage.cc
new file mode 100644
index 0000000..fa8fa3e
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage.cc
@@ -0,0 +1,872 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-storage.h"
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <queue>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/posting_list/flash-index-storage.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
+#include "icing/index/numeric/integer-index-data.h"
+#include "icing/index/numeric/numeric-index.h"
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Helper function to PWrite crcs and info to metadata_file_path.
+// Returns:
+//   - OK on success
+//   - INTERNAL_ERROR if the metadata file cannot be opened for write
+//   - Any Crcs/Info serialization errors
+libtextclassifier3::Status WriteMetadata(
+    const Filesystem& filesystem, const std::string& metadata_file_path,
+    const IntegerIndexStorage::Crcs* crcs,
+    const IntegerIndexStorage::Info* info) {
+  ScopedFd sfd(filesystem.OpenForWrite(metadata_file_path.c_str()));
+  if (!sfd.is_valid()) {
+    return absl_ports::InternalError("Failed to create metadata file");
+  }
+
+  // Write crcs and info. File layout: <Crcs><Info>
+  ICING_RETURN_IF_ERROR(crcs->Serialize(filesystem, sfd.get()));
+  ICING_RETURN_IF_ERROR(info->Serialize(filesystem, sfd.get()));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Helper function to update checksums from info and storages to a Crcs
+// instance.
+// Note: flash_index_storage is currently unused; its component crc is
+// hard-coded to 0 until b/259744228 is resolved.
+//
+// Returns:
+//   - OK on success
+//   - Any FileBackedVector::ComputeChecksum errors
+libtextclassifier3::Status UpdateChecksums(
+    IntegerIndexStorage::Crcs* crcs, IntegerIndexStorage::Info* info,
+    FileBackedVector<IntegerIndexStorage::Bucket>* sorted_buckets,
+    FileBackedVector<IntegerIndexStorage::Bucket>* unsorted_buckets,
+    FlashIndexStorage* flash_index_storage) {
+  // Compute crcs
+  ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
+                         sorted_buckets->ComputeChecksum());
+  ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc,
+                         unsorted_buckets->ComputeChecksum());
+
+  crcs->component_crcs.info_crc = info->ComputeChecksum().Get();
+  crcs->component_crcs.sorted_buckets_crc = sorted_buckets_crc.Get();
+  crcs->component_crcs.unsorted_buckets_crc = unsorted_buckets_crc.Get();
+  // TODO(b/259744228): implement and update flash_index_storage checksum
+  crcs->component_crcs.flash_index_storage_crc = 0;
+  crcs->all_crc = crcs->component_crcs.ComputeChecksum().Get();
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Helper function to validate checksums.
+// Note: flash_index_storage is not verified yet (b/259744228).
+//
+// Returns:
+//   - OK if all stored checksums match the recomputed ones
+//   - FAILED_PRECONDITION_ERROR on any checksum mismatch
+//   - Any FileBackedVector::ComputeChecksum errors
+libtextclassifier3::Status ValidateChecksums(
+    const IntegerIndexStorage::Crcs* crcs,
+    const IntegerIndexStorage::Info* info,
+    FileBackedVector<IntegerIndexStorage::Bucket>* sorted_buckets,
+    FileBackedVector<IntegerIndexStorage::Bucket>* unsorted_buckets,
+    FlashIndexStorage* flash_index_storage) {
+  if (crcs->all_crc != crcs->component_crcs.ComputeChecksum().Get()) {
+    return absl_ports::FailedPreconditionError(
+        "Invalid all crc for IntegerIndexStorage");
+  }
+
+  if (crcs->component_crcs.info_crc != info->ComputeChecksum().Get()) {
+    return absl_ports::FailedPreconditionError(
+        "Invalid info crc for IntegerIndexStorage");
+  }
+
+  ICING_ASSIGN_OR_RETURN(Crc32 sorted_buckets_crc,
+                         sorted_buckets->ComputeChecksum());
+  if (crcs->component_crcs.sorted_buckets_crc != sorted_buckets_crc.Get()) {
+    return absl_ports::FailedPreconditionError(
+        "Mismatch crc with IntegerIndexStorage sorted buckets");
+  }
+
+  ICING_ASSIGN_OR_RETURN(Crc32 unsorted_buckets_crc,
+                         unsorted_buckets->ComputeChecksum());
+  if (crcs->component_crcs.unsorted_buckets_crc != unsorted_buckets_crc.Get()) {
+    return absl_ports::FailedPreconditionError(
+        "Mismatch crc with IntegerIndexStorage unsorted buckets");
+  }
+
+  // TODO(b/259744228): implement and verify flash_index_storage checksum
+
+  return libtextclassifier3::Status::OK;
+}
+
+// The following 4 methods are helper functions to get the correct file path of
+// metadata/sorted_buckets/unsorted_buckets/flash_index_storage, according to
+// the given working directory.
+// Suffix ".m": the metadata file, storing <Crcs><Info>.
+std::string GetMetadataFilePath(std::string_view working_dir) {
+  return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+                            ".m");
+}
+
+// Suffix ".s": the FileBackedVector holding the sorted bucket array.
+std::string GetSortedBucketsFilePath(std::string_view working_dir) {
+  return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+                            ".s");
+}
+
+// Suffix ".u": the FileBackedVector holding the unsorted bucket array.
+std::string GetUnsortedBucketsFilePath(std::string_view working_dir) {
+  return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+                            ".u");
+}
+
+// Suffix ".f": the FlashIndexStorage file holding posting lists.
+std::string GetFlashIndexStorageFilePath(std::string_view working_dir) {
+  return absl_ports::StrCat(working_dir, "/", IntegerIndexStorage::kFilePrefix,
+                            ".f");
+}
+
+} // namespace
+
+// We add (BasicHits, key) into a bucket in DocumentId descending and SectionId
+// ascending order. When doing range query, we may access buckets and want to
+// return BasicHits to callers sorted by DocumentId. Therefore, this problem is
+// actually "merge K sorted lists".
+// To implement this algorithm via priority_queue, we create this wrapper class
+// to store PostingListIntegerIndexAccessor for iterating through the posting
+// list chain.
+// - Non-relevant (i.e. not in range [key_lower, key_upper]) will be skipped.
+// - Relevant BasicHits will be returned.
+class BucketPostingListIterator {
+ public:
+  class Comparator {
+   public:
+    // REQUIRES: 2 BucketPostingListIterator* instances (lhs, rhs) should be
+    // valid, i.e. the preceding AdvanceAndFilter() succeeded.
+    bool operator()(const BucketPostingListIterator* lhs,
+                    const BucketPostingListIterator* rhs) const {
+      // std::priority_queue is a max heap and we should return BasicHits in
+      // DocumentId descending order.
+      // - BucketPostingListIterator::operator< should have the same order as
+      //   DocumentId.
+      // - BasicHit encodes inverted document id and BasicHit::operator<
+      //   compares the encoded raw value directly.
+      // - Therefore, BucketPostingListIterator::operator< should compare
+      //   BasicHit reversely.
+      // - This will make priority_queue return buckets in DocumentId
+      //   descending and SectionId ascending order.
+      // - Whatever direction we sort SectionId by (or pop by priority_queue)
+      //   doesn't matter because all hits for the same DocumentId will be
+      //   merged into a single DocHitInfo.
+      return rhs->GetCurrentBasicHit() < lhs->GetCurrentBasicHit();
+    }
+  };
+
+  explicit BucketPostingListIterator(
+      std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor)
+      : pl_accessor_(std::move(pl_accessor)),
+        should_retrieve_next_batch_(true) {}
+
+  // Advances to the next relevant data. The posting list of a bucket contains
+  // keys within range [bucket.key_lower, bucket.key_upper], but some of them
+  // may be out of [query_key_lower, query_key_upper], so when advancing we have
+  // to filter out those non-relevant keys.
+  //
+  // Returns:
+  //   - OK on success
+  //   - OUT_OF_RANGE_ERROR if reaching the end (i.e. no more relevant data)
+  //   - Any other PostingListIntegerIndexAccessor errors
+  libtextclassifier3::Status AdvanceAndFilter(int64_t query_key_lower,
+                                              int64_t query_key_upper) {
+    // Move curr_ until reaching a relevant data (i.e. key in range
+    // [query_key_lower, query_key_upper])
+    do {
+      if (!should_retrieve_next_batch_) {
+        ++curr_;
+        should_retrieve_next_batch_ =
+            curr_ >= cached_batch_integer_index_data_.cend();
+      }
+      if (should_retrieve_next_batch_) {
+        ICING_RETURN_IF_ERROR(GetNextDataBatch());
+        should_retrieve_next_batch_ = false;
+      }
+    } while (curr_->key() < query_key_lower || curr_->key() > query_key_upper);
+
+    return libtextclassifier3::Status::OK;
+  }
+
+  // REQUIRES: the preceding AdvanceAndFilter() succeeded, so that curr_ points
+  //   at valid cached data.
+  const BasicHit& GetCurrentBasicHit() const { return curr_->basic_hit(); }
+
+ private:
+  // Gets next batch of data from the posting list chain, caches in
+  // cached_batch_integer_index_data_, and sets curr_ to the begin of the cache.
+  //
+  // Returns:
+  //   - OK on success
+  //   - OUT_OF_RANGE_ERROR if the fetched batch is empty (end of the chain)
+  //   - Any other PostingListIntegerIndexAccessor errors
+  libtextclassifier3::Status GetNextDataBatch() {
+    auto cached_batch_integer_index_data_or = pl_accessor_->GetNextDataBatch();
+    if (!cached_batch_integer_index_data_or.ok()) {
+      ICING_LOG(WARNING)
+          << "Fail to get next batch data from posting list due to: "
+          << cached_batch_integer_index_data_or.status().error_message();
+      return std::move(cached_batch_integer_index_data_or).status();
+    }
+
+    cached_batch_integer_index_data_ =
+        std::move(cached_batch_integer_index_data_or).ValueOrDie();
+    curr_ = cached_batch_integer_index_data_.cbegin();
+
+    if (cached_batch_integer_index_data_.empty()) {
+      return absl_ports::OutOfRangeError("End of iterator");
+    }
+
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Accessor for iterating through the bucket's posting list chain. Owned.
+  std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor_;
+
+  // Most recently fetched batch of data from the posting list chain.
+  std::vector<IntegerIndexData> cached_batch_integer_index_data_;
+
+  // Position of the current data within cached_batch_integer_index_data_.
+  std::vector<IntegerIndexData>::const_iterator curr_;
+
+  // True when the cached batch is exhausted (or not fetched yet) and the next
+  // AdvanceAndFilter() call must fetch a new batch before advancing curr_.
+  bool should_retrieve_next_batch_;
+};
+
+// Wrapper class to iterate through IntegerIndexStorage to get relevant data.
+// It uses multiple BucketPostingListIterator instances from different candidate
+// buckets and merges all relevant BasicHits from these buckets by
+// std::priority_queue in DocumentId descending order. Also different SectionIds
+// of the same DocumentId will be merged into SectionIdMask and returned as a
+// single DocHitInfo.
+class IntegerIndexStorageIterator : public NumericIndex<int64_t>::Iterator {
+ public:
+  // Constructs the iterator from candidate bucket iterators. Each bucket
+  // iterator is advanced once to its first in-range data; iterators with no
+  // in-range data at all are discarded here.
+  explicit IntegerIndexStorageIterator(
+      int64_t query_key_lower, int64_t query_key_upper,
+      std::vector<std::unique_ptr<BucketPostingListIterator>>&& bucket_pl_iters)
+      : NumericIndex<int64_t>::Iterator(query_key_lower, query_key_upper) {
+    std::vector<BucketPostingListIterator*> bucket_pl_iters_raw_ptrs;
+    for (std::unique_ptr<BucketPostingListIterator>& bucket_pl_itr :
+         bucket_pl_iters) {
+      // Before adding BucketPostingListIterator* into the priority queue, we
+      // have to advance the bucket iterator to the first valid data since the
+      // priority queue needs valid data to compare the order.
+      // Note: it is possible that the bucket iterator fails to advance for the
+      // first round, because data could be filtered out by [query_key_lower,
+      // query_key_upper]. In this case, just discard the iterator.
+      if (bucket_pl_itr->AdvanceAndFilter(query_key_lower, query_key_upper)
+              .ok()) {
+        bucket_pl_iters_raw_ptrs.push_back(bucket_pl_itr.get());
+        bucket_pl_iters_.push_back(std::move(bucket_pl_itr));
+      }
+    }
+
+    pq_ = std::priority_queue<BucketPostingListIterator*,
+                              std::vector<BucketPostingListIterator*>,
+                              BucketPostingListIterator::Comparator>(
+        comparator_, std::move(bucket_pl_iters_raw_ptrs));
+  }
+
+  ~IntegerIndexStorageIterator() override = default;
+
+  // Advances to the next DocHitInfo. Note: several BucketPostingListIterator
+  // instances may be advanced if they point to data with the same DocumentId.
+  //
+  // Returns:
+  //   - OK on success
+  //   - OUT_OF_RANGE_ERROR if reaching the end (i.e. no more relevant data)
+  //   - Any BucketPostingListIterator errors
+  libtextclassifier3::Status Advance() override;
+
+  DocHitInfo GetDocHitInfo() const override { return doc_hit_info_; }
+
+ private:
+  // Stateless comparator instance handed to pq_ at construction.
+  BucketPostingListIterator::Comparator comparator_;
+
+  // We have to fetch and pop the top BucketPostingListIterator from
+  // std::priority_queue to perform "merge K sorted lists algorithm".
+  // - Since std::priority_queue::pop() doesn't return the top element, we have
+  //   to call top() and pop() together.
+  // - std::move the top() element by const_cast is not an appropriate way
+  //   because it introduces transient unstable state for std::priority_queue.
+  // - We don't want to copy BucketPostingListIterator, either.
+  // - Therefore, add bucket_pl_iters_ for the ownership of all
+  //   BucketPostingListIterator instances and std::priority_queue uses the raw
+  //   pointer. So when calling top(), we can simply copy the raw pointer via
+  //   top() and avoid transient unstable state.
+  std::vector<std::unique_ptr<BucketPostingListIterator>> bucket_pl_iters_;
+  std::priority_queue<BucketPostingListIterator*,
+                      std::vector<BucketPostingListIterator*>,
+                      BucketPostingListIterator::Comparator>
+      pq_;
+
+  // DocHitInfo produced by the most recent successful Advance() call.
+  DocHitInfo doc_hit_info_;
+};
+
+// Pops every bucket iterator whose current hit has the same (largest)
+// DocumentId, merges their SectionIds into doc_hit_info_, and re-pushes the
+// iterators that still have in-range data. Iterators that are exhausted (or
+// fail to advance) simply drop out of the queue.
+libtextclassifier3::Status IntegerIndexStorageIterator::Advance() {
+  if (pq_.empty()) {
+    return absl_ports::OutOfRangeError("End of iterator");
+  }
+
+  DocumentId document_id = pq_.top()->GetCurrentBasicHit().document_id();
+  doc_hit_info_ = DocHitInfo(document_id);
+  // Merge sections with same document_id into a single DocHitInfo
+  while (!pq_.empty() &&
+         pq_.top()->GetCurrentBasicHit().document_id() == document_id) {
+    doc_hit_info_.UpdateSection(pq_.top()->GetCurrentBasicHit().section_id());
+
+    BucketPostingListIterator* bucket_itr = pq_.top();
+    pq_.pop();
+
+    if (bucket_itr->AdvanceAndFilter(key_lower_, key_upper_).ok()) {
+      pq_.push(bucket_itr);
+    }
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// A custom init bucket layout is valid iff the combined sorted + unsorted init
+// buckets are non-empty, pairwise disjoint, union to exactly
+// [INT64_MIN, INT64_MAX], and every bucket has an invalid (not-yet-allocated)
+// posting list identifier.
+bool IntegerIndexStorage::Options::IsValid() const {
+  if (!HasCustomInitBuckets()) {
+    return true;
+  }
+
+  // Verify if the range of buckets are disjoint and the range union is
+  // [INT64_MIN, INT64_MAX].
+  std::vector<Bucket> buckets;
+  buckets.reserve(custom_init_sorted_buckets.size() +
+                  custom_init_unsorted_buckets.size());
+  buckets.insert(buckets.end(), custom_init_sorted_buckets.begin(),
+                 custom_init_sorted_buckets.end());
+  buckets.insert(buckets.end(), custom_init_unsorted_buckets.begin(),
+                 custom_init_unsorted_buckets.end());
+  if (buckets.empty()) {
+    return false;
+  }
+  std::sort(buckets.begin(), buckets.end());
+  int64_t expected_lower = std::numeric_limits<int64_t>::min();
+  for (size_t i = 0; i < buckets.size(); ++i) {
+    // key_lower should not be greater than key_upper and init bucket should
+    // have invalid posting list identifier.
+    if (buckets[i].key_lower() > buckets[i].key_upper() ||
+        buckets[i].posting_list_identifier().is_valid()) {
+      return false;
+    }
+
+    if (buckets[i].key_lower() != expected_lower) {
+      return false;
+    }
+
+    // If it is the last bucket, then key_upper should be INT64_MAX. Otherwise
+    // it should not be INT64_MAX. Use XOR for this logic.
+    const bool is_last = (i == buckets.size() - 1);
+    if ((buckets[i].key_upper() == std::numeric_limits<int64_t>::max()) ^
+        is_last) {
+      return false;
+    }
+    // Only advance expected_lower for non-last buckets: the last bucket's
+    // key_upper is INT64_MAX, and evaluating INT64_MAX + 1 would be signed
+    // integer overflow (undefined behavior). Non-last buckets are guaranteed
+    // above to have key_upper < INT64_MAX, so the + 1 is safe.
+    if (!is_last) {
+      expected_lower = buckets[i].key_upper() + 1;
+    }
+  }
+
+  return true;
+}
+
+// Returns:
+//   - INVALID_ARGUMENT_ERROR if options are invalid
+//   - INTERNAL_ERROR if an incomplete working directory cannot be deleted
+//   - Any InitializeNewFiles / InitializeExistingFiles errors
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::Create(
+    const Filesystem& filesystem, std::string_view base_dir, Options options,
+    PostingListIntegerIndexSerializer* posting_list_serializer) {
+  if (!options.IsValid()) {
+    return absl_ports::InvalidArgumentError(
+        "Invalid IntegerIndexStorage options");
+  }
+
+  std::string working_dir = absl_ports::StrCat(base_dir, "/", kSubDirectory);
+  if (!filesystem.FileExists(GetMetadataFilePath(working_dir).c_str()) ||
+      !filesystem.FileExists(GetSortedBucketsFilePath(working_dir).c_str()) ||
+      !filesystem.FileExists(GetUnsortedBucketsFilePath(working_dir).c_str()) ||
+      !filesystem.FileExists(
+          GetFlashIndexStorageFilePath(working_dir).c_str())) {
+    // Delete working_dir if any of them is missing, and reinitialize.
+    if (!filesystem.DeleteDirectoryRecursively(working_dir.c_str())) {
+      return absl_ports::InternalError(
+          absl_ports::StrCat("Failed to delete directory: ", working_dir));
+    }
+    return InitializeNewFiles(filesystem, std::move(working_dir),
+                              std::move(options), posting_list_serializer);
+  }
+  return InitializeExistingFiles(filesystem, std::move(working_dir),
+                                 std::move(options), posting_list_serializer);
+}
+
+IntegerIndexStorage::~IntegerIndexStorage() {
+  // Best-effort flush: a destructor cannot return an error, so a failed
+  // persist is logged and otherwise ignored.
+  if (!PersistToDisk().ok()) {
+    // Fixed copy-paste in the log message: this is the integer index storage,
+    // not a hash map.
+    ICING_LOG(WARNING)
+        << "Failed to persist integer index storage to disk while destructing "
+        << working_dir_;
+  }
+}
+
+namespace {
+
+// Orders buckets by key_upper so that std::lower_bound can locate the first
+// bucket whose key_upper is >= a given key (used by AddKeys and GetIterator).
+class IntegerIndexStorageComparator {
+ public:
+  bool operator()(const IntegerIndexStorage::Bucket& lhs, int64_t rhs) const {
+    return lhs.key_upper() < rhs;
+  }
+};
+
+// File-local comparator instance. Placed in an anonymous namespace and made
+// const to give it internal linkage, consistent with the other file-local
+// helpers in this translation unit (the original was a mutable namespace-scope
+// global with external linkage).
+const IntegerIndexStorageComparator kComparator;
+
+}  // namespace
+
+// Adds index data (BasicHit(document_id, section_id), key) for every key in
+// new_keys. new_keys is sorted and deduped in place first. Step 1 routes keys
+// that fall into sorted-array buckets via binary search; step 2 handles the
+// remaining keys by scanning each unsorted-array bucket; step 3 appends
+// buckets created by posting list splits to the unsorted bucket array.
+libtextclassifier3::Status IntegerIndexStorage::AddKeys(
+    DocumentId document_id, SectionId section_id,
+    std::vector<int64_t>&& new_keys) {
+  if (new_keys.empty()) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  std::sort(new_keys.begin(), new_keys.end());
+
+  // Dedupe
+  auto last = std::unique(new_keys.begin(), new_keys.end());
+  new_keys.erase(last, new_keys.end());
+
+  // When adding keys into a bucket, we potentially split it into 2 new buckets
+  // and one of them will be added into the unsorted bucket array.
+  // When handling keys belonging to buckets in the unsorted bucket array, we
+  // don't have to (and must not) handle these newly split buckets. Therefore,
+  // collect all newly split buckets in another vector and append them into the
+  // unsorted bucket array after adding all keys.
+  std::vector<Bucket> new_buckets;
+
+  // Binary search range of the sorted bucket array.
+  const Bucket* sorted_bucket_arr_begin = sorted_buckets_->array();
+  const Bucket* sorted_bucket_arr_end =
+      sorted_buckets_->array() + sorted_buckets_->num_elements();
+
+  // Step 1: handle keys belonging to buckets in the sorted bucket array. Skip
+  //         keys belonging to the unsorted bucket array and deal with them in
+  //         the next step.
+  // - Iterate through new_keys by it_start.
+  // - Binary search (std::lower_bound comparing key with bucket.key_upper()) to
+  //   find the first bucket in the sorted bucket array with key_upper is not
+  //   smaller than (>=) the key.
+  // - Skip (and advance it_start) all keys smaller than the target bucket's
+  //   key_lower. It means these keys belong to buckets in the unsorted bucket
+  //   array and we will deal with them later.
+  // - Find it_end such that all keys within range [it_start, it_end) belong to
+  //   the target bucket.
+  // - Batch add keys within range [it_start, it_end) into the target bucket.
+  auto it_start = new_keys.cbegin();
+  while (it_start != new_keys.cend() &&
+         sorted_bucket_arr_begin < sorted_bucket_arr_end) {
+    // Use std::lower_bound to find the first bucket in the sorted bucket array
+    // with key_upper >= *it_start.
+    const Bucket* target_bucket = std::lower_bound(
+        sorted_bucket_arr_begin, sorted_bucket_arr_end, *it_start, kComparator);
+    if (target_bucket >= sorted_bucket_arr_end) {
+      // Keys in range [it_start, new_keys.cend()) are greater than all sorted
+      // buckets' key_upper, so we can end step 1. In fact, they belong to
+      // buckets in the unsorted bucket array and we will deal with them in
+      // step 2.
+      break;
+    }
+
+    // Sequential instead of binary search to advance it_start and it_end for
+    // several reasons:
+    // - Eventually we have to iterate through all keys within range [it_start,
+    //   it_end) and add them into the posting list, so binary search doesn't
+    //   improve the overall time complexity.
+    // - Binary search may jump to far-away indices, which potentially
+    //   downgrades the cache performance.
+
+    // After binary search, we've ensured *it_start <=
+    // target_bucket->key_upper(), but it is still possible that *it_start (and
+    // the next several keys) is still smaller than target_bucket->key_lower(),
+    // so we have to skip them. In fact, they belong to buckets in the unsorted
+    // bucket array.
+    //
+    // For example:
+    // - sorted bucket array: [(INT_MIN, 0), (1, 5), (100, 300), (301, 550)]
+    // - unsorted bucket array: [(550, INT_MAX), (6, 99)]
+    // - new_keys: [10, 20, 40, 102, 150, 200, 500, 600]
+    // std::lower_bound (target = 10) will get target_bucket = (100, 300), but
+    // we have to skip 10, 20, 40 because they are smaller than 100 (the
+    // bucket's key_lower). We should move it_start pointing to key 102.
+    while (it_start != new_keys.cend() &&
+           *it_start < target_bucket->key_lower()) {
+      ++it_start;
+    }
+
+    // Locate it_end such that all keys within range [it_start, it_end) belong
+    // to target_bucket and all keys outside this range don't belong to
+    // target_bucket.
+    //
+    // For example (continue above), we should locate it_end to point to key
+    // 500.
+    auto it_end = it_start;
+    while (it_end != new_keys.cend() && *it_end <= target_bucket->key_upper()) {
+      ++it_end;
+    }
+
+    // Now, keys within range [it_start, it_end) belong to target_bucket, so
+    // construct IntegerIndexData and add them into the bucket's posting list.
+    if (it_start != it_end) {
+      ICING_ASSIGN_OR_RETURN(
+          FileBackedVector<Bucket>::MutableView mutable_bucket,
+          sorted_buckets_->GetMutable(target_bucket -
+                                      sorted_buckets_->array()));
+      ICING_ASSIGN_OR_RETURN(
+          std::vector<Bucket> round_new_buckets,
+          AddKeysIntoBucketAndSplitIfNecessary(
+              document_id, section_id, it_start, it_end, mutable_bucket));
+      new_buckets.insert(new_buckets.end(), round_new_buckets.begin(),
+                         round_new_buckets.end());
+    }
+
+    it_start = it_end;
+    sorted_bucket_arr_begin = target_bucket + 1;
+  }
+
+  // Step 2: handle keys belonging to buckets in the unsorted bucket array. They
+  //         were skipped in step 1.
+  // For each bucket in the unsorted bucket array, find [it_start, it_end) such
+  // that all keys within this range belong to the bucket and add them.
+  // - Binary search (std::lower_bound comparing bucket.key_lower() with key) to
+  //   find it_start.
+  // - Sequential advance (start from it_start) to find it_end. Same reason as
+  //   above for choosing sequential advance instead of binary search.
+  // - Add keys within range [it_start, it_end) into the bucket.
+  for (int32_t i = 0; i < unsorted_buckets_->num_elements(); ++i) {
+    ICING_ASSIGN_OR_RETURN(FileBackedVector<Bucket>::MutableView mutable_bucket,
+                           unsorted_buckets_->GetMutable(i));
+    auto it_start = std::lower_bound(new_keys.cbegin(), new_keys.cend(),
+                                     mutable_bucket.Get().key_lower());
+    if (it_start == new_keys.cend()) {
+      continue;
+    }
+
+    // Sequential advance instead of binary search to find the correct position
+    // of it_end for the same reasons mentioned above in step 1.
+    auto it_end = it_start;
+    while (it_end != new_keys.cend() &&
+           *it_end <= mutable_bucket.Get().key_upper()) {
+      ++it_end;
+    }
+
+    // Now, key within range [it_start, it_end) belong to the bucket, so
+    // construct IntegerIndexData and add them into the bucket's posting list.
+    if (it_start != it_end) {
+      ICING_ASSIGN_OR_RETURN(
+          std::vector<Bucket> round_new_buckets,
+          AddKeysIntoBucketAndSplitIfNecessary(
+              document_id, section_id, it_start, it_end, mutable_bucket));
+      new_buckets.insert(new_buckets.end(), round_new_buckets.begin(),
+                         round_new_buckets.end());
+    }
+  }
+
+  // Step 3: append new buckets into the unsorted bucket array.
+  if (!new_buckets.empty()) {
+    ICING_ASSIGN_OR_RETURN(
+        typename FileBackedVector<Bucket>::MutableArrayView mutable_new_arr,
+        unsorted_buckets_->Allocate(new_buckets.size()));
+    mutable_new_arr.SetArray(/*idx=*/0, new_buckets.data(), new_buckets.size());
+  }
+
+  // Step 4: merge the unsorted bucket array into the sorted bucket array if the
+  //         length of the unsorted bucket array exceeds the threshold.
+  // TODO(b/259743562): [Optimization 1] implement merge
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Returns a DocHitInfoIterator over all data with key in
+// [query_key_lower, query_key_upper]. Candidate buckets are collected from the
+// sorted bucket array via binary search and from the unsorted bucket array via
+// linear scan; buckets without a valid posting list identifier are skipped.
+//
+// Returns:
+//   - INVALID_ARGUMENT_ERROR if query_key_lower > query_key_upper
+//   - Any PostingListIntegerIndexAccessor errors
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+IntegerIndexStorage::GetIterator(int64_t query_key_lower,
+                                 int64_t query_key_upper) const {
+  if (query_key_lower > query_key_upper) {
+    return absl_ports::InvalidArgumentError(
+        "key_lower should not be greater than key_upper");
+  }
+
+  std::vector<std::unique_ptr<BucketPostingListIterator>> bucket_pl_iters;
+
+  // Sorted bucket array
+  const Bucket* sorted_bucket_arr_begin = sorted_buckets_->array();
+  const Bucket* sorted_bucket_arr_end =
+      sorted_buckets_->array() + sorted_buckets_->num_elements();
+  for (const Bucket* bucket =
+           std::lower_bound(sorted_bucket_arr_begin, sorted_bucket_arr_end,
+                            query_key_lower, kComparator);
+       bucket < sorted_bucket_arr_end && bucket->key_lower() <= query_key_upper;
+       ++bucket) {
+    if (!bucket->posting_list_identifier().is_valid()) {
+      continue;
+    }
+
+    ICING_ASSIGN_OR_RETURN(
+        std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+        PostingListIntegerIndexAccessor::CreateFromExisting(
+            flash_index_storage_.get(), posting_list_serializer_,
+            bucket->posting_list_identifier()));
+    bucket_pl_iters.push_back(
+        std::make_unique<BucketPostingListIterator>(std::move(pl_accessor)));
+  }
+
+  // Unsorted bucket array
+  for (int32_t i = 0; i < unsorted_buckets_->num_elements(); ++i) {
+    ICING_ASSIGN_OR_RETURN(const Bucket* bucket, unsorted_buckets_->Get(i));
+    if (query_key_upper < bucket->key_lower() ||
+        query_key_lower > bucket->key_upper() ||
+        !bucket->posting_list_identifier().is_valid()) {
+      // Skip bucket whose range doesn't overlap with [query_key_lower,
+      // query_key_upper] or posting_list_identifier is invalid.
+      continue;
+    }
+
+    ICING_ASSIGN_OR_RETURN(
+        std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+        PostingListIntegerIndexAccessor::CreateFromExisting(
+            flash_index_storage_.get(), posting_list_serializer_,
+            bucket->posting_list_identifier()));
+    bucket_pl_iters.push_back(
+        std::make_unique<BucketPostingListIterator>(std::move(pl_accessor)));
+  }
+
+  return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
+      std::make_unique<IntegerIndexStorageIterator>(
+          query_key_lower, query_key_upper, std::move(bucket_pl_iters)));
+}
+
+// Flushes the sorted/unsorted bucket arrays and the flash index storage to
+// disk, recomputes all checksums, and syncs the metadata file.
+//
+// Returns:
+//   - OK on success
+//   - INTERNAL_ERROR if FlashIndexStorage fails to persist
+//   - Any FileBackedVector / MemoryMappedFile errors
+libtextclassifier3::Status IntegerIndexStorage::PersistToDisk() {
+  ICING_RETURN_IF_ERROR(sorted_buckets_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(unsorted_buckets_->PersistToDisk());
+  if (!flash_index_storage_->PersistToDisk()) {
+    return absl_ports::InternalError(
+        "Fail to persist FlashIndexStorage to disk");
+  }
+
+  ICING_RETURN_IF_ERROR(UpdateChecksums(crcs(), info(), sorted_buckets_.get(),
+                                        unsorted_buckets_.get(),
+                                        flash_index_storage_.get()));
+  // Changes should have been applied to the underlying file when using
+  // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
+  // extra safety step to ensure they are written out.
+  ICING_RETURN_IF_ERROR(metadata_mmapped_file_->PersistToDisk());
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Creates a brand-new IntegerIndexStorage under working_dir: creates the
+// directory, the sorted/unsorted bucket FileBackedVectors and the
+// FlashIndexStorage, seeds the bucket arrays (either from
+// options.custom_init_*_buckets or with a single [INT64_MIN, INT64_MAX]
+// bucket), and writes a fresh metadata file with computed checksums.
+//
+// Returns:
+//   - On success: a new IntegerIndexStorage instance
+//   - INTERNAL_ERROR if directory creation fails
+//   - Any FileBackedVector/FlashIndexStorage/MemoryMappedFile errors
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::InitializeNewFiles(
+    const Filesystem& filesystem, std::string&& working_dir, Options&& options,
+    PostingListIntegerIndexSerializer* posting_list_serializer) {
+  // Create working directory.
+  if (!filesystem.CreateDirectoryRecursively(working_dir.c_str())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to create directory: ", working_dir));
+  }
+
+  // TODO(b/259743562): [Optimization 1] decide max # buckets, unsorted buckets
+  //                    threshold
+  // Initialize sorted_buckets. Pre-map room for 1024 buckets.
+  int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
+  int32_t max_file_size =
+      pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+      FileBackedVector<Bucket>::Create(
+          filesystem, GetSortedBucketsFilePath(working_dir),
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+          pre_mapping_mmap_size));
+
+  // Initialize unsorted_buckets. Pre-map room for 100 buckets.
+  pre_mapping_mmap_size = sizeof(Bucket) * 100;
+  max_file_size =
+      pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+      FileBackedVector<Bucket>::Create(
+          filesystem, GetUnsortedBucketsFilePath(working_dir),
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+          pre_mapping_mmap_size));
+
+  // Initialize flash_index_storage
+  ICING_ASSIGN_OR_RETURN(
+      FlashIndexStorage flash_index_storage,
+      FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_dir),
+                                &filesystem, posting_list_serializer));
+
+  if (options.HasCustomInitBuckets()) {
+    // Insert custom init buckets. The sorted bucket array must be kept in
+    // key_lower order (Bucket::operator< compares key_lower), so sort before
+    // bulk-copying.
+    std::sort(options.custom_init_sorted_buckets.begin(),
+              options.custom_init_sorted_buckets.end());
+    ICING_ASSIGN_OR_RETURN(
+        typename FileBackedVector<Bucket>::MutableArrayView
+            mutable_new_sorted_bucket_arr,
+        sorted_buckets->Allocate(options.custom_init_sorted_buckets.size()));
+    mutable_new_sorted_bucket_arr.SetArray(
+        /*idx=*/0, options.custom_init_sorted_buckets.data(),
+        options.custom_init_sorted_buckets.size());
+
+    ICING_ASSIGN_OR_RETURN(typename FileBackedVector<Bucket>::MutableArrayView
+                               mutable_new_unsorted_bucket_arr,
+                           unsorted_buckets->Allocate(
+                               options.custom_init_unsorted_buckets.size()));
+    mutable_new_unsorted_bucket_arr.SetArray(
+        /*idx=*/0, options.custom_init_unsorted_buckets.data(),
+        options.custom_init_unsorted_buckets.size());
+
+    // After inserting buckets, we can clear vectors since there is no need to
+    // cache them.
+    options.custom_init_sorted_buckets.clear();
+    options.custom_init_unsorted_buckets.clear();
+  } else {
+    // Insert one bucket with range [INT64_MIN, INT64_MAX].
+    ICING_RETURN_IF_ERROR(sorted_buckets->Append(Bucket(
+        /*key_lower=*/std::numeric_limits<int64_t>::min(),
+        /*key_upper=*/std::numeric_limits<int64_t>::max())));
+  }
+  // NOTE(review): only sorted_buckets is explicitly persisted here; the
+  // unsorted bucket array presumably relies on READ_WRITE_AUTO_SYNC to reach
+  // disk before the checksum computation below — confirm.
+  ICING_RETURN_IF_ERROR(sorted_buckets->PersistToDisk());
+
+  // Create and initialize new info
+  Info new_info;
+  new_info.magic = Info::kMagic;
+  new_info.num_keys = 0;
+
+  // Compute checksums
+  Crcs new_crcs;
+  ICING_RETURN_IF_ERROR(
+      UpdateChecksums(&new_crcs, &new_info, sorted_buckets.get(),
+                      unsorted_buckets.get(), &flash_index_storage));
+
+  const std::string metadata_file_path = GetMetadataFilePath(working_dir);
+  // Write new metadata file
+  ICING_RETURN_IF_ERROR(
+      WriteMetadata(filesystem, metadata_file_path, &new_crcs, &new_info));
+
+  // Mmap the content of the crcs and info.
+  ICING_ASSIGN_OR_RETURN(
+      MemoryMappedFile metadata_mmapped_file,
+      MemoryMappedFile::Create(filesystem, metadata_file_path,
+                               MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+                               /*max_file_size=*/kMetadataFileSize,
+                               /*pre_mapping_file_offset=*/0,
+                               /*pre_mapping_mmap_size=*/kMetadataFileSize));
+
+  return std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+      filesystem, std::move(working_dir), std::move(options),
+      posting_list_serializer,
+      std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+      std::move(sorted_buckets), std::move(unsorted_buckets),
+      std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+}
+
+// Opens an existing IntegerIndexStorage: mmaps the metadata file, re-opens the
+// bucket FileBackedVectors and the FlashIndexStorage, and validates the stored
+// checksums against the current on-disk content before returning an instance.
+//
+// Returns:
+//   - On success: the IntegerIndexStorage instance
+//   - FAILED_PRECONDITION_ERROR if any checksum doesn't match
+//   - Any FileBackedVector/FlashIndexStorage/MemoryMappedFile errors
+/* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+IntegerIndexStorage::InitializeExistingFiles(
+    const Filesystem& filesystem, std::string&& working_dir, Options&& options,
+    PostingListIntegerIndexSerializer* posting_list_serializer) {
+  // Mmap the content of the crcs and info.
+  ICING_ASSIGN_OR_RETURN(
+      MemoryMappedFile metadata_mmapped_file,
+      MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_dir),
+                               MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+                               /*max_file_size=*/kMetadataFileSize,
+                               /*pre_mapping_file_offset=*/0,
+                               /*pre_mapping_mmap_size=*/kMetadataFileSize));
+
+  // TODO(b/259743562): [Optimization 1] decide max # buckets, unsorted buckets
+  //                    threshold
+  // Initialize sorted_buckets. Sizes must match InitializeNewFiles.
+  int32_t pre_mapping_mmap_size = sizeof(Bucket) * (1 << 10);
+  int32_t max_file_size =
+      pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
+      FileBackedVector<Bucket>::Create(
+          filesystem, GetSortedBucketsFilePath(working_dir),
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+          pre_mapping_mmap_size));
+
+  // Initialize unsorted_buckets
+  pre_mapping_mmap_size = sizeof(Bucket) * 100;
+  max_file_size =
+      pre_mapping_mmap_size + FileBackedVector<Bucket>::Header::kHeaderSize;
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
+      FileBackedVector<Bucket>::Create(
+          filesystem, GetUnsortedBucketsFilePath(working_dir),
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+          pre_mapping_mmap_size));
+
+  // Initialize flash_index_storage
+  ICING_ASSIGN_OR_RETURN(
+      FlashIndexStorage flash_index_storage,
+      FlashIndexStorage::Create(GetFlashIndexStorageFilePath(working_dir),
+                                &filesystem, posting_list_serializer));
+
+  // The metadata region has a fixed layout: Crcs at Crcs::kFileOffset and
+  // Info at Info::kFileOffset.
+  Crcs* crcs_ptr = reinterpret_cast<Crcs*>(
+      metadata_mmapped_file.mutable_region() + Crcs::kFileOffset);
+  Info* info_ptr = reinterpret_cast<Info*>(
+      metadata_mmapped_file.mutable_region() + Info::kFileOffset);
+  // Validate checksums of info and 3 storages.
+  ICING_RETURN_IF_ERROR(
+      ValidateChecksums(crcs_ptr, info_ptr, sorted_buckets.get(),
+                        unsorted_buckets.get(), &flash_index_storage));
+
+  return std::unique_ptr<IntegerIndexStorage>(new IntegerIndexStorage(
+      filesystem, std::move(working_dir), std::move(options),
+      posting_list_serializer,
+      std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
+      std::move(sorted_buckets), std::move(unsorted_buckets),
+      std::make_unique<FlashIndexStorage>(std::move(flash_index_storage))));
+}
+
+// Appends the keys in [it_start, it_end) — all belonging to (document_id,
+// section_id) — to the posting list of the given bucket, creating a new
+// posting list if the bucket doesn't have one yet.
+//
+// Returns:
+//   - On success: a vector of newly created buckets (currently always empty;
+//     bucket splitting is not implemented yet — see TODOs below)
+//   - Any FileBackedVector or PostingList errors
+libtextclassifier3::StatusOr<std::vector<IntegerIndexStorage::Bucket>>
+IntegerIndexStorage::AddKeysIntoBucketAndSplitIfNecessary(
+    DocumentId document_id, SectionId section_id,
+    const std::vector<int64_t>::const_iterator& it_start,
+    const std::vector<int64_t>::const_iterator& it_end,
+    FileBackedVector<Bucket>::MutableView& mutable_bucket) {
+  // Reuse the bucket's existing posting list chain if it has a valid
+  // identifier; otherwise start a new one.
+  std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor;
+  if (mutable_bucket.Get().posting_list_identifier().is_valid()) {
+    ICING_ASSIGN_OR_RETURN(
+        pl_accessor, PostingListIntegerIndexAccessor::CreateFromExisting(
+                         flash_index_storage_.get(), posting_list_serializer_,
+                         mutable_bucket.Get().posting_list_identifier()));
+  } else {
+    ICING_ASSIGN_OR_RETURN(
+        pl_accessor, PostingListIntegerIndexAccessor::Create(
+                         flash_index_storage_.get(), posting_list_serializer_));
+  }
+
+  for (auto it = it_start; it != it_end; ++it) {
+    // TODO(b/259743562): [Optimization 1] implement split bucket if pl is full
+    //                    and the bucket is splittable
+    ICING_RETURN_IF_ERROR(pl_accessor->PrependData(
+        IntegerIndexData(section_id, document_id, *it)));
+  }
+
+  // TODO(b/259743562): [Optimization 1] implement split and return new buckets.
+  //                    We will change the original bucket (mutable_bucket)
+  //                    in-place to one of the new buckets, and the rest will
+  //                    be returned and added into unsorted buckets in AddKeys.
+  PostingListAccessor::FinalizeResult result =
+      std::move(*pl_accessor).Finalize();
+  if (!result.status.ok()) {
+    return result.status;
+  }
+
+  // Record the finalized posting list id on the bucket so future lookups find
+  // the data.
+  mutable_bucket.Get().set_posting_list_identifier(result.id);
+
+  return std::vector<Bucket>();
+}
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/integer-index-storage.h b/icing/index/numeric/integer-index-storage.h
index 2048e76..562060b 100644
--- a/icing/index/numeric/integer-index-storage.h
+++ b/icing/index/numeric/integer-index-storage.h
@@ -19,13 +19,19 @@
#include <memory>
#include <string>
#include <string_view>
+#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/file/file-backed-vector.h"
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -91,6 +97,14 @@ class IntegerIndexStorage {
}
} __attribute__((packed));
+    // Writes this Crcs struct into the metadata file at Crcs::kFileOffset.
+    //
+    // Returns:
+    //   - OK on success
+    //   - INTERNAL_ERROR if PWrite fails
+    libtextclassifier3::Status Serialize(const Filesystem& filesystem,
+                                         int fd) const {
+      if (!filesystem.PWrite(fd, kFileOffset, this, sizeof(*this))) {
+        return absl_ports::InternalError("Failed to write crcs into file");
+      }
+      return libtextclassifier3::Status::OK;
+    }
+
+
bool operator==(const Crcs& other) const {
return all_crc == other.all_crc && component_crcs == other.component_crcs;
}
@@ -108,6 +122,14 @@ class IntegerIndexStorage {
int32_t magic;
int32_t num_keys;
+    // Writes this Info struct into the metadata file at Info::kFileOffset.
+    //
+    // Returns:
+    //   - OK on success
+    //   - INTERNAL_ERROR if PWrite fails
+    libtextclassifier3::Status Serialize(const Filesystem& filesystem,
+                                         int fd) const {
+      if (!filesystem.PWrite(fd, kFileOffset, this, sizeof(*this))) {
+        return absl_ports::InternalError("Failed to write info into file");
+      }
+      return libtextclassifier3::Status::OK;
+    }
+
+
Crc32 ComputeChecksum() const {
return Crc32(
std::string_view(reinterpret_cast<const char*>(this), sizeof(Info)));
@@ -115,6 +137,9 @@ class IntegerIndexStorage {
} __attribute__((packed));
static_assert(sizeof(Info) == 8, "");
+ static constexpr int32_t kMetadataFileSize = sizeof(Crcs) + sizeof(Info);
+ static_assert(kMetadataFileSize == 28);
+
// Bucket
class Bucket {
public:
@@ -126,17 +151,26 @@ class IntegerIndexStorage {
static constexpr int32_t kMaxNumBuckets = 1 << 23;
explicit Bucket(int64_t key_lower, int64_t key_upper,
- PostingListIdentifier posting_list_identifier)
+ PostingListIdentifier posting_list_identifier =
+ PostingListIdentifier::kInvalid)
: key_lower_(key_lower),
key_upper_(key_upper),
posting_list_identifier_(posting_list_identifier) {}
+    // Orders buckets by key_lower_ only, for sorting/searching bucket arrays.
+    // NOTE(review): intentionally weaker than operator==, which also compares
+    // key_upper_ and the posting list identifier.
+    bool operator<(const Bucket& other) const {
+      return key_lower_ < other.key_lower_;
+    }
+
// For FileBackedVector
bool operator==(const Bucket& other) const {
return key_lower_ == other.key_lower_ && key_upper_ == other.key_upper_ &&
posting_list_identifier_ == other.posting_list_identifier_;
}
+ int64_t key_lower() const { return key_lower_; }
+
+ int64_t key_upper() const { return key_upper_; }
+
PostingListIdentifier posting_list_identifier() const {
return posting_list_identifier_;
}
@@ -160,19 +194,185 @@ class IntegerIndexStorage {
FileBackedVector<Bucket>::kElementTypeSize,
"Max # of buckets cannot fit into FileBackedVector");
+  // Initialization options for IntegerIndexStorage::Create.
+  struct Options {
+    // Default options: no custom init buckets.
+    explicit Options() {}
+
+    explicit Options(std::vector<Bucket> custom_init_sorted_buckets_in,
+                     std::vector<Bucket> custom_init_unsorted_buckets_in)
+        : custom_init_sorted_buckets(std::move(custom_init_sorted_buckets_in)),
+          custom_init_unsorted_buckets(
+              std::move(custom_init_unsorted_buckets_in)) {}
+
+    // Returns whether the custom init buckets satisfy the requirements
+    // described below. Defined in the .cc file.
+    bool IsValid() const;
+
+    // True iff any custom init buckets were supplied.
+    bool HasCustomInitBuckets() const {
+      return !custom_init_sorted_buckets.empty() ||
+             !custom_init_unsorted_buckets.empty();
+    }
+
+    // Custom buckets when initializing new files. If both are empty, then the
+    // initial bucket is (INT64_MIN, INT64_MAX). Usually we only set them in the
+    // unit test. Note that all buckets in custom_init_sorted_buckets and
+    // custom_init_unsorted_buckets should be disjoint and the range union
+    // should be [INT64_MIN, INT64_MAX].
+    std::vector<Bucket> custom_init_sorted_buckets;
+    std::vector<Bucket> custom_init_unsorted_buckets;
+  };
+
+
+ static constexpr std::string_view kSubDirectory = "storage_dir";
+ static constexpr std::string_view kFilePrefix = "integer_index_storage";
+
+ // Creates a new IntegerIndexStorage instance to index integers. For directory
+ // management purpose, we define working_dir as "<base_dir>/storage_dir", and
+ // all underlying files will be stored under it. If any of the underlying file
+ // is missing, then delete the whole working_dir and (re)initialize with new
+ // ones. Otherwise initialize and create the instance by existing files.
+ //
+ // filesystem: Object to make system level calls
+ // base_dir: Specifies the base directory for all integer index data related
+ // files to be stored. As mentioned above, all files will be stored
+ // under working_dir (which is "<base_dir>/storage_dir").
+ // options: Options instance.
+ // posting_list_serializer: a PostingListIntegerIndexSerializer instance to
+ // serialize/deserialize integer index data to/from
+ // posting lists.
+ //
+ // Returns:
+ // - INVALID_ARGUMENT_ERROR if any value in options is invalid.
+ // - FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // - INTERNAL_ERROR on I/O errors.
+ // - Any FileBackedVector/FlashIndexStorage errors.
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ Create(const Filesystem& filesystem, std::string_view base_dir,
+ Options options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ // Delete copy and move constructor/assignment operator.
+ IntegerIndexStorage(const IntegerIndexStorage&) = delete;
+ IntegerIndexStorage& operator=(const IntegerIndexStorage&) = delete;
+
+ IntegerIndexStorage(IntegerIndexStorage&&) = delete;
+ IntegerIndexStorage& operator=(IntegerIndexStorage&&) = delete;
+
+ ~IntegerIndexStorage();
+
+ // Batch adds new keys (of the same DocumentId and SectionId) into the integer
+ // index storage.
+ // Note that since we separate different property names into different integer
+ // index storages, it is impossible to have keys in a single document across
+ // multiple sections to add into the same integer index storage.
+ //
+ // Returns:
+ // - OK on success
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::Status AddKeys(DocumentId document_id,
+ SectionId section_id,
+ std::vector<int64_t>&& new_keys);
+
+ // Returns a DocHitInfoIteratorNumeric<int64_t> (in DocHitInfoIterator
+ // interface type format) for iterating through all docs which have the
+ // specified (integer) property contents in range [query_key_lower,
+ // query_key_upper].
+ // When iterating through all relevant doc hits, it:
+ // - Merges multiple SectionIds of doc hits with same DocumentId into a single
+ // SectionIdMask and constructs DocHitInfo.
+ // - Returns DocHitInfo in descending DocumentId order.
+ //
+ // Returns:
+ // - On success: a DocHitInfoIterator(Numeric)
+ // - INVALID_ARGUMENT_ERROR if query_key_lower > query_key_upper
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+ int64_t query_key_lower, int64_t query_key_upper) const;
+
+ // Flushes content to underlying files.
+ //
+ // Returns:
+ // - OK on success
+ // - INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status PersistToDisk();
+
private:
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeNewFiles(
+ const Filesystem& filesystem, std::string&& working_dir,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
+ static libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ InitializeExistingFiles(
+ const Filesystem& filesystem, std::string&& working_dir,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer);
+
explicit IntegerIndexStorage(
- const Filesystem& filesystem, std::string_view base_dir,
- PostingListUsedIntegerIndexDataSerializer* serializer,
+ const Filesystem& filesystem, std::string&& working_dir,
+ Options&& options,
+ PostingListIntegerIndexSerializer* posting_list_serializer,
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets,
std::unique_ptr<FileBackedVector<Bucket>> unsorted_buckets,
- std::unique_ptr<FlashIndexStorage> flash_index_storage);
+ std::unique_ptr<FlashIndexStorage> flash_index_storage)
+ : filesystem_(filesystem),
+ working_dir_(std::move(working_dir)),
+ options_(std::move(options)),
+ posting_list_serializer_(posting_list_serializer),
+ metadata_mmapped_file_(std::move(metadata_mmapped_file)),
+ sorted_buckets_(std::move(sorted_buckets)),
+ unsorted_buckets_(std::move(unsorted_buckets)),
+ flash_index_storage_(std::move(flash_index_storage)) {}
+
+ // Helper function to add keys in range [it_start, it_end) into the given
+ // bucket. It handles the bucket and its corresponding posting list(s) to make
+ // searching and indexing efficient.
+ //
+ // When the (single) posting list of the bucket is full:
+ // - If the size of posting list hasn't reached the max size, then just simply
+ // add a new key into it, and PostingListAccessor mechanism will
+ // automatically double the size of the posting list.
+ // - Else:
+ // - If the bucket is splittable (i.e. key_lower < key_upper), then split it
+ // into several new buckets with new ranges, and split the data (according
+ // to their keys and the range of new buckets) of the original posting
+ // list into several new posting lists.
+ // TODO(b/259743562): [Optimization 1] implement split
+ // - Otherwise, just simply add a new key into it, and PostingListAccessor
+ // mechanism will automatically create a new max size posting list and
+ // chain them.
+ //
+ // Returns:
+ // - On success: a vector of new Buckets (to add into the unsorted bucket
+ // array later)
+ // - Any FileBackedVector or PostingList errors
+ libtextclassifier3::StatusOr<std::vector<Bucket>>
+ AddKeysIntoBucketAndSplitIfNecessary(
+ DocumentId document_id, SectionId section_id,
+ const std::vector<int64_t>::const_iterator& it_start,
+ const std::vector<int64_t>::const_iterator& it_end,
+ FileBackedVector<Bucket>::MutableView& mutable_bucket);
+
+  // Reinterprets the fixed offsets of the mmapped metadata region as the Crcs
+  // and Info structs. Pointers are valid only while metadata_mmapped_file_ is
+  // alive and mapped.
+  Crcs* crcs() {
+    return reinterpret_cast<Crcs*>(metadata_mmapped_file_->mutable_region() +
+                                   Crcs::kFileOffset);
+  }
+
+  Info* info() {
+    return reinterpret_cast<Info*>(metadata_mmapped_file_->mutable_region() +
+                                   Info::kFileOffset);
+  }
+
+  const Info* info() const {
+    return reinterpret_cast<const Info*>(metadata_mmapped_file_->region() +
+                                         Info::kFileOffset);
+  }
const Filesystem& filesystem_;
- std::string base_dir_;
+ std::string working_dir_;
+
+ Options options_;
- PostingListUsedIntegerIndexDataSerializer* serializer_; // Does not own.
+ PostingListIntegerIndexSerializer* posting_list_serializer_; // Does not own.
std::unique_ptr<MemoryMappedFile> metadata_mmapped_file_;
std::unique_ptr<FileBackedVector<Bucket>> sorted_buckets_;
diff --git a/icing/index/numeric/integer-index-storage_test.cc b/icing/index/numeric/integer-index-storage_test.cc
new file mode 100644
index 0000000..0afc96b
--- /dev/null
+++ b/icing/index/numeric/integer-index-storage_test.cc
@@ -0,0 +1,1147 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/numeric/integer-index-storage.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Ne;
+
+using Bucket = IntegerIndexStorage::Bucket;
+using Crcs = IntegerIndexStorage::Crcs;
+using Info = IntegerIndexStorage::Info;
+using Options = IntegerIndexStorage::Options;
+
+static constexpr int32_t kCorruptedValueOffset = 3;
+static constexpr DocumentId kDefaultDocumentId = 123;
+static constexpr SectionId kDefaultSectionId = 31;
+
+// Test fixture providing a scratch base directory and a posting list
+// serializer; both are torn down (and the directory deleted) after each test.
+class IntegerIndexStorageTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    base_dir_ = GetTestTempDir() + "/integer_index_storage_test";
+
+    serializer_ = std::make_unique<PostingListIntegerIndexSerializer>();
+  }
+
+  void TearDown() override {
+    serializer_.reset();
+    filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+  }
+
+  Filesystem filesystem_;
+  std::string base_dir_;
+  std::unique_ptr<PostingListIntegerIndexSerializer> serializer_;
+};
+
+// Test helper: drains the storage's iterator for [key_lower, key_upper] and
+// returns the collected DocHitInfos.
+// NOTE(review): any non-OK Advance() (including normal exhaustion) ends the
+// loop, so real iteration errors are indistinguishable from end-of-data here.
+libtextclassifier3::StatusOr<std::vector<DocHitInfo>> Query(
+    const IntegerIndexStorage* storage, int64_t key_lower, int64_t key_upper) {
+  ICING_ASSIGN_OR_RETURN(std::unique_ptr<DocHitInfoIterator> iter,
+                         storage->GetIterator(key_lower, key_upper));
+  std::vector<DocHitInfo> hits;
+  while (iter->Advance().ok()) {
+    hits.push_back(iter->doc_hit_info());
+  }
+  return hits;
+}
+
+// Default-constructed Options (no custom buckets) must be valid.
+TEST_F(IntegerIndexStorageTest, OptionsEmptyCustomInitBucketsShouldBeValid) {
+  EXPECT_THAT(Options().IsValid(), IsTrue());
+}
+
+// A bucket with key_lower > key_upper (e.g. Bucket(9, 6)) invalidates Options,
+// whether it appears in the sorted or the unsorted array.
+TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsRange) {
+  // Invalid custom init sorted bucket
+  EXPECT_THAT(
+      Options(/*custom_init_sorted_buckets_in=*/
+              {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)},
+              /*custom_init_unsorted_buckets_in=*/
+              {Bucket(10, std::numeric_limits<int64_t>::max())})
+          .IsValid(),
+      IsFalse());
+
+  // Invalid custom init unsorted bucket
+  EXPECT_THAT(
+      Options(/*custom_init_sorted_buckets_in=*/
+              {Bucket(10, std::numeric_limits<int64_t>::max())},
+              /*custom_init_unsorted_buckets_in=*/
+              {Bucket(std::numeric_limits<int64_t>::min(), 5), Bucket(9, 6)})
+          .IsValid(),
+      IsFalse());
+}
+
+// Custom init buckets must carry an *invalid* posting list identifier (they
+// have no data yet); a valid identifier invalidates Options.
+TEST_F(IntegerIndexStorageTest,
+       OptionsInvalidCustomInitBucketsPostingListIdentifier) {
+  // Custom init buckets should contain invalid posting list identifier.
+  PostingListIdentifier valid_posting_list_identifier(0, 0, 0);
+  ASSERT_THAT(valid_posting_list_identifier.is_valid(), IsTrue());
+
+  // Invalid custom init sorted bucket
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
+                      {Bucket(std::numeric_limits<int64_t>::min(),
+                              std::numeric_limits<int64_t>::max(),
+                              valid_posting_list_identifier)},
+                      /*custom_init_unsorted_buckets_in=*/{})
+                  .IsValid(),
+              IsFalse());
+
+  // Invalid custom init unsorted bucket
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+                      /*custom_init_unsorted_buckets_in=*/
+                      {Bucket(std::numeric_limits<int64_t>::min(),
+                              std::numeric_limits<int64_t>::max(),
+                              valid_posting_list_identifier)})
+                  .IsValid(),
+              IsFalse());
+}
+
+// Bucket ranges must be disjoint — overlaps within an array, or across the
+// sorted/unsorted arrays, invalidate Options.
+TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsOverlapping) {
+  // sorted buckets overlap: both contain key -100.
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
+                      {Bucket(std::numeric_limits<int64_t>::min(), -100),
+                       Bucket(-100, std::numeric_limits<int64_t>::max())},
+                      /*custom_init_unsorted_buckets_in=*/{})
+                  .IsValid(),
+              IsFalse());
+
+  // unsorted buckets overlap
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/{},
+                      /*custom_init_unsorted_buckets_in=*/
+                      {Bucket(-100, std::numeric_limits<int64_t>::max()),
+                       Bucket(std::numeric_limits<int64_t>::min(), -100)})
+                  .IsValid(),
+              IsFalse());
+
+  // Cross buckets overlap: [-99, 0] vs [0, 50].
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
+                      {Bucket(std::numeric_limits<int64_t>::min(), -100),
+                       Bucket(-99, 0)},
+                      /*custom_init_unsorted_buckets_in=*/
+                      {Bucket(200, std::numeric_limits<int64_t>::max()),
+                       Bucket(0, 50), Bucket(51, 199)})
+                  .IsValid(),
+              IsFalse());
+}
+
+// The union of all custom bucket ranges must cover [INT64_MIN, INT64_MAX];
+// any gap (at either end or in the middle) invalidates Options.
+TEST_F(IntegerIndexStorageTest, OptionsInvalidCustomInitBucketsUnion) {
+  // Missing INT64_MAX
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
+                      {Bucket(std::numeric_limits<int64_t>::min(), -100),
+                       Bucket(-99, 0)},
+                      /*custom_init_unsorted_buckets_in=*/
+                      {Bucket(1, 1000)})
+                  .IsValid(),
+              IsFalse());
+
+  // Missing INT64_MIN
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
+                      {Bucket(-200, -100), Bucket(-99, 0)},
+                      /*custom_init_unsorted_buckets_in=*/
+                      {Bucket(1, std::numeric_limits<int64_t>::max())})
+                  .IsValid(),
+              IsFalse());
+
+  // Missing some intermediate ranges: gap between -100 and 1.
+  EXPECT_THAT(Options(/*custom_init_sorted_buckets_in=*/
+                      {Bucket(std::numeric_limits<int64_t>::min(), -100)},
+                      /*custom_init_unsorted_buckets_in=*/
+                      {Bucket(1, std::numeric_limits<int64_t>::max())})
+                  .IsValid(),
+              IsFalse());
+}
+
+// Creating under a path where no directory can be made ("/dev/null") must
+// surface as INTERNAL.
+TEST_F(IntegerIndexStorageTest, InvalidBaseDir) {
+  EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, "/dev/null", Options(),
+                                          serializer_.get()),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// Create must reject Options that fail IsValid() with INVALID_ARGUMENT.
+TEST_F(IntegerIndexStorageTest, CreateWithInvalidOptionsShouldFail) {
+  // Overlapping unsorted buckets (both contain -100) make Options invalid.
+  Options invalid_options(
+      /*custom_init_sorted_buckets_in=*/{},
+      /*custom_init_unsorted_buckets_in=*/{
+          Bucket(-100, std::numeric_limits<int64_t>::max()),
+          Bucket(std::numeric_limits<int64_t>::min(), -100)});
+  ASSERT_THAT(invalid_options.IsValid(), IsFalse());
+
+  EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, base_dir_,
+                                          invalid_options, serializer_.get()),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Verifies that a freshly created + persisted storage writes a metadata file
+// whose Info and Crcs sections have the expected initial values.
+TEST_F(IntegerIndexStorageTest, InitializeNewFiles) {
+  {
+    // Create new integer index storage
+    ASSERT_FALSE(filesystem_.DirectoryExists(base_dir_.c_str()));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<IntegerIndexStorage> storage,
+        IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                    serializer_.get()));
+
+    ICING_ASSERT_OK(storage->PersistToDisk());
+  }
+
+  // Metadata file should be initialized correctly for both info and crcs
+  // sections.
+  const std::string metadata_file_path =
+      absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
+                         "/", IntegerIndexStorage::kFilePrefix, ".m");
+  ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+  ASSERT_TRUE(metadata_sfd.is_valid());
+
+  // Check info section
+  Info info;
+  ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+                                Info::kFileOffset));
+  EXPECT_THAT(info.magic, Eq(Info::kMagic));
+  EXPECT_THAT(info.num_keys, Eq(0));
+
+  // Check crcs section
+  Crcs crcs;
+  ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+                                Crcs::kFileOffset));
+  // # of elements in sorted_buckets should be 1, so it should have non-zero
+  // crc value.
+  EXPECT_THAT(crcs.component_crcs.sorted_buckets_crc, Ne(0));
+  // Other empty file backed vectors should have 0 crc value.
+  EXPECT_THAT(crcs.component_crcs.unsorted_buckets_crc, Eq(0));
+  EXPECT_THAT(crcs.component_crcs.flash_index_storage_crc, Eq(0));
+  // info_crc and all_crc must be recomputable from the bytes just read back.
+  EXPECT_THAT(crcs.component_crcs.info_crc,
+              Eq(Crc32(std::string_view(reinterpret_cast<const char*>(&info),
+                                        sizeof(Info)))
+                     .Get()));
+  EXPECT_THAT(crcs.all_crc,
+              Eq(Crc32(std::string_view(
+                           reinterpret_cast<const char*>(&crcs.component_crcs),
+                           sizeof(Crcs::ComponentCrcs)))
+                     .Get()));
+}
+
+// While the first instance is still alive and hasn't called PersistToDisk,
+// the on-disk checksums are stale, so a second Create on the same files must
+// fail checksum validation.
+TEST_F(IntegerIndexStorageTest,
+       InitializationShouldFailWithoutPersistToDiskOrDestruction) {
+  // Create new integer index storage
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<IntegerIndexStorage> storage,
+      IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                  serializer_.get()));
+
+  // Insert some data.
+  ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/0, /*section_id=*/20,
+                                   /*new_keys=*/{0, 100, -100}));
+  ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/1, /*section_id=*/2,
+                                   /*new_keys=*/{3, -1000, 500}));
+  ICING_ASSERT_OK(storage->AddKeys(/*document_id=*/2, /*section_id=*/15,
+                                   /*new_keys=*/{-6, 321, 98}));
+
+  // Without calling PersistToDisk, checksums will not be recomputed or synced
+  // to disk, so initializing another instance on the same files should fail.
+  EXPECT_THAT(IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                          serializer_.get()),
+              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+}
+
+// After PersistToDisk, a second instance on the same files must initialize
+// successfully and return the same query results as the first.
+TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedWithPersistToDisk) {
+  // Create new integer index storage
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<IntegerIndexStorage> storage1,
+      IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                  serializer_.get()));
+
+  // Insert some data.
+  ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/0, /*section_id=*/20,
+                                    /*new_keys=*/{0, 100, -100}));
+  ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/1, /*section_id=*/2,
+                                    /*new_keys=*/{3, -1000, 500}));
+  ICING_ASSERT_OK(storage1->AddKeys(/*document_id=*/2, /*section_id=*/15,
+                                    /*new_keys=*/{-6, 321, 98}));
+  // Snapshot the hits from the first instance for comparison below.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<DocHitInfo> doc_hit_info_vec,
+      Query(storage1.get(),
+            /*key_lower=*/std::numeric_limits<int64_t>::min(),
+            /*key_upper=*/std::numeric_limits<int64_t>::max()));
+
+  // After calling PersistToDisk, all checksums should be recomputed and synced
+  // correctly to disk, so initializing another instance on the same files
+  // should succeed, and we should be able to get the same contents.
+  ICING_EXPECT_OK(storage1->PersistToDisk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<IntegerIndexStorage> storage2,
+      IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                  serializer_.get()));
+  EXPECT_THAT(
+      Query(storage2.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+            /*key_upper=*/std::numeric_limits<int64_t>::max()),
+      IsOkAndHolds(
+          ElementsAreArray(doc_hit_info_vec.begin(), doc_hit_info_vec.end())));
+}
+
+// Destroying an instance (without an explicit PersistToDisk call) must leave
+// the files in a state a new instance can be initialized from.
+TEST_F(IntegerIndexStorageTest, InitializationShouldSucceedAfterDestruction) {
+  std::vector<DocHitInfo> doc_hit_info_vec;
+  {
+    // Create new integer index storage
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<IntegerIndexStorage> storage,
+        IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                    serializer_.get()));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        doc_hit_info_vec,
+        Query(storage.get(),
+              /*key_lower=*/std::numeric_limits<int64_t>::min(),
+              /*key_upper=*/std::numeric_limits<int64_t>::max()));
+  }
+
+  {
+    // The previous instance went out of scope and was destructed. Although we
+    // didn't call PersistToDisk explicitly, the destructor should invoke it and
+    // thus initializing another instance on the same files should succeed, and
+    // we should be able to get the same contents.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<IntegerIndexStorage> storage,
+        IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                    serializer_.get()));
+    EXPECT_THAT(
+        Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+              /*key_upper=*/std::numeric_limits<int64_t>::max()),
+        IsOkAndHolds(ElementsAreArray(doc_hit_info_vec.begin(),
+                                      doc_hit_info_vec.end())));
+  }
+}
+
+// Corrupting all_crc in the metadata file must make a subsequent Create fail
+// with FAILED_PRECONDITION and the specific "Invalid all crc" message.
+TEST_F(IntegerIndexStorageTest,
+       InitializeExistingFilesWithWrongAllCrcShouldFail) {
+  {
+    // Create new integer index storage
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<IntegerIndexStorage> storage,
+        IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+                                    serializer_.get()));
+    ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+                                     /*new_keys=*/{0, 100, -100}));
+
+    ICING_ASSERT_OK(storage->PersistToDisk());
+  }
+
+  const std::string metadata_file_path =
+      absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
+                         "/", IntegerIndexStorage::kFilePrefix, ".m");
+  ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+  ASSERT_TRUE(metadata_sfd.is_valid());
+
+  Crcs crcs;
+  ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+                                Crcs::kFileOffset));
+
+  // Manually corrupt all_crc
+  crcs.all_crc += kCorruptedValueOffset;
+  ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
+                                 sizeof(Crcs)));
+  // Close the fd so the corruption is flushed before re-opening the storage.
+  metadata_sfd.reset();
+
+  {
+    // Attempt to create the integer index storage with metadata containing
+    // corrupted all_crc. This should fail.
+    libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+        storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+                                                 Options(), serializer_.get());
+    EXPECT_THAT(storage_or,
+                StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+    EXPECT_THAT(storage_or.status().error_message(),
+                HasSubstr("Invalid all crc for IntegerIndexStorage"));
+  }
+}
+
+TEST_F(IntegerIndexStorageTest,
+ InitializeExistingFilesWithCorruptedInfoShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
+ "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Info info;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &info, sizeof(Info),
+ Info::kFileOffset));
+
+ // Modify info, but don't update the checksum. This would be similar to
+ // corruption of info.
+ info.num_keys += kCorruptedValueOffset;
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Info::kFileOffset, &info,
+ sizeof(Info)));
+ {
+ // Attempt to create the integer index storage with info that doesn't match
+ // its checksum and confirm that it fails.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+ Options(), serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(storage_or.status().error_message(),
+ HasSubstr("Invalid info crc for IntegerIndexStorage"));
+ }
+}
+
+TEST_F(IntegerIndexStorageTest,
+ InitializeExistingFilesWithWrongSortedBucketsCrcShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
+ "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ Crcs::kFileOffset));
+
+ // Manually corrupt sorted_buckets_crc
+ crcs.component_crcs.sorted_buckets_crc += kCorruptedValueOffset;
+ crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get();
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
+ sizeof(Crcs)));
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted sorted_buckets_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+ Options(), serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ storage_or.status().error_message(),
+ HasSubstr("Mismatch crc with IntegerIndexStorage sorted buckets"));
+ }
+}
+
+TEST_F(IntegerIndexStorageTest,
+ InitializeExistingFilesWithWrongUnsortedBucketsCrcShouldFail) {
+ {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ serializer_.get()));
+ ICING_ASSERT_OK(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ /*new_keys=*/{0, 100, -100}));
+
+ ICING_ASSERT_OK(storage->PersistToDisk());
+ }
+
+ const std::string metadata_file_path =
+ absl_ports::StrCat(base_dir_, "/", IntegerIndexStorage::kSubDirectory,
+ "/", IntegerIndexStorage::kFilePrefix, ".m");
+ ScopedFd metadata_sfd(filesystem_.OpenForWrite(metadata_file_path.c_str()));
+ ASSERT_TRUE(metadata_sfd.is_valid());
+
+ Crcs crcs;
+ ASSERT_TRUE(filesystem_.PRead(metadata_sfd.get(), &crcs, sizeof(Crcs),
+ Crcs::kFileOffset));
+
+ // Manually corrupt unsorted_buckets_crc
+ crcs.component_crcs.unsorted_buckets_crc += kCorruptedValueOffset;
+ crcs.all_crc = crcs.component_crcs.ComputeChecksum().Get();
+ ASSERT_TRUE(filesystem_.PWrite(metadata_sfd.get(), Crcs::kFileOffset, &crcs,
+ sizeof(Crcs)));
+ {
+ // Attempt to create the integer index storage with metadata containing
+ // corrupted unsorted_buckets_crc. This should fail.
+ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
+ storage_or = IntegerIndexStorage::Create(filesystem_, base_dir_,
+ Options(), serializer_.get());
+ EXPECT_THAT(storage_or,
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ storage_or.status().error_message(),
+ HasSubstr("Mismatch crc with IntegerIndexStorage unsorted buckets"));
+ }
+}
+
+// TODO(b/259744228): add test for corrupted flash_index_storage_crc
+
+TEST_F(IntegerIndexStorageTest, InvalidQuery) {
+ // Create new integer index storage
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(filesystem_, base_dir_, Options(),
+ serializer_.get()));
+ EXPECT_THAT(
+ storage->GetIterator(/*query_key_lower=*/0, /*query_key_upper=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(IntegerIndexStorageTest, ExactQuerySortedBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key in each sorted bucket should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-500, /*key_upper=*/-500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/208, /*key_upper=*/208),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-200, /*key_upper=*/-200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-1000),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/300, /*key_upper=*/300),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_F(IntegerIndexStorageTest, ExactQueryUnsortedBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key in each unsorted bucket should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::max(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::min()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1500, /*key_upper=*/-1500),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/2000, /*key_upper=*/2000),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections))));
+}
+
+TEST_F(IntegerIndexStorageTest, ExactQueryIdenticalKeys) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into buckets [(0,100), (1000,INT64_MAX)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{20}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{20}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Exact query on key with multiple hits should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1024, /*key_upper=*/1024),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/20, /*key_upper=*/20),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TEST_F(IntegerIndexStorageTest, RangeQueryEmptyIntegerIndexStorage) {
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireSortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into sorted buckets [(-1000,-100), (200,300)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on each sorted bucket boundary should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-100),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/0, /*key_upper=*/100),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/150, /*key_upper=*/199),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/200, /*key_upper=*/300),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/301, /*key_upper=*/999),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(IntegerIndexStorageTest, RangeQuerySingleEntireUnsortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into unsorted buckets [(1000,INT64_MAX), (INT64_MIN,-1001)].
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on each unsorted bucket boundary should get the correct result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/1000,
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-99, /*key_upper=*/-1),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/101, /*key_upper=*/149),
+ IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/-1001),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections))));
+}
+
+TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialSortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into sorted bucket (0,100).
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{43}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{30}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on partial range of each sorted bucket should get the correct
+ // result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/200),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/31, /*key_upper=*/49),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/25, /*key_upper=*/31),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/3, /*key_upper=*/5),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(IntegerIndexStorageTest, RangeQuerySinglePartialUnsortedBucket) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into unsorted bucket (-99,-1).
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-19}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{-72}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query on partial range of each unsorted bucket should get the correct
+ // result.
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-1000, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/149),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-38, /*key_upper=*/-15),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-80, /*key_upper=*/-38),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-95, /*key_upper=*/-92),
+ IsOkAndHolds(IsEmpty()));
+}
+
+TEST_F(IntegerIndexStorageTest, RangeQueryMultipleBuckets) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys into buckets [(-1000,-100), (200,300), (1000,INT64_MAX),
+ // (INT64_MIN,-1001)]
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/0, kDefaultSectionId,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/1, kDefaultSectionId,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/2, kDefaultSectionId,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/3, kDefaultSectionId,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/4, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/5, kDefaultSectionId,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/6, kDefaultSectionId,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(/*document_id=*/7, kDefaultSectionId,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/8, kDefaultSectionId,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(/*document_id=*/9, kDefaultSectionId,
+ /*new_keys=*/{2000}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ // Range query should get the correct result.
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/-199,
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(/*document_id=*/9, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/6, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/4, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/3, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/1, expected_sections))));
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/-200),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/8, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/7, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/5, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/2, expected_sections),
+ EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TEST_F(IntegerIndexStorageTest, BatchAdd) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Sorted buckets: [(-1000,-100), (0,100), (150,199), (200,300), (301,999)]
+ // Unsorted buckets: [(1000,INT64_MAX), (-99,-1), (101,149),
+ // (INT64_MIN,-1001)]
+ // Batch add the following keys (including some edge cases) to test the
+ // correctness of the sort and binary search logic in AddKeys().
+ // clang-format off
+ std::vector<int64_t> keys = {4000, 3000, 2000, 300, 201, 200, 106, 104,
+ 100, 3, 2, 1, 0, -97, -98, -99,
+ -100, -200, -1000, -1001, -1500, -2000,
+ std::numeric_limits<int64_t>::max(),
+ std::numeric_limits<int64_t>::min()};
+ // clang-format on
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, kDefaultSectionId,
+ std::vector<int64_t>(keys)),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ for (int64_t key : keys) {
+ EXPECT_THAT(Query(storage.get(), /*key_lower=*/key, /*key_upper=*/key),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
+ }
+}
+
+TEST_F(IntegerIndexStorageTest, MultipleKeysShouldMergeAndDedupeDocHitInfo) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys with same document id and section id.
+ EXPECT_THAT(
+ storage->AddKeys(
+ /*document_id=*/0, kDefaultSectionId, /*new_keys=*/
+ {-500, 1024, -200, 208, std::numeric_limits<int64_t>::max(), -1000,
+ 300, std::numeric_limits<int64_t>::min(), -1500, 2000}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {kDefaultSectionId};
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(
+ ElementsAre(EqualsDocHitInfo(/*document_id=*/0, expected_sections))));
+}
+
+TEST_F(IntegerIndexStorageTest,
+ MultipleSectionsShouldMergeSectionsAndDedupeDocHitInfo) {
+ // We use predefined custom buckets to initialize new integer index storage
+ // and create some test keys accordingly.
+ std::vector<Bucket> custom_init_sorted_buckets = {
+ Bucket(-1000, -100), Bucket(0, 100), Bucket(150, 199), Bucket(200, 300),
+ Bucket(301, 999)};
+ std::vector<Bucket> custom_init_unsorted_buckets = {
+ Bucket(1000, std::numeric_limits<int64_t>::max()), Bucket(-99, -1),
+ Bucket(101, 149), Bucket(std::numeric_limits<int64_t>::min(), -1001)};
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<IntegerIndexStorage> storage,
+ IntegerIndexStorage::Create(
+ filesystem_, base_dir_,
+ Options(std::move(custom_init_sorted_buckets),
+ std::move(custom_init_unsorted_buckets)),
+ serializer_.get()));
+
+ // Add some keys with same document id but different section ids.
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/63,
+ /*new_keys=*/{-500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/62,
+ /*new_keys=*/{1024}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/61,
+ /*new_keys=*/{-200}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/60,
+ /*new_keys=*/{208}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, /*section_id=*/59,
+ /*new_keys=*/{std::numeric_limits<int64_t>::max()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/58,
+ /*new_keys=*/{-1000}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/57,
+ /*new_keys=*/{300}),
+ IsOk());
+ EXPECT_THAT(
+ storage->AddKeys(kDefaultDocumentId, /*section_id=*/56,
+ /*new_keys=*/{std::numeric_limits<int64_t>::min()}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/55,
+ /*new_keys=*/{-1500}),
+ IsOk());
+ EXPECT_THAT(storage->AddKeys(kDefaultDocumentId, /*section_id=*/54,
+ /*new_keys=*/{2000}),
+ IsOk());
+
+ std::vector<SectionId> expected_sections = {63, 62, 61, 60, 59,
+ 58, 57, 56, 55, 54};
+ EXPECT_THAT(
+ Query(storage.get(), /*key_lower=*/std::numeric_limits<int64_t>::min(),
+ /*key_upper=*/std::numeric_limits<int64_t>::max()),
+ IsOkAndHolds(ElementsAre(
+ EqualsDocHitInfo(kDefaultDocumentId, expected_sections))));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/numeric/posting-list-integer-index-data-accessor.cc b/icing/index/numeric/posting-list-integer-index-accessor.cc
index 73b48e2..271f8dc 100644
--- a/icing/index/numeric/posting-list-integer-index-data-accessor.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-integer-index-data-accessor.h"
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
#include <cstdint>
#include <memory>
@@ -26,17 +26,16 @@
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/numeric/integer-index-data.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
/* static */ libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
-PostingListIntegerIndexDataAccessor::Create(
- FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer) {
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+PostingListIntegerIndexAccessor::Create(
+ FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer) {
uint32_t max_posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
storage->block_size(), serializer->GetDataTypeBytes());
std::unique_ptr<uint8_t[]> posting_list_buffer_array =
@@ -45,20 +44,19 @@ PostingListIntegerIndexDataAccessor::Create(
PostingListUsed posting_list_buffer,
PostingListUsed::CreateFromUnitializedRegion(
serializer, posting_list_buffer_array.get(), max_posting_list_bytes));
- return std::unique_ptr<PostingListIntegerIndexDataAccessor>(
- new PostingListIntegerIndexDataAccessor(
+ return std::unique_ptr<PostingListIntegerIndexAccessor>(
+ new PostingListIntegerIndexAccessor(
storage, std::move(posting_list_buffer_array),
std::move(posting_list_buffer), serializer));
}
/* static */ libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
-PostingListIntegerIndexDataAccessor::CreateFromExisting(
- FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer,
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
+PostingListIntegerIndexAccessor::CreateFromExisting(
+ FlashIndexStorage* storage, PostingListIntegerIndexSerializer* serializer,
PostingListIdentifier existing_posting_list_id) {
ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
Create(storage, serializer));
ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
storage->GetPostingList(existing_posting_list_id));
@@ -69,13 +67,13 @@ PostingListIntegerIndexDataAccessor::CreateFromExisting(
// Returns the next batch of integer index data for the provided posting list.
libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
-PostingListIntegerIndexDataAccessor::GetNextDataBatch() {
+PostingListIntegerIndexAccessor::GetNextDataBatch() {
if (preexisting_posting_list_ == nullptr) {
if (has_reached_posting_list_chain_end_) {
return std::vector<IntegerIndexData>();
}
return absl_ports::FailedPreconditionError(
- "Cannot retrieve data from a PostingListIntegerIndexDataAccessor that "
+ "Cannot retrieve data from a PostingListIntegerIndexAccessor that "
"was not created from a preexisting posting list.");
}
ICING_ASSIGN_OR_RETURN(
@@ -106,7 +104,7 @@ PostingListIntegerIndexDataAccessor::GetNextDataBatch() {
return batch;
}
-libtextclassifier3::Status PostingListIntegerIndexDataAccessor::PrependData(
+libtextclassifier3::Status PostingListIntegerIndexAccessor::PrependData(
const IntegerIndexData& data) {
PostingListUsed& active_pl = (preexisting_posting_list_ != nullptr)
? preexisting_posting_list_->posting_list
diff --git a/icing/index/numeric/posting-list-integer-index-data-accessor.h b/icing/index/numeric/posting-list-integer-index-accessor.h
index 7835bf9..64a8901 100644
--- a/icing/index/numeric/posting-list-integer-index-data-accessor.h
+++ b/icing/index/numeric/posting-list-integer-index-accessor.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_DATA_ACCESSOR_H_
-#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_DATA_ACCESSOR_H_
+#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
+#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
#include <cstdint>
#include <memory>
@@ -26,7 +26,7 @@
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/numeric/integer-index-data.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
namespace icing {
namespace lib {
@@ -34,35 +34,35 @@ namespace lib {
// TODO(b/259743562): Refactor PostingListAccessor derived classes
// This class is used to provide a simple abstraction for adding integer index
-// data to posting lists. PostingListIntegerIndexDataAccessor handles:
+// data to posting lists. PostingListIntegerIndexAccessor handles:
// 1) selection of properly-sized posting lists for the accumulated integer
// index data during Finalize()
// 2) chaining of max-sized posting lists.
-class PostingListIntegerIndexDataAccessor : public PostingListAccessor {
+class PostingListIntegerIndexAccessor : public PostingListAccessor {
public:
- // Creates an empty PostingListIntegerIndexDataAccessor.
+ // Creates an empty PostingListIntegerIndexAccessor.
//
// RETURNS:
- // - On success, a valid instance of PostingListIntegerIndexDataAccessor
+ // - On success, a valid instance of PostingListIntegerIndexAccessor
// - INVALID_ARGUMENT error if storage has an invalid block_size.
static libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
Create(FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer);
+ PostingListIntegerIndexSerializer* serializer);
- // Create a PostingListIntegerIndexDataAccessor with an existing posting list
+ // Create a PostingListIntegerIndexAccessor with an existing posting list
// identified by existing_posting_list_id.
//
// RETURNS:
- // - On success, a valid instance of PostingListIntegerIndexDataAccessor
+ // - On success, a valid instance of PostingListIntegerIndexAccessor
// - INVALID_ARGUMENT if storage has an invalid block_size.
static libtextclassifier3::StatusOr<
- std::unique_ptr<PostingListIntegerIndexDataAccessor>>
+ std::unique_ptr<PostingListIntegerIndexAccessor>>
CreateFromExisting(FlashIndexStorage* storage,
- PostingListUsedIntegerIndexDataSerializer* serializer,
+ PostingListIntegerIndexSerializer* serializer,
PostingListIdentifier existing_posting_list_id);
- PostingListUsedSerializer* GetSerializer() override { return serializer_; }
+ PostingListSerializer* GetSerializer() override { return serializer_; }
// Retrieve the next batch of data in the posting list chain
//
@@ -75,7 +75,7 @@ class PostingListIntegerIndexDataAccessor : public PostingListAccessor {
GetNextDataBatch();
// Prepend one data. This may result in flushing the posting list to disk (if
- // the PostingListIntegerIndexDataAccessor holds a max-sized posting list that
+ // the PostingListIntegerIndexAccessor holds a max-sized posting list that
// is full) or freeing a pre-existing posting list if it is too small to fit
// all data necessary.
//
@@ -90,19 +90,19 @@ class PostingListIntegerIndexDataAccessor : public PostingListAccessor {
// TODO(b/259743562): [Optimization 1] add GetAndClear, IsFull for split
private:
- explicit PostingListIntegerIndexDataAccessor(
+ explicit PostingListIntegerIndexAccessor(
FlashIndexStorage* storage,
std::unique_ptr<uint8_t[]> posting_list_buffer_array,
PostingListUsed posting_list_buffer,
- PostingListUsedIntegerIndexDataSerializer* serializer)
+ PostingListIntegerIndexSerializer* serializer)
: PostingListAccessor(storage, std::move(posting_list_buffer_array),
std::move(posting_list_buffer)),
serializer_(serializer) {}
- PostingListUsedIntegerIndexDataSerializer* serializer_; // Does not own.
+ PostingListIntegerIndexSerializer* serializer_; // Does not own.
};
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_DATA_ACCESSOR_H_
+#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_ACCESSOR_H_
diff --git a/icing/index/numeric/posting-list-integer-index-data-accessor_test.cc b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
index ca0804e..f5fd693 100644
--- a/icing/index/numeric/posting-list-integer-index-data-accessor_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-accessor_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-integer-index-data-accessor.h"
+#include "icing/index/numeric/posting-list-integer-index-accessor.h"
#include <cstdint>
#include <memory>
@@ -27,7 +27,7 @@
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/numeric/integer-index-data.h"
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
@@ -44,7 +44,7 @@ using ::testing::Eq;
using ::testing::Lt;
using ::testing::SizeIs;
-class PostingListIntegerIndexDataAccessorTest : public ::testing::Test {
+class PostingListIntegerIndexAccessorTest : public ::testing::Test {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/test_dir";
@@ -53,7 +53,7 @@ class PostingListIntegerIndexDataAccessorTest : public ::testing::Test {
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
- serializer_ = std::make_unique<PostingListUsedIntegerIndexDataSerializer>();
+ serializer_ = std::make_unique<PostingListIntegerIndexSerializer>();
ICING_ASSERT_OK_AND_ASSIGN(
FlashIndexStorage flash_index_storage,
@@ -71,7 +71,7 @@ class PostingListIntegerIndexDataAccessorTest : public ::testing::Test {
Filesystem filesystem_;
std::string test_dir_;
std::string file_name_;
- std::unique_ptr<PostingListUsedIntegerIndexDataSerializer> serializer_;
+ std::unique_ptr<PostingListIntegerIndexSerializer> serializer_;
std::unique_ptr<FlashIndexStorage> flash_index_storage_;
};
@@ -96,11 +96,11 @@ std::vector<IntegerIndexData> CreateData(int num_data,
return data;
}
-TEST_F(PostingListIntegerIndexDataAccessorTest, DataAddAndRetrieveProperly) {
+TEST_F(PostingListIntegerIndexAccessorTest, DataAddAndRetrieveProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Add some integer index data
std::vector<IntegerIndexData> data_vec =
CreateData(/*num_data=*/5, /*start_document_id=*/0, /*start_key=*/819);
@@ -122,11 +122,11 @@ TEST_F(PostingListIntegerIndexDataAccessorTest, DataAddAndRetrieveProperly) {
EXPECT_THAT(pl_holder.block.next_block_index(), Eq(kInvalidBlockIndex));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest, PreexistingPLKeepOnSameBlock) {
+TEST_F(PostingListIntegerIndexAccessorTest, PreexistingPLKeepOnSameBlock) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Add a single data. This will fit in a min-sized posting list.
IntegerIndexData data1(/*section_id=*/1, /*document_id=*/0, /*key=*/12345);
ICING_ASSERT_OK(pl_accessor->PrependData(data1));
@@ -141,7 +141,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest, PreexistingPLKeepOnSameBlock) {
// two data, so this should NOT cause the previous pl to be reallocated.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
IntegerIndexData data2(/*section_id=*/1, /*document_id=*/1, /*key=*/23456);
ICING_ASSERT_OK(pl_accessor->PrependData(data2));
@@ -159,12 +159,11 @@ TEST_F(PostingListIntegerIndexDataAccessorTest, PreexistingPLKeepOnSameBlock) {
IsOkAndHolds(ElementsAre(data2, data1)));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
- PreexistingPLReallocateToLargerPL) {
+TEST_F(PostingListIntegerIndexAccessorTest, PreexistingPLReallocateToLargerPL) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Adding 3 data should cause Finalize allocating a 48-byte posting list,
// which can store at most 4 data.
std::vector<IntegerIndexData> data_vec1 =
@@ -182,7 +181,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
// Now add more data.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
// The current posting list can fit 1 more data. Adding 12 more data should
// result in these data being moved to a larger posting list. Also the total
@@ -217,12 +216,11 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
all_data_vec.rend())));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
- MultiBlockChainsBlocksProperly) {
+TEST_F(PostingListIntegerIndexAccessorTest, MultiBlockChainsBlocksProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(IntegerIndexData)
// is 12, so the max size posting list can store (4096 - 12) / 12 = 340 data.
// Adding 341 data should cause:
@@ -268,12 +266,12 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
IsOkAndHolds(ElementsAreArray(first_block_data_start, data_vec.rend())));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
PreexistingMultiBlockReusesBlocksProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
// Block size is 4096, sizeof(BlockHeader) is 12 and sizeof(IntegerIndexData)
// is 12, so the max size posting list can store (4096 - 12) / 12 = 340 data.
// Adding 341 data will cause:
@@ -296,7 +294,7 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
// fill it up.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), first_add_id));
std::vector<IntegerIndexData> data_vec2 = CreateData(
/*num_data=*/10,
@@ -342,23 +340,23 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
all_data_vec.rend())));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
InvalidDataShouldReturnInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
IntegerIndexData invalid_data;
EXPECT_THAT(pl_accessor->PrependData(invalid_data),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
BasicHitIncreasingShouldReturnInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/12345);
ICING_ASSERT_OK(pl_accessor->PrependData(data1));
@@ -371,24 +369,24 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
NewPostingListNoDataAddedShouldReturnInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
PostingListAccessor::FinalizeResult result =
std::move(*pl_accessor).Finalize();
EXPECT_THAT(result.status,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(PostingListIntegerIndexDataAccessorTest,
+TEST_F(PostingListIntegerIndexAccessorTest,
PreexistingPostingListNoDataAddedShouldSucceed) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor1,
- PostingListIntegerIndexDataAccessor::Create(flash_index_storage_.get(),
- serializer_.get()));
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor1,
+ PostingListIntegerIndexAccessor::Create(flash_index_storage_.get(),
+ serializer_.get()));
IntegerIndexData data1(/*section_id=*/3, /*document_id=*/1, /*key=*/12345);
ICING_ASSERT_OK(pl_accessor1->PrependData(data1));
PostingListAccessor::FinalizeResult result1 =
@@ -396,8 +394,8 @@ TEST_F(PostingListIntegerIndexDataAccessorTest,
ICING_ASSERT_OK(result1.status);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<PostingListIntegerIndexDataAccessor> pl_accessor2,
- PostingListIntegerIndexDataAccessor::CreateFromExisting(
+ std::unique_ptr<PostingListIntegerIndexAccessor> pl_accessor2,
+ PostingListIntegerIndexAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
PostingListAccessor::FinalizeResult result2 =
std::move(*pl_accessor2).Finalize();
diff --git a/icing/index/numeric/posting-list-used-integer-index-data-serializer.cc b/icing/index/numeric/posting-list-integer-index-serializer.cc
index 800fd6b..6556451 100644
--- a/icing/index/numeric/posting-list-used-integer-index-data-serializer.cc
+++ b/icing/index/numeric/posting-list-integer-index-serializer.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include <cstdint>
#include <vector>
@@ -29,7 +29,7 @@
namespace icing {
namespace lib {
-uint32_t PostingListUsedIntegerIndexDataSerializer::GetBytesUsed(
+uint32_t PostingListIntegerIndexSerializer::GetBytesUsed(
const PostingListUsed* posting_list_used) const {
// The special data will be included if they represent actual data. If they
// represent the data start offset or the invalid data sentinel, they are not
@@ -38,7 +38,7 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::GetBytesUsed(
GetStartByteOffset(posting_list_used);
}
-uint32_t PostingListUsedIntegerIndexDataSerializer::GetMinPostingListSizeToFit(
+uint32_t PostingListIntegerIndexSerializer::GetMinPostingListSizeToFit(
const PostingListUsed* posting_list_used) const {
if (IsFull(posting_list_used) || IsAlmostFull(posting_list_used)) {
// If in either the FULL state or ALMOST_FULL state, this posting list *is*
@@ -57,7 +57,7 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::GetMinPostingListSizeToFit(
return GetBytesUsed(posting_list_used) + GetDataTypeBytes();
}
-void PostingListUsedIntegerIndexDataSerializer::Clear(
+void PostingListIntegerIndexSerializer::Clear(
PostingListUsed* posting_list_used) const {
// Safe to ignore return value because posting_list_used->size_in_bytes() is
// a valid argument.
@@ -65,7 +65,7 @@ void PostingListUsedIntegerIndexDataSerializer::Clear(
/*offset=*/posting_list_used->size_in_bytes());
}
-libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::MoveFrom(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::MoveFrom(
PostingListUsed* dst, PostingListUsed* src) const {
ICING_RETURN_ERROR_IF_NULL(dst);
ICING_RETURN_ERROR_IF_NULL(src);
@@ -121,7 +121,7 @@ libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::MoveFrom(
}
libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PrependDataToAlmostFull(
+PostingListIntegerIndexSerializer::PrependDataToAlmostFull(
PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
SpecialDataType special_data = GetSpecialData(posting_list_used, /*index=*/1);
if (special_data.data().basic_hit() < data.basic_hit()) {
@@ -139,7 +139,7 @@ PostingListUsedIntegerIndexDataSerializer::PrependDataToAlmostFull(
return libtextclassifier3::Status::OK;
}
-void PostingListUsedIntegerIndexDataSerializer::PrependDataToEmpty(
+void PostingListIntegerIndexSerializer::PrependDataToEmpty(
PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
// First data to be added. Just add verbatim, no compression.
if (posting_list_used->size_in_bytes() == kSpecialDataSize) {
@@ -165,7 +165,7 @@ void PostingListUsedIntegerIndexDataSerializer::PrependDataToEmpty(
}
libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PrependDataToNotFull(
+PostingListIntegerIndexSerializer::PrependDataToNotFull(
PostingListUsed* posting_list_used, const IntegerIndexData& data,
uint32_t offset) const {
IntegerIndexData cur;
@@ -193,8 +193,7 @@ PostingListUsedIntegerIndexDataSerializer::PrependDataToNotFull(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PrependData(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::PrependData(
PostingListUsed* posting_list_used, const IntegerIndexData& data) const {
static_assert(
sizeof(BasicHit::Value) <= sizeof(uint64_t),
@@ -223,7 +222,7 @@ PostingListUsedIntegerIndexDataSerializer::PrependData(
}
}
-uint32_t PostingListUsedIntegerIndexDataSerializer::PrependDataArray(
+uint32_t PostingListIntegerIndexSerializer::PrependDataArray(
PostingListUsed* posting_list_used, const IntegerIndexData* array,
uint32_t num_data, bool keep_prepended) const {
if (!IsPostingListValid(posting_list_used)) {
@@ -248,14 +247,14 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::PrependDataArray(
}
libtextclassifier3::StatusOr<std::vector<IntegerIndexData>>
-PostingListUsedIntegerIndexDataSerializer::GetData(
+PostingListIntegerIndexSerializer::GetData(
const PostingListUsed* posting_list_used) const {
std::vector<IntegerIndexData> data_arr_out;
ICING_RETURN_IF_ERROR(GetData(posting_list_used, &data_arr_out));
return data_arr_out;
}
-libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::GetData(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::GetData(
const PostingListUsed* posting_list_used,
std::vector<IntegerIndexData>* data_arr_out) const {
return GetDataInternal(posting_list_used,
@@ -263,8 +262,7 @@ libtextclassifier3::Status PostingListUsedIntegerIndexDataSerializer::GetData(
/*pop=*/false, data_arr_out);
}
-libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::PopFrontData(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::PopFrontData(
PostingListUsed* posting_list_used, uint32_t num_data) const {
if (num_data == 1 && IsFull(posting_list_used)) {
// The PL is in FULL state which means that we save 2 uncompressed data in
@@ -345,8 +343,7 @@ PostingListUsedIntegerIndexDataSerializer::PopFrontData(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status
-PostingListUsedIntegerIndexDataSerializer::GetDataInternal(
+libtextclassifier3::Status PostingListIntegerIndexSerializer::GetDataInternal(
const PostingListUsed* posting_list_used, uint32_t limit, bool pop,
std::vector<IntegerIndexData>* out) const {
// TODO(b/259743562): [Optimization 2] handle compressed data
@@ -404,8 +401,8 @@ PostingListUsedIntegerIndexDataSerializer::GetDataInternal(
return libtextclassifier3::Status::OK;
}
-PostingListUsedIntegerIndexDataSerializer::SpecialDataType
-PostingListUsedIntegerIndexDataSerializer::GetSpecialData(
+PostingListIntegerIndexSerializer::SpecialDataType
+PostingListIntegerIndexSerializer::GetSpecialData(
const PostingListUsed* posting_list_used, uint32_t index) const {
// It is ok to temporarily construct a SpecialData with offset = 0 since we're
// going to overwrite it by memcpy.
@@ -417,7 +414,7 @@ PostingListUsedIntegerIndexDataSerializer::GetSpecialData(
return special_data;
}
-void PostingListUsedIntegerIndexDataSerializer::SetSpecialData(
+void PostingListIntegerIndexSerializer::SetSpecialData(
PostingListUsed* posting_list_used, uint32_t index,
const SpecialDataType& special_data) const {
memcpy(posting_list_used->posting_list_buffer() +
@@ -425,7 +422,7 @@ void PostingListUsedIntegerIndexDataSerializer::SetSpecialData(
&special_data, sizeof(SpecialDataType));
}
-bool PostingListUsedIntegerIndexDataSerializer::IsPostingListValid(
+bool PostingListIntegerIndexSerializer::IsPostingListValid(
const PostingListUsed* posting_list_used) const {
if (IsAlmostFull(posting_list_used)) {
// Special data 1 should hold a valid data.
@@ -449,7 +446,7 @@ bool PostingListUsedIntegerIndexDataSerializer::IsPostingListValid(
return true;
}
-uint32_t PostingListUsedIntegerIndexDataSerializer::GetStartByteOffset(
+uint32_t PostingListIntegerIndexSerializer::GetStartByteOffset(
const PostingListUsed* posting_list_used) const {
if (IsFull(posting_list_used)) {
return 0;
@@ -460,7 +457,7 @@ uint32_t PostingListUsedIntegerIndexDataSerializer::GetStartByteOffset(
}
}
-bool PostingListUsedIntegerIndexDataSerializer::SetStartByteOffset(
+bool PostingListIntegerIndexSerializer::SetStartByteOffset(
PostingListUsed* posting_list_used, uint32_t offset) const {
if (offset > posting_list_used->size_in_bytes()) {
ICING_LOG(ERROR) << "offset cannot be a value greater than size "
@@ -497,7 +494,7 @@ bool PostingListUsedIntegerIndexDataSerializer::SetStartByteOffset(
}
libtextclassifier3::StatusOr<uint32_t>
-PostingListUsedIntegerIndexDataSerializer::PrependDataUncompressed(
+PostingListIntegerIndexSerializer::PrependDataUncompressed(
PostingListUsed* posting_list_used, const IntegerIndexData& data,
uint32_t offset) const {
if (offset < kSpecialDataSize + sizeof(IntegerIndexData)) {
diff --git a/icing/index/numeric/posting-list-used-integer-index-data-serializer.h b/icing/index/numeric/posting-list-integer-index-serializer.h
index 49007e3..9cfdb7a 100644
--- a/icing/index/numeric/posting-list-used-integer-index-data-serializer.h
+++ b/icing/index/numeric/posting-list-integer-index-serializer.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_USED_INTEGER_INDEX_DATA_SERIALIZER_H_
-#define ICING_INDEX_NUMERIC_POSTING_LIST_USED_INTEGER_INDEX_DATA_SERIALIZER_H_
+#ifndef ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
+#define ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
#include <cstdint>
#include <vector>
@@ -28,8 +28,7 @@ namespace icing {
namespace lib {
// A serializer class to serialize IntegerIndexData to PostingListUsed.
-class PostingListUsedIntegerIndexDataSerializer
- : public PostingListUsedSerializer {
+class PostingListIntegerIndexSerializer : public PostingListSerializer {
public:
using SpecialDataType = SpecialData<IntegerIndexData>;
static_assert(sizeof(SpecialDataType) == sizeof(IntegerIndexData), "");
@@ -335,4 +334,4 @@ class PostingListUsedIntegerIndexDataSerializer
} // namespace lib
} // namespace icing
-#endif // ICING_INDEX_NUMERIC_POSTING_LIST_USED_INTEGER_INDEX_DATA_SERIALIZER_H_
+#endif // ICING_INDEX_NUMERIC_POSTING_LIST_INTEGER_INDEX_SERIALIZER_H_
diff --git a/icing/index/numeric/posting-list-used-integer-index-data-serializer_test.cc b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
index c270137..d3d54ef 100644
--- a/icing/index/numeric/posting-list-used-integer-index-data-serializer_test.cc
+++ b/icing/index/numeric/posting-list-integer-index-serializer_test.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/index/numeric/posting-list-used-integer-index-data-serializer.h"
+#include "icing/index/numeric/posting-list-integer-index-serializer.h"
#include <memory>
#include <vector>
@@ -39,9 +39,8 @@ namespace {
// without ALMOST_FULL) test cases, including for
// PopFrontData.
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- GetMinPostingListSizeToFitNotNull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitNotNull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 2551 * sizeof(IntegerIndexData);
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -65,9 +64,9 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
Eq(3 * sizeof(IntegerIndexData)));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
+TEST(PostingListIntegerIndexSerializerTest,
GetMinPostingListSizeToFitAlmostFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * sizeof(IntegerIndexData);
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -87,9 +86,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- GetMinPostingListSizeToFitFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, GetMinPostingListSizeToFitFull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * sizeof(IntegerIndexData);
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -113,8 +111,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used), Eq(size));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataNotFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataNotFull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 2551 * sizeof(IntegerIndexData);
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -151,8 +149,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataNotFull) {
IsOkAndHolds(ElementsAre(data2, data1, data0)));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataAlmostFull) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataAlmostFull) {
+ PostingListIntegerIndexSerializer serializer;
int size = 4 * sizeof(IntegerIndexData);
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -195,9 +193,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, PrependDataAlmostFull) {
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- PrependDataPostingListUsedMinSize) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataPostingListUsedMinSize) {
+ PostingListIntegerIndexSerializer serializer;
int size = serializer.GetMinPostingListSize();
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -233,9 +230,9 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
+TEST(PostingListIntegerIndexSerializerTest,
PrependDataArrayDoNotKeepPrepended) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+ PostingListIntegerIndexSerializer serializer;
int size = 6 * sizeof(IntegerIndexData);
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -314,9 +311,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
- PrependDataArrayKeepPrepended) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PrependDataArrayKeepPrepended) {
+ PostingListIntegerIndexSerializer serializer;
int size = 6 * sizeof(IntegerIndexData);
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -371,8 +367,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
IsOkAndHolds(ElementsAreArray(data_pushed.rbegin(), data_pushed.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveFrom) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, MoveFrom) {
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
std::unique_ptr<char[]> buf1 = std::make_unique<char[]>(size);
@@ -412,9 +408,9 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveFrom) {
EXPECT_THAT(serializer.GetData(&pl_used1), IsOkAndHolds(IsEmpty()));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest,
+TEST(PostingListIntegerIndexSerializerTest,
MoveToNullReturnsFailedPrecondition) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+ PostingListIntegerIndexSerializer serializer;
int size = 3 * serializer.GetMinPostingListSize();
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);
@@ -443,8 +439,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest,
IsOkAndHolds(ElementsAreArray(data_arr.rbegin(), data_arr.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveToPostingListTooSmall) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, MoveToPostingListTooSmall) {
+ PostingListIntegerIndexSerializer serializer;
int size1 = 3 * serializer.GetMinPostingListSize();
std::unique_ptr<char[]> buf1 = std::make_unique<char[]>(size1);
@@ -486,8 +482,8 @@ TEST(PostingListUsedIntegerIndexDataSerializerTest, MoveToPostingListTooSmall) {
IsOkAndHolds(ElementsAreArray(data_arr2.rbegin(), data_arr2.rend())));
}
-TEST(PostingListUsedIntegerIndexDataSerializerTest, PopFrontData) {
- PostingListUsedIntegerIndexDataSerializer serializer;
+TEST(PostingListIntegerIndexSerializerTest, PopFrontData) {
+ PostingListIntegerIndexSerializer serializer;
int size = 2 * serializer.GetMinPostingListSize();
std::unique_ptr<char[]> buf = std::make_unique<char[]>(size);