aboutsummaryrefslogtreecommitdiff
path: root/icing/index/lite
diff options
context:
space:
mode:
authorGrace Zhao <gracezrx@google.com>2023-08-30 23:54:01 -0700
committerGrace Zhao <gracezrx@google.com>2023-08-30 23:54:01 -0700
commit6293cd0a843b1e5b09086d9010ec863556d0c1ba (patch)
tree8834036305dca128017ee06f9b2d3d157da54c62 /icing/index/lite
parent8c71e61d02944611249c892236e67c6acace8a2d (diff)
downloadicing-6293cd0a843b1e5b09086d9010ec863556d0c1ba.tar.gz
Update Icing from upstream.
Descriptions: ======================================================================== [fixit] Remove incorrect reserve usage for STL data structure ======================================================================== Move LiteIndex::SortHits() from the query path to the indexing path ======================================================================== Bug: 286418010 Bug: 297549761 Bug: 248295995 Change-Id: I46168178e0227451b0f7dbf394bb6cc5db94e1e8
Diffstat (limited to 'icing/index/lite')
-rw-r--r--icing/index/lite/lite-index-options.cc10
-rw-r--r--icing/index/lite/lite-index-options.h6
-rw-r--r--icing/index/lite/lite-index.cc269
-rw-r--r--icing/index/lite/lite-index.h83
-rw-r--r--icing/index/lite/lite-index_test.cc641
-rw-r--r--icing/index/lite/lite-index_thread-safety_test.cc7
-rw-r--r--icing/index/lite/term-id-hit-pair.h2
7 files changed, 859 insertions, 159 deletions
diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc
index 29075f8..8780d45 100644
--- a/icing/index/lite/lite-index-options.cc
+++ b/icing/index/lite/lite-index-options.cc
@@ -14,6 +14,8 @@
#include "icing/index/lite/lite-index-options.h"
+#include <cstdint>
+
#include "icing/index/lite/term-id-hit-pair.h"
namespace icing {
@@ -64,9 +66,13 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
} // namespace
LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes)
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes)
: filename_base(filename_base),
- hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes) {
+ hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
+ hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
+ hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
lexicon_options = CalculateTrieOptions(hit_buffer_size);
display_mappings_options = CalculateTrieOptions(hit_buffer_size);
diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h
index ae58802..9f8452c 100644
--- a/icing/index/lite/lite-index-options.h
+++ b/icing/index/lite/lite-index-options.h
@@ -27,7 +27,9 @@ struct LiteIndexOptions {
// hit_buffer_want_merge_bytes and the logic in CalculateHitBufferSize and
// CalculateTrieOptions.
LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes);
+ uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing,
+ uint32_t hit_buffer_sort_threshold_bytes);
IcingDynamicTrie::Options lexicon_options;
IcingDynamicTrie::Options display_mappings_options;
@@ -35,6 +37,8 @@ struct LiteIndexOptions {
std::string filename_base;
uint32_t hit_buffer_want_merge_bytes = 0;
uint32_t hit_buffer_size = 0;
+ bool hit_buffer_sort_at_indexing = false;
+ uint32_t hit_buffer_sort_threshold_bytes = 0;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index bf54dec..ec7141a 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -36,6 +36,8 @@
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/hit/hit.h"
#include "icing/index/lite/lite-index-header.h"
+#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/term-id-codec.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/legacy/core/icing-timer.h"
@@ -44,10 +46,13 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mmapper.h"
#include "icing/proto/debug.pb.h"
+#include "icing/proto/scoring.pb.h"
#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
+#include "icing/store/suggestion-result-checker.h"
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -160,7 +165,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
}
// Set up header.
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
header_mmap_.address()));
@@ -175,7 +180,7 @@ libtextclassifier3::Status LiteIndex::Initialize() {
UpdateChecksum();
} else {
- header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+ header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
header_mmap_.address()));
@@ -352,6 +357,73 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
return tvi;
}
+void LiteIndex::ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const {
+ // Check sections.
+ if (((UINT64_C(1) << hit.section_id()) & section_id_mask) == 0) {
+ return;
+ }
+ // Check prefix section only.
+ if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
+ return;
+ }
+ // Check whether this Hit is desired.
+ // TODO(b/230553264) Move common logic into helper function once we support
+ // score term by prefix_hit in lite_index.
+ DocumentId document_id = hit.document_id();
+ bool is_new_document = document_id != last_document_id;
+ if (is_new_document) {
+ last_document_id = document_id;
+ is_last_document_desired =
+ suggestion_result_checker == nullptr ||
+ suggestion_result_checker->BelongsToTargetResults(document_id,
+ hit.section_id());
+ }
+ if (!is_last_document_desired) {
+ // The document is removed or expired or not desired.
+ return;
+ }
+
+ // Score the hit by the strategy
+ switch (score_by) {
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE:
+ total_score_out = 1;
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT:
+ if (is_new_document) {
+ ++total_score_out;
+ }
+ break;
+ case SuggestionScoringSpecProto::SuggestionRankingStrategy::TERM_FREQUENCY:
+ if (hit.has_term_frequency()) {
+ total_score_out += hit.term_frequency();
+ } else {
+ ++total_score_out;
+ }
+ break;
+ }
+
+ // Append the Hit or update hit section to the output vector.
+ if (is_new_document && hits_out != nullptr) {
+ hits_out->push_back(DocHitInfo(document_id));
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->push_back(Hit::TermFrequencyArray());
+ }
+ }
+ if (hits_out != nullptr) {
+ hits_out->back().UpdateSection(hit.section_id());
+ if (term_frequency_out != nullptr) {
+ term_frequency_out->back()[hit.section_id()] = hit.term_frequency();
+ }
+ }
+}
+
int LiteIndex::FetchHits(
uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
@@ -359,19 +431,38 @@ int LiteIndex::FetchHits(
const SuggestionResultChecker* suggestion_result_checker,
std::vector<DocHitInfo>* hits_out,
std::vector<Hit::TermFrequencyArray>* term_frequency_out) {
- int score = 0;
- DocumentId last_document_id = kInvalidDocumentId;
- // Record whether the last document belongs to the given namespaces.
- bool is_last_document_desired = false;
-
- if (NeedSort()) {
- // Transition from shared_lock in NeedSort to unique_lock here is safe
- // because it doesn't hurt to sort again if sorting was done already by
- // another thread after NeedSort is evaluated. NeedSort is called before
- // sorting to improve concurrency as threads can avoid acquiring the unique
- // lock if no sorting is needed.
+ bool need_sort_at_querying = false;
+ {
+ absl_ports::shared_lock l(&mutex_);
+
+ // We sort here when:
+ // 1. We don't enable sorting at indexing time (i.e. we sort at querying
+ // time), and there is an unsorted tail portion. OR
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold,
+ // regardless of whether or not hit_buffer_sort_at_indexing is enabled.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ need_sort_at_querying = NeedSortAtQuerying();
+ }
+ if (need_sort_at_querying) {
absl_ports::unique_lock l(&mutex_);
- SortHits();
+ IcingTimer timer;
+
+ // Transition from shared_lock to unique_lock is safe here because it
+ // doesn't hurt to sort again if sorting was done already by another thread
+ // after need_sort_at_querying is evaluated.
+ // We check need_sort_at_querying to improve query concurrency as threads
+ // can avoid acquiring the unique lock if no sorting is needed.
+ SortHitsImpl();
+
+ if (options_.hit_buffer_sort_at_indexing) {
+ // This is the second case for sort. Log as this should be a very rare
+ // occasion.
+ ICING_LOG(WARNING) << "Sorting HitBuffer at querying time when "
+ "hit_buffer_sort_at_indexing is enabled. Sort and "
+ "merge HitBuffer in "
+ << timer.Elapsed() * 1000 << " ms.";
+ }
}
// This downgrade from an unique_lock to a shared_lock is safe because we're
@@ -379,75 +470,72 @@ int LiteIndex::FetchHits(
// only in Seek().
// Any operations that might execute in between the transition of downgrading
// the lock here are guaranteed not to alter the searchable section (or the
- // LiteIndex due to a global lock in IcingSearchEngine).
+ // LiteIndex) due to a global lock in IcingSearchEngine.
absl_ports::shared_lock l(&mutex_);
- for (uint32_t idx = Seek(term_id); idx < header_->searchable_end(); idx++) {
- TermIdHitPair term_id_hit_pair =
- hit_buffer_.array_cast<TermIdHitPair>()[idx];
- if (term_id_hit_pair.term_id() != term_id) break;
-
- const Hit& hit = term_id_hit_pair.hit();
- // Check sections.
- if (((UINT64_C(1) << hit.section_id()) & section_id_mask) == 0) {
- continue;
- }
- // Check prefix section only.
- if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
- continue;
- }
- // TODO(b/230553264) Move common logic into helper function once we support
- // score term by prefix_hit in lite_index.
- // Check whether this Hit is desired.
- DocumentId document_id = hit.document_id();
- bool is_new_document = document_id != last_document_id;
- if (is_new_document) {
- last_document_id = document_id;
- is_last_document_desired =
- suggestion_result_checker == nullptr ||
- suggestion_result_checker->BelongsToTargetResults(document_id,
- hit.section_id());
- }
- if (!is_last_document_desired) {
- // The document is removed or expired or not desired.
- continue;
- }
- // Score the hit by the strategy
- switch (score_by) {
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::NONE:
- score = 1;
- break;
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::
- DOCUMENT_COUNT:
- if (is_new_document) {
- ++score;
- }
- break;
- case SuggestionScoringSpecProto::SuggestionRankingStrategy::
- TERM_FREQUENCY:
- if (hit.has_term_frequency()) {
- score += hit.term_frequency();
- } else {
- ++score;
- }
- break;
- }
+ // Search in the HitBuffer array for Hits with the corresponding term_id.
+ // Hits are added in increasing order of doc ids, so hits that get appended
+ // later have larger docIds. This means that:
+ // 1. Hits in the unsorted tail will have larger docIds than hits in the
+ // sorted portion.
+ // 2. Hits at the end of the unsorted tail will have larger docIds than hits
+ // in the front of the tail.
+ // We want to retrieve hits in descending order of docIds. Therefore we should
+ // search by doing:
+ // 1. Linear search first in reverse iteration order over the unsorted tail
+ // portion.
+ // 2. Followed by binary search on the sorted portion.
+ const TermIdHitPair* array = hit_buffer_.array_cast<TermIdHitPair>();
- // Append the Hit or update hit section to the output vector.
- if (is_new_document && hits_out != nullptr) {
- hits_out->push_back(DocHitInfo(document_id));
- if (term_frequency_out != nullptr) {
- term_frequency_out->push_back(Hit::TermFrequencyArray());
+ DocumentId last_document_id = kInvalidDocumentId;
+ // Record whether the last document belongs to the given namespaces.
+ bool is_last_document_desired = false;
+ int total_score = 0;
+
+ // Linear search over unsorted tail in reverse iteration order.
+ // This should only be performed when hit_buffer_sort_at_indexing is enabled.
+ // When disabled, the entire HitBuffer should be sorted already and only
+ // binary search is needed.
+ if (options_.hit_buffer_sort_at_indexing) {
+ uint32_t unsorted_length = header_->cur_size() - header_->searchable_end();
+ for (uint32_t i = 1; i <= unsorted_length; ++i) {
+ TermIdHitPair term_id_hit_pair = array[header_->cur_size() - i];
+ if (term_id_hit_pair.term_id() == term_id) {
+ // We've found a matched hit.
+ const Hit& matched_hit = term_id_hit_pair.hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors
+ // are non-null.
+ ScoreAndAppendFetchedHit(matched_hit, section_id_mask,
+ only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id,
+ is_last_document_desired, total_score,
+ hits_out, term_frequency_out);
}
}
- if (hits_out != nullptr) {
- hits_out->back().UpdateSection(hit.section_id());
- if (term_frequency_out != nullptr) {
- term_frequency_out->back()[hit.section_id()] = hit.term_frequency();
- }
+ }
+
+ // Do binary search over the sorted section and repeat the above steps.
+ TermIdHitPair target_term_id_hit_pair(
+ term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
+ for (const TermIdHitPair* ptr = std::lower_bound(
+ array, array + header_->searchable_end(), target_term_id_hit_pair);
+ ptr < array + header_->searchable_end(); ++ptr) {
+ if (ptr->term_id() != term_id) {
+ // We've processed all matches. Stop iterating further.
+ break;
}
+
+ const Hit& matched_hit = ptr->hit();
+ // Score the hit and add to total_score. Also add the hits and its term
+ // frequency info to hits_out and term_frequency_out if the two vectors are
+ // non-null.
+ ScoreAndAppendFetchedHit(
+ matched_hit, section_id_mask, only_from_prefix_sections, score_by,
+ suggestion_result_checker, last_document_id, is_last_document_desired,
+ total_score, hits_out, term_frequency_out);
}
- return score;
+ return total_score;
}
libtextclassifier3::StatusOr<int> LiteIndex::ScoreHits(
@@ -455,9 +543,9 @@ libtextclassifier3::StatusOr<int> LiteIndex::ScoreHits(
SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
const SuggestionResultChecker* suggestion_result_checker) {
return FetchHits(term_id, kSectionIdMaskAll,
- /*only_from_prefix_sections=*/false, score_by,
- suggestion_result_checker,
- /*hits_out=*/nullptr);
+ /*only_from_prefix_sections=*/false, score_by,
+ suggestion_result_checker,
+ /*hits_out=*/nullptr);
}
bool LiteIndex::is_full() const {
@@ -515,7 +603,7 @@ IndexStorageInfoProto LiteIndex::GetStorageInfo(
return storage_info;
}
-void LiteIndex::SortHits() {
+void LiteIndex::SortHitsImpl() {
// Make searchable by sorting by hit buffer.
uint32_t sort_len = header_->cur_size() - header_->searchable_end();
if (sort_len <= 0) {
@@ -546,25 +634,6 @@ void LiteIndex::SortHits() {
UpdateChecksum();
}
-uint32_t LiteIndex::Seek(uint32_t term_id) const {
- // Binary search for our term_id. Make sure we get the first
- // element. Using kBeginSortValue ensures this for the hit value.
- TermIdHitPair term_id_hit_pair(
- term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
-
- const TermIdHitPair::Value* array =
- hit_buffer_.array_cast<TermIdHitPair::Value>();
- if (header_->searchable_end() != header_->cur_size()) {
- ICING_LOG(WARNING) << "Lite index: hit buffer searchable end != current "
- << "size during Seek(): "
- << header_->searchable_end() << " vs "
- << header_->cur_size();
- }
- const TermIdHitPair::Value* ptr = std::lower_bound(
- array, array + header_->searchable_end(), term_id_hit_pair.value());
- return ptr - array;
-}
-
libtextclassifier3::Status LiteIndex::Optimize(
const std::vector<DocumentId>& document_id_old_to_new,
const TermIdCodec* term_id_codec, DocumentId new_last_added_document_id) {
@@ -575,7 +644,7 @@ libtextclassifier3::Status LiteIndex::Optimize(
}
// Sort the hits so that hits with the same term id will be grouped together,
// which helps later to determine which terms will be unused after compaction.
- SortHits();
+ SortHitsImpl();
uint32_t new_size = 0;
uint32_t curr_term_id = 0;
uint32_t curr_tvi = 0;
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 916a14b..288602a 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -20,6 +20,7 @@
#define ICING_INDEX_LITE_INDEX_H_
#include <cstdint>
+#include <iterator>
#include <limits>
#include <memory>
#include <string>
@@ -48,7 +49,6 @@
#include "icing/store/document-id.h"
#include "icing/store/namespace-id.h"
#include "icing/store/suggestion-result-checker.h"
-#include "icing/util/bit-util.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -63,6 +63,9 @@ class LiteIndex {
// An entry in the hit buffer.
using Options = LiteIndexOptions;
+ // Offset for the LiteIndex_Header in the hit buffer mmap.
+ static constexpr uint32_t kHeaderFileOffset = 0;
+
// Updates checksum of subcomponents.
~LiteIndex();
@@ -152,8 +155,8 @@ class LiteIndex {
// Add all hits with term_id from the sections specified in section_id_mask,
// skipping hits in non-prefix sections if only_from_prefix_sections is true,
// to hits_out. If hits_out is nullptr, no hits will be added. The
- // corresponding hit term frequencies will also be added if term_frequency_out
- // is nullptr.
+ // corresponding hit term frequencies will also not be added if
+ // term_frequency_out is nullptr.
//
// Only those hits which belongs to the given namespaces will be counted and
// fetched. A nullptr namespace checker will disable this check.
@@ -181,15 +184,29 @@ class LiteIndex {
uint32_t size() const ICING_LOCKS_EXCLUDED(mutex_) {
absl_ports::shared_lock l(&mutex_);
- return sizeLocked();
+ return size_impl();
}
bool WantsMerge() const ICING_LOCKS_EXCLUDED(mutex_) {
absl_ports::shared_lock l(&mutex_);
- return is_full() || sizeLocked() >= (options_.hit_buffer_want_merge_bytes /
- sizeof(TermIdHitPair::Value));
+ return is_full() || size_impl() >= (options_.hit_buffer_want_merge_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
+
+ // Whether or not the HitBuffer's unsorted tail size exceeds the sort
+ // threshold.
+ bool HasUnsortedHitsExceedingSortThreshold() const
+ ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::shared_lock l(&mutex_);
+ return HasUnsortedHitsExceedingSortThresholdImpl();
}
+ // Sort hits stored in the index.
+ void SortHits() ICING_LOCKS_EXCLUDED(mutex_) {
+ absl_ports::unique_lock l(&mutex_);
+ SortHitsImpl();
+ };
+
class const_iterator {
friend class LiteIndex;
@@ -326,17 +343,13 @@ class LiteIndex {
// Check if the hit buffer has reached its capacity.
bool is_full() const ICING_SHARED_LOCKS_REQUIRED(mutex_);
- uint32_t sizeLocked() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
- return header_->cur_size();
- }
-
// Non-locking implementation for empty().
bool empty_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
return size_impl() == 0;
}
// Non-locking implementation for size().
- bool size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ uint32_t size_impl() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
return header_->cur_size();
}
@@ -352,18 +365,48 @@ class LiteIndex {
NamespaceId namespace_id)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Whether or not the HitBuffer requires sorting.
- bool NeedSort() ICING_LOCKS_EXCLUDED(mutex_) {
- absl_ports::shared_lock l(&mutex_);
- return header_->cur_size() - header_->searchable_end() > 0;
+ // We need to sort during querying time when:
+ // 1. Sorting at indexing time is not enabled and there is an unsorted tail
+ // section in the HitBuffer.
+ // 2. The unsorted tail size exceeds the hit_buffer_sort_threshold, regardless
+ // of whether or not hit_buffer_sort_at_indexing is enabled. This is to
+ // prevent performing sequential search on a large unsorted tail section,
+ // which would result in bad query performance.
+ // This is more of a sanity check. We should not really be encountering
+ // this case.
+ bool NeedSortAtQuerying() const ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return HasUnsortedHitsExceedingSortThresholdImpl() ||
+ (!options_.hit_buffer_sort_at_indexing &&
+ header_->cur_size() - header_->searchable_end() > 0);
}
- // Sort hits stored in the index.
- void SortHits() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ // Non-locking implementation for HasUnsortedHitsExceedingSortThresholdImpl().
+ bool HasUnsortedHitsExceedingSortThresholdImpl() const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_) {
+ return header_->cur_size() - header_->searchable_end() >=
+ (options_.hit_buffer_sort_threshold_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
- // Returns the position of the first element with term_id, or the searchable
- // end of the hit buffer if term_id is not present.
- uint32_t Seek(uint32_t term_id) const ICING_SHARED_LOCKS_REQUIRED(mutex_);
+ // Non-locking implementation for SortHits().
+ void SortHitsImpl() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Calculates and adds the score for a fetched hit to total_score_out, while
+ // updating last_document_id (which keeps track of the last added docId so
+ // far), and is_last_document_desired (which keeps track of whether that last
+ // added docId belongs to the query's desired namespace.)
+ //
+ // Also appends the hit to hits_out and term_frequency_out if the vectors are
+ // not null.
+ void ScoreAndAppendFetchedHit(
+ const Hit& hit, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::Code score_by,
+ const SuggestionResultChecker* suggestion_result_checker,
+ DocumentId& last_document_id, bool& is_last_document_desired,
+ int& total_score_out, std::vector<DocHitInfo>* hits_out,
+ std::vector<Hit::TermFrequencyArray>* term_frequency_out) const
+ ICING_SHARED_LOCKS_REQUIRED(mutex_);
// File descriptor that points to where the header and hit buffer are written
// to.
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
index 5f141ed..9811fa2 100644
--- a/icing/index/lite/lite-index_test.cc
+++ b/icing/index/lite/lite-index_test.cc
@@ -14,14 +14,27 @@
#include "icing/index/lite/lite-index.h"
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
+#include "icing/index/lite/lite-index-header.h"
#include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
-#include "icing/store/suggestion-result-checker.h"
+#include "icing/store/namespace-id.h"
#include "icing/testing/always-false-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -34,6 +47,8 @@ namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
using ::testing::SizeIs;
class LiteIndexTest : public testing::Test {
@@ -41,62 +56,518 @@ class LiteIndexTest : public testing::Test {
void SetUp() override {
index_dir_ = GetTestTempDir() + "/test_dir";
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str()));
-
- std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
- LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
- LiteIndex::Create(options, &icing_filesystem_));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- term_id_codec_,
- TermIdCodec::Create(
- IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
- IcingDynamicTrie::max_value_index(options.lexicon_options)));
}
void TearDown() override {
term_id_codec_.reset();
- lite_index_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str()));
}
std::string index_dir_;
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
- std::unique_ptr<LiteIndex> lite_index_;
std::unique_ptr<TermIdCodec> term_id_codec_;
};
constexpr NamespaceId kNamespace0 = 0;
-TEST_F(LiteIndexTest, LiteIndexAppendHits) {
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
ICING_ASSERT_OK_AND_ASSIGN(
- uint32_t tvi,
- lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
- term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
- Hit doc_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+ // Check that unsorted hits does not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+ // Check that hits are unsorted. Persist the data and pread from
+ // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
std::vector<DocHitInfo> hits1;
- lite_index_->FetchHits(
+ lite_index->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
/*namespace_checker=*/nullptr, &hits1);
EXPECT_THAT(hits1, SizeIs(1));
- EXPECT_THAT(hits1.back().document_id(), Eq(0));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
// Check that the hits are coming from section 0 and section 1.
EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
std::vector<DocHitInfo> hits2;
AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
- lite_index_->FetchHits(
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ // Check that hits are sorted after querying LiteIndex. Persist the data and
+ // pread from LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+}
+
+TEST_F(LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsBelowSortThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ // However note that in these tests we're unable to sort hits after
+ // indexing, as sorting performed by the string-section-indexing-handler
+ // after indexing all hits in an entire document, rather than after each
+ // AddHits() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Add some hits
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
+
+ // Check that unsorted hits does not exceed the sort threshold.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+
+ // Check that hits are unsorted. Persist the data and pread from
+ // LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+
+ // Query the LiteIndex
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(1));
+ EXPECT_THAT(hits1.back().document_id(), Eq(1));
+ // Check that the hits are coming from section 0 and section 1.
+ EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ // Check that hits are still unsorted after querying LiteIndex because the
+ // HitBuffer unsorted size is still below the sort threshold, and we've
+ // enabled sort_at_indexing.
+ // Persist the data and performing a pread on LiteIndexHeader.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(4));
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtQuerying_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ // However note that in these tests we're unable to sort hits after
+ // indexing, as sorting performed by the string-section-indexing-handler
+ // after indexing all hits in an entire document, rather than after each
+ // AddHits() operation.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add 14 hits and make sure that termIds are added in unsorted order.
+ // Documents should be inserted in order as new incoming hits should have
+ // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ // Verify that the HitBuffer has not been sorted.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0)}
+ // bar: {(0, 0); (1, 0); (1, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0)}
+ // quz: {(0, 2); (2, 1)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(4));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(3));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits1[1].document_id(), Eq(2));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[2].document_id(), Eq(1));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[3].document_id(), Eq(0));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ &always_false_suggestion_result_checker, &hits2);
+ // Check that no hits are returned because they get skipped by the namespace
+ // checker.
+ EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+ // Check that the HitBuffer is sorted after the query call.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+}
+
+TEST_F(
+ LiteIndexTest,
+ LiteIndexFetchHits_sortAtIndexing_unsortedHitsExceedingSortAtIndexThreshold) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
+ // Doc 0
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 1
+ Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 2
+ Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 3
+ Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit2(/*section_id=*/1, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc3_hit3(/*section_id=*/2, /*document_id=*/3, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ // Doc 4
+ Hit doc4_hit0(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit1(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit2(/*section_id=*/1, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+ Hit doc4_hit3(/*section_id=*/2, /*document_id=*/4, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false);
+
+ // Create terms
+ // Foo
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t foo_tvi,
+ lite_index->InsertTerm("foo", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
+ // Bar
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t bar_tvi,
+ lite_index->InsertTerm("bar", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
+ term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
+ // Baz
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t baz_tvi,
+ lite_index->InsertTerm("baz", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t baz_term_id,
+ term_id_codec_->EncodeTvi(baz_tvi, TviType::LITE));
+ // Qux
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t qux_tvi,
+ lite_index->InsertTerm("qux", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t qux_term_id,
+ term_id_codec_->EncodeTvi(qux_tvi, TviType::LITE));
+
+ // Add hits and make sure that termIds are added in unsorted order.
+ // Documents should be inserted in order as new incoming hits should have
+ // larger document ids.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc0_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc0_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc1_hit3));
+ // Adding 8 hits exceeds the sort threshold. However when sort_at_indexing is
+ // enabled, sorting is done in the string-section-indexing-handler rather than
+ // AddHit() itself, we need to invoke SortHits() manually.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+ lite_index->SortHits();
+ // Check that the HitBuffer is sorted.
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ LiteIndex_HeaderImpl::HeaderData header_data;
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
+
+ // Add 12 more hits so that sort threshold is exceeded again.
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc2_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc2_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc2_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc3_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc3_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc3_hit3));
+ ICING_ASSERT_OK(lite_index->AddHit(baz_term_id, doc4_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(qux_term_id, doc4_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc4_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, doc4_hit3));
+
+ // Adding these hits exceeds the sort threshold. However when sort_at_indexing
+ // is enabled, sorting is done in the string-section-indexing-handler rather
+ // than AddHit() itself.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsTrue());
+
+ // We now have the following in the hit buffer:
+ // <term>: {(docId, sectionId)...}
+ // foo: {(0, 0); (1, 0); (1, 1); (2, 0); (2, 2); (3, 0); (3, 1); (4, 1)}
+ // bar: {(0, 0); (1, 0); (1, 2); (3, 2); (4, 2)}
+ // baz: {(0, 1); (2, 0); (3, 0); (4, 0)}
+ // quz: {(0, 2); (2, 1); (4, 0)}
+
+ // Search over the HitBuffer.
+ std::vector<DocHitInfo> hits1;
+ lite_index->FetchHits(
+ foo_term_id, kSectionIdMaskAll,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits1);
+ EXPECT_THAT(hits1, SizeIs(5));
+ // Check that hits are retrieved in descending order of docIds.
+ EXPECT_THAT(hits1[0].document_id(), Eq(4));
+ EXPECT_THAT(hits1[0].hit_section_ids_mask(), Eq(0b10));
+ EXPECT_THAT(hits1[1].document_id(), Eq(3));
+ EXPECT_THAT(hits1[1].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[2].document_id(), Eq(2));
+ EXPECT_THAT(hits1[2].hit_section_ids_mask(), Eq(0b101));
+ EXPECT_THAT(hits1[3].document_id(), Eq(1));
+ EXPECT_THAT(hits1[3].hit_section_ids_mask(), Eq(0b11));
+ EXPECT_THAT(hits1[4].document_id(), Eq(0));
+ EXPECT_THAT(hits1[4].hit_section_ids_mask(), Eq(0b1));
+
+ std::vector<DocHitInfo> hits2;
+ AlwaysFalseSuggestionResultCheckerImpl always_false_suggestion_result_checker;
+ lite_index->FetchHits(
foo_term_id, kSectionIdMaskAll,
/*only_from_prefix_sections=*/false,
SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
@@ -104,13 +575,119 @@ TEST_F(LiteIndexTest, LiteIndexAppendHits) {
// Check that no hits are returned because they get skipped by the namespace
// checker.
EXPECT_THAT(hits2, IsEmpty());
+
+ std::vector<DocHitInfo> hits3;
+ lite_index->FetchHits(
+ bar_term_id, 0b1,
+ /*only_from_prefix_sections=*/false,
+ SuggestionScoringSpecProto::SuggestionRankingStrategy::DOCUMENT_COUNT,
+ /*namespace_checker=*/nullptr, &hits3);
+ EXPECT_THAT(hits3, SizeIs(2));
+ // Check fetching hits with SectionIdMask.
+ EXPECT_THAT(hits3[0].document_id(), Eq(1));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+ EXPECT_THAT(hits3[1].document_id(), Eq(0));
+ EXPECT_THAT(hits3[1].hit_section_ids_mask(), Eq(0b1));
+
+ // Check that the HitBuffer is sorted after the query call. FetchHits should
+ // sort before performing binary search if the HitBuffer unsorted size exceeds
+ // the sort threshold. Regardless of the sort_at_indexing config.
+ EXPECT_THAT(lite_index->HasUnsortedHitsExceedingSortThreshold(), IsFalse());
+ ASSERT_THAT(lite_index->PersistToDisk(), IsOk());
+ ASSERT_TRUE(filesystem_.PRead((lite_index_file_name + "hb").c_str(),
+ &header_data, sizeof(header_data),
+ LiteIndex::kHeaderFileOffset));
+ EXPECT_THAT(header_data.cur_size - header_data.searchable_end, Eq(0));
}
TEST_F(LiteIndexTest, LiteIndexIterator) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
+ const std::string term = "foo";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+ Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
+ /*is_in_prefix_section=*/false);
+ Hit doc0_hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc0_section_id_mask = 0b11;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
+
+ Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
+ /*is_in_prefix_section=*/false);
+ Hit doc1_hit2(/*section_id=*/2, /*document_id=*/1, /*term_frequency=*/11,
+ /*is_in_prefix_section=*/false);
+ SectionIdMask doc1_section_id_mask = 0b110;
+ std::unordered_map<SectionId, Hit::TermFrequency>
+ expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
+
+ std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
+ std::make_unique<DocHitInfoIteratorTermLiteExact>(
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
+ /*unnormalized_term_length=*/0, kSectionIdMaskAll,
+ /*need_hit_term_frequency=*/true);
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(1));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc1_section_id_mask));
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map1)));
+
+ ASSERT_THAT(iter->Advance(), IsOk());
+ EXPECT_THAT(iter->doc_hit_info().document_id(), Eq(0));
+ EXPECT_THAT(iter->doc_hit_info().hit_section_ids_mask(),
+ Eq(doc0_section_id_mask));
+ matched_terms_stats.clear();
+ iter->PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
+ term, expected_section_ids_tf_map0)));
+}
+
+TEST_F(LiteIndexTest, LiteIndexIterator_sortAtIndexingDisabled) {
+ // Set up LiteIndex and TermIdCodec
+ std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
+ // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+ LiteIndex::Options options(lite_index_file_name,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/false,
+ /*hit_buffer_sort_threshold_bytes=*/64);
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ term_id_codec_,
+ TermIdCodec::Create(
+ IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()),
+ IcingDynamicTrie::max_value_index(options.lexicon_options)));
+
const std::string term = "foo";
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t tvi,
- lite_index_->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
+ lite_index->InsertTerm(term, TermMatchType::PREFIX, kNamespace0));
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
@@ -120,8 +697,8 @@ TEST_F(LiteIndexTest, LiteIndexIterator) {
SectionIdMask doc0_section_id_mask = 0b11;
std::unordered_map<SectionId, Hit::TermFrequency>
expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc0_hit0));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc0_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit0));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
/*is_in_prefix_section=*/false);
@@ -130,12 +707,12 @@ TEST_F(LiteIndexTest, LiteIndexIterator) {
SectionIdMask doc1_section_id_mask = 0b110;
std::unordered_map<SectionId, Hit::TermFrequency>
expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit1));
- ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit2));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit1));
+ ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc1_hit2));
std::unique_ptr<DocHitInfoIteratorTermLiteExact> iter =
std::make_unique<DocHitInfoIteratorTermLiteExact>(
- term_id_codec_.get(), lite_index_.get(), term, /*term_start_index=*/0,
+ term_id_codec_.get(), lite_index.get(), term, /*term_start_index=*/0,
/*unnormalized_term_length=*/0, kSectionIdMaskAll,
/*need_hit_term_frequency=*/true);
diff --git a/icing/index/lite/lite-index_thread-safety_test.cc b/icing/index/lite/lite-index_thread-safety_test.cc
index 7711f92..53aa6cd 100644
--- a/icing/index/lite/lite-index_thread-safety_test.cc
+++ b/icing/index/lite/lite-index_thread-safety_test.cc
@@ -19,12 +19,9 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include "icing/index/lite/lite-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/schema/section.h"
-#include "icing/store/suggestion-result-checker.h"
-#include "icing/testing/always-false-suggestion-result-checker-impl.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -52,7 +49,9 @@ class LiteIndexThreadSafetyTest : public testing::Test {
std::string lite_index_file_name =
index_dir_ + "/test_file.lite-idx-thread-safety.index";
LiteIndex::Options options(lite_index_file_name,
- /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+ /*hit_buffer_sort_at_indexing=*/true,
+ /*hit_buffer_sort_threshold_bytes=*/64);
ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
LiteIndex::Create(options, &icing_filesystem_));
diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h
index 61ec502..82bd010 100644
--- a/icing/index/lite/term-id-hit-pair.h
+++ b/icing/index/lite/term-id-hit-pair.h
@@ -73,6 +73,8 @@ class TermIdHitPair {
return value_ == rhs.value_;
}
+ bool operator<(const TermIdHitPair& rhs) const { return value_ < rhs.value_; }
+
private:
Value value_;
};