aboutsummaryrefslogtreecommitdiff
path: root/icing/scoring/priority-queue-scored-document-hits-ranker.h
diff options
context:
space:
mode:
authorTim Barron <tjbarron@google.com>2022-12-12 18:03:05 -0800
committerTim Barron <tjbarron@google.com>2022-12-12 18:03:05 -0800
commit8ddc32ad433ea147de80dcfac2afe58962360f18 (patch)
tree50c30cb98396499bf1d6caf33b383f7f4bbc7e58 /icing/scoring/priority-queue-scored-document-hits-ranker.h
parent2658f90984737e5bf6c76d82024103dccd4d51c6 (diff)
downloadicing-8ddc32ad433ea147de80dcfac2afe58962360f18.tar.gz
Sync from upstream.
Descriptions: ====================================================================== Add ScoringSpec into JoinSpec. Rename joined_document to child_document. ====================================================================== Create JoinedScoredDocumentHit class and refactor ScoredDocumentHitsRanker. ====================================================================== Implement initial Join workflow ====================================================================== Implement the Lexer for Icing Advanced Query Language ====================================================================== Create struct Options for PersistentHashMap ====================================================================== Premapping FileBackedVector ====================================================================== Create class PersistentHashMapKeyMapper ====================================================================== Add integer sections into TokenizedDocument and rename string sections ====================================================================== Create NumericIndex interface and DocHitInfoIteratorNumeric ====================================================================== Implement DummyNumericIndex and unit test ====================================================================== Change PostingListAccessor::Finalize to rvalue member function ====================================================================== Define the Abstract Syntax Tree for Icing's list_filter parser. ====================================================================== Refactor query processing and score ====================================================================== Refactor IcingSearchEngine for AppSearch Dynamite Module 0p APIs ====================================================================== Implement the Lexer for Icing Advanced Scoring Language ====================================================================== Add a common interface for IcingSearchEngine and dynamite client ====================================================================== Implement a subset of the query grammar. ====================================================================== Refactor index processor ====================================================================== Add integer index into IcingSearchEngine and IndexProcessor ====================================================================== Implement the parser for Icing Advanced Scoring Language ====================================================================== Implement IntegerIndexData and PostingListUsedIntegerIndexDataSerializer ====================================================================== Add PostingListAccessor abstract class for common components and methods ====================================================================== Implement PostingListIntegerIndexDataAccessor ====================================================================== Create PostingListIntegerIndexDataAccessorTest ====================================================================== Fix Icing Segmentation tests for word connectors that changed in ICU 72. ====================================================================== Modify the Advanced Query grammar to allow functions to accept expressions. ====================================================================== Implement QueryVisitor. ====================================================================== Enable the Advanced Query Parser to handle member functions ====================================================================== Refactor the Scorer class to support the Advanced Scoring Language ====================================================================== Integrate advanced query parser with the query processor. ====================================================================== Implement support for JoinSpec in Icing. ====================================================================== Implement the Advanced Scoring Language for basic functions and operators ====================================================================== Bug: 208654892 Bug: 249829533 Bug: 256022027 Bug: 261474063 Bug: 240333360 Bug: 193919210 Change-Id: I5f5bdc6249282ecc4b014b4fbdf8e2d1f8b20c19
Diffstat (limited to 'icing/scoring/priority-queue-scored-document-hits-ranker.h')
-rw-r--r--icing/scoring/priority-queue-scored-document-hits-ranker.h69
1 files changed, 59 insertions, 10 deletions
diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker.h b/icing/scoring/priority-queue-scored-document-hits-ranker.h
index 3ef2ae5..0798d7d 100644
--- a/icing/scoring/priority-queue-scored-document-hits-ranker.h
+++ b/icing/scoring/priority-queue-scored-document-hits-ranker.h
@@ -26,21 +26,37 @@ namespace lib {
// ScoredDocumentHitsRanker interface implementation, based on
// std::priority_queue. We can get next top hit in O(lgN) time.
+template <typename ScoredDataType,
+ typename Converter = typename ScoredDataType::Converter>
class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker {
public:
explicit PriorityQueueScoredDocumentHitsRanker(
- std::vector<ScoredDocumentHit>&& scored_document_hits,
- bool is_descending = true);
+ std::vector<ScoredDataType>&& scored_data_vec, bool is_descending = true);
~PriorityQueueScoredDocumentHitsRanker() override = default;
- ScoredDocumentHit PopNext() override;
+ // Note: ranker may store ScoredDocumentHit or JoinedScoredDocumentHit, so we
+ // have template for scored_data_pq_.
+ // - JoinedScoredDocumentHit is a superset of ScoredDocumentHit, so we unify
+ // the return type of PopNext to use the superset type
+ // JoinedScoredDocumentHit in order to make it simple, and rankers storing
+ // ScoredDocumentHit should convert it to JoinedScoredDocumentHit before
+ // returning. It makes the implementation simpler, especially for
+ // ResultRetriever, which now only needs to deal with one single return
+ // format.
+ // - JoinedScoredDocumentHit has ~2x size of ScoredDocumentHit. Since we cache
+ // ranker (which contains a priority queue of data) in ResultState, if we
+ // store the scored hits in JoinedScoredDocumentHit format directly, then it
+ // doubles the memory usage. Therefore, we still keep the flexibility to
+ // store ScoredDocumentHit or any other types of data, but require PopNext
+ // to convert it to JoinedScoredDocumentHit.
+ JoinedScoredDocumentHit PopNext() override;
void TruncateHitsTo(int new_size) override;
- int size() const override { return scored_document_hits_pq_.size(); }
+ int size() const override { return scored_data_pq_.size(); }
- bool empty() const override { return scored_document_hits_pq_.empty(); }
+ bool empty() const override { return scored_data_pq_.empty(); }
private:
// Comparator for std::priority_queue. Since std::priority is a max heap
@@ -49,8 +65,8 @@ class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker {
public:
explicit Comparator(bool is_ascending) : is_ascending_(is_ascending) {}
- bool operator()(const ScoredDocumentHit& lhs,
- const ScoredDocumentHit& rhs) const {
+ bool operator()(const ScoredDataType& lhs,
+ const ScoredDataType& rhs) const {
// STL comparator requirement: equal MUST return false.
// If writing `return is_ascending_ == !(lhs < rhs)`:
// - When lhs == rhs, !(lhs < rhs) is true
@@ -68,11 +84,44 @@ class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker {
Comparator comparator_;
// Use priority queue to get top K hits in O(KlgN) time.
- std::priority_queue<ScoredDocumentHit, std::vector<ScoredDocumentHit>,
- Comparator>
- scored_document_hits_pq_;
+ std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
+ scored_data_pq_;
+
+ Converter converter_;
};
+template <typename ScoredDataType, typename Converter>
+PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::
+ PriorityQueueScoredDocumentHitsRanker(
+ std::vector<ScoredDataType>&& scored_data_vec, bool is_descending)
+ : comparator_(/*is_ascending=*/!is_descending),
+ scored_data_pq_(comparator_, std::move(scored_data_vec)) {}
+
+template <typename ScoredDataType, typename Converter>
+JoinedScoredDocumentHit
+PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::PopNext() {
+ ScoredDataType next_scored_data = scored_data_pq_.top();
+ scored_data_pq_.pop();
+ return converter_(std::move(next_scored_data));
+}
+
+template <typename ScoredDataType, typename Converter>
+void PriorityQueueScoredDocumentHitsRanker<
+ ScoredDataType, Converter>::TruncateHitsTo(int new_size) {
+ if (new_size < 0 || scored_data_pq_.size() <= new_size) {
+ return;
+ }
+
+ // Copying the best new_size results.
+ std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator>
+ new_pq(comparator_);
+ for (int i = 0; i < new_size; ++i) {
+ new_pq.push(scored_data_pq_.top());
+ scored_data_pq_.pop();
+ }
+ scored_data_pq_ = std::move(new_pq);
+}
+
} // namespace lib
} // namespace icing