diff options
author | Tim Barron <tjbarron@google.com> | 2022-12-12 18:03:05 -0800 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2022-12-12 18:03:05 -0800 |
commit | 8ddc32ad433ea147de80dcfac2afe58962360f18 (patch) | |
tree | 50c30cb98396499bf1d6caf33b383f7f4bbc7e58 /icing/scoring/priority-queue-scored-document-hits-ranker.h | |
parent | 2658f90984737e5bf6c76d82024103dccd4d51c6 (diff) | |
download | icing-8ddc32ad433ea147de80dcfac2afe58962360f18.tar.gz |
Sync from upstream.
Descriptions:
======================================================================
Add ScoringSpec into JoinSpec. Rename joined_document to child_document.
======================================================================
Create JoinedScoredDocumentHit class and refactor ScoredDocumentHitsRanker.
======================================================================
Implement initial Join workflow
======================================================================
Implement the Lexer for Icing Advanced Query Language
======================================================================
Create struct Options for PersistentHashMap
======================================================================
Premapping FileBackedVector
======================================================================
Create class PersistentHashMapKeyMapper
======================================================================
Add integer sections into TokenizedDocument and rename string sections
======================================================================
Create NumericIndex interface and DocHitInfoIteratorNumeric
======================================================================
Implement DummyNumericIndex and unit test
======================================================================
Change PostingListAccessor::Finalize to rvalue member function
======================================================================
Define the Abstract Syntax Tree for Icing's list_filter parser.
======================================================================
Refactor query processing and score
======================================================================
Refactor IcingSearchEngine for AppSearch Dynamite Module 0p APIs
======================================================================
Implement the Lexer for Icing Advanced Scoring Language
======================================================================
Add a common interface for IcingSearchEngine and dynamite client
======================================================================
Implement a subset of the query grammar.
======================================================================
Refactor index processor
======================================================================
Add integer index into IcingSearchEngine and IndexProcessor
======================================================================
Implement the parser for Icing Advanced Scoring Language
======================================================================
Implement IntegerIndexData and PostingListUsedIntegerIndexDataSerializer
======================================================================
Add PostingListAccessor abstract class for common components and methods
======================================================================
Implement PostingListIntegerIndexDataAccessor
======================================================================
Create PostingListIntegerIndexDataAccessorTest
======================================================================
Fix Icing Segmentation tests for word connectors that changed in ICU 72.
======================================================================
Modify the Advanced Query grammar to allow functions to accept expressions.
======================================================================
Implement QueryVisitor.
======================================================================
Enable the Advanced Query Parser to handle member functions
======================================================================
Refactor the Scorer class to support the Advanced Scoring Language
======================================================================
Integrate advanced query parser with the query processor.
======================================================================
Implement support for JoinSpec in Icing.
======================================================================
Implement the Advanced Scoring Language for basic functions and operators
======================================================================
Bug: 208654892
Bug: 249829533
Bug: 256022027
Bug: 261474063
Bug: 240333360
Bug: 193919210
Change-Id: I5f5bdc6249282ecc4b014b4fbdf8e2d1f8b20c19
Diffstat (limited to 'icing/scoring/priority-queue-scored-document-hits-ranker.h')
-rw-r--r-- | icing/scoring/priority-queue-scored-document-hits-ranker.h | 69 |
1 files changed, 59 insertions, 10 deletions
diff --git a/icing/scoring/priority-queue-scored-document-hits-ranker.h b/icing/scoring/priority-queue-scored-document-hits-ranker.h index 3ef2ae5..0798d7d 100644 --- a/icing/scoring/priority-queue-scored-document-hits-ranker.h +++ b/icing/scoring/priority-queue-scored-document-hits-ranker.h @@ -26,21 +26,37 @@ namespace lib { // ScoredDocumentHitsRanker interface implementation, based on // std::priority_queue. We can get next top hit in O(lgN) time. +template <typename ScoredDataType, + typename Converter = typename ScoredDataType::Converter> class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker { public: explicit PriorityQueueScoredDocumentHitsRanker( - std::vector<ScoredDocumentHit>&& scored_document_hits, - bool is_descending = true); + std::vector<ScoredDataType>&& scored_data_vec, bool is_descending = true); ~PriorityQueueScoredDocumentHitsRanker() override = default; - ScoredDocumentHit PopNext() override; + // Note: ranker may store ScoredDocumentHit or JoinedScoredDocumentHit, so we + // have template for scored_data_pq_. + // - JoinedScoredDocumentHit is a superset of ScoredDocumentHit, so we unify + // the return type of PopNext to use the superset type + // JoinedScoredDocumentHit in order to make it simple, and rankers storing + // ScoredDocumentHit should convert it to JoinedScoredDocumentHit before + // returning. It makes the implementation simpler, especially for + // ResultRetriever, which now only needs to deal with one single return + // format. + // - JoinedScoredDocumentHit has ~2x size of ScoredDocumentHit. Since we cache + // ranker (which contains a priority queue of data) in ResultState, if we + // store the scored hits in JoinedScoredDocumentHit format directly, then it + // doubles the memory usage. Therefore, we still keep the flexibility to + // store ScoredDocumentHit or any other types of data, but require PopNext + // to convert it to JoinedScoredDocumentHit. + JoinedScoredDocumentHit PopNext() override; void TruncateHitsTo(int new_size) override; - int size() const override { return scored_document_hits_pq_.size(); } + int size() const override { return scored_data_pq_.size(); } - bool empty() const override { return scored_document_hits_pq_.empty(); } + bool empty() const override { return scored_data_pq_.empty(); } private: // Comparator for std::priority_queue. Since std::priority is a max heap @@ -49,8 +65,8 @@ class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker { public: explicit Comparator(bool is_ascending) : is_ascending_(is_ascending) {} - bool operator()(const ScoredDocumentHit& lhs, - const ScoredDocumentHit& rhs) const { + bool operator()(const ScoredDataType& lhs, + const ScoredDataType& rhs) const { // STL comparator requirement: equal MUST return false. // If writing `return is_ascending_ == !(lhs < rhs)`: // - When lhs == rhs, !(lhs < rhs) is true @@ -68,11 +84,44 @@ class PriorityQueueScoredDocumentHitsRanker : public ScoredDocumentHitsRanker { Comparator comparator_; // Use priority queue to get top K hits in O(KlgN) time. - std::priority_queue<ScoredDocumentHit, std::vector<ScoredDocumentHit>, - Comparator> - scored_document_hits_pq_; + std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator> + scored_data_pq_; + + Converter converter_; }; +template <typename ScoredDataType, typename Converter> +PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>:: + PriorityQueueScoredDocumentHitsRanker( + std::vector<ScoredDataType>&& scored_data_vec, bool is_descending) + : comparator_(/*is_ascending=*/!is_descending), + scored_data_pq_(comparator_, std::move(scored_data_vec)) {} + +template <typename ScoredDataType, typename Converter> +JoinedScoredDocumentHit +PriorityQueueScoredDocumentHitsRanker<ScoredDataType, Converter>::PopNext() { + ScoredDataType next_scored_data = scored_data_pq_.top(); + scored_data_pq_.pop(); + return converter_(std::move(next_scored_data)); +} + +template <typename ScoredDataType, typename Converter> +void PriorityQueueScoredDocumentHitsRanker< + ScoredDataType, Converter>::TruncateHitsTo(int new_size) { + if (new_size < 0 || scored_data_pq_.size() <= new_size) { + return; + } + + // Copying the best new_size results. + std::priority_queue<ScoredDataType, std::vector<ScoredDataType>, Comparator> + new_pq(comparator_); + for (int i = 0; i < new_size; ++i) { + new_pq.push(scored_data_pq_.top()); + scored_data_pq_.pop(); + } + scored_data_pq_ = std::move(new_pq); +} + } // namespace lib } // namespace icing |