aboutsummaryrefslogtreecommitdiff
path: root/icing/scoring/ranker.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/scoring/ranker.cc')
-rw-r--r--icing/scoring/ranker.cc98
1 files changed, 98 insertions, 0 deletions
diff --git a/icing/scoring/ranker.cc b/icing/scoring/ranker.cc
index fecee82..117f44c 100644
--- a/icing/scoring/ranker.cc
+++ b/icing/scoring/ranker.cc
@@ -32,6 +32,7 @@ namespace {
// Helper function to wrap the heapify algorithm, it heapifies the target
// subtree node in place.
+// TODO(b/152934343) refactor the heapify function and making it into a class.
void Heapify(
std::vector<ScoredDocumentHit>* scored_document_hits,
int target_subtree_root_index,
@@ -71,6 +72,80 @@ void Heapify(
}
}
+// Heapify the given term vector from top to bottom. Call it after add or
+// replace an element at the front of the vector.
+void HeapifyTermDown(std::vector<TermMetadata>& scored_terms,
+ int target_subtree_root_index) {
+ int heap_size = scored_terms.size();
+ if (target_subtree_root_index >= heap_size) {
+ return;
+ }
+
+ // Initializes subtree root as the current minimum node.
+ int min = target_subtree_root_index;
+ // If we represent a heap in an array/vector, indices of left and right
+ // children can be calculated as such.
+ const int left = target_subtree_root_index * 2 + 1;
+ const int right = target_subtree_root_index * 2 + 2;
+
+ // If left child is smaller than current minimum.
+ if (left < heap_size &&
+ scored_terms.at(left).hit_count < scored_terms.at(min).hit_count) {
+ min = left;
+ }
+
+ // If right child is smaller than current minimum.
+ if (right < heap_size &&
+ scored_terms.at(right).hit_count < scored_terms.at(min).hit_count) {
+ min = right;
+ }
+
+ // If the minimum is not the subtree root, swap and continue heapifying the
+ // lower level subtree.
+ if (min != target_subtree_root_index) {
+ std::swap(scored_terms.at(min),
+ scored_terms.at(target_subtree_root_index));
+ HeapifyTermDown(scored_terms, min);
+ }
+}
+
+// Heapify the given term vector from bottom to top. Call it after add an
+// element at the end of the vector.
+void HeapifyTermUp(std::vector<TermMetadata>& scored_terms,
+ int target_subtree_child_index) {
+ // If we represent a heap in an array/vector, indices of root can be
+ // calculated as such.
+ const int root = (target_subtree_child_index + 1) / 2 - 1;
+
+ // If the current child is smaller than the root, swap and continue heapifying
+ // the upper level subtree
+ if (root >= 0 && scored_terms.at(target_subtree_child_index).hit_count <
+ scored_terms.at(root).hit_count) {
+ std::swap(scored_terms.at(root),
+ scored_terms.at(target_subtree_child_index));
+ HeapifyTermUp(scored_terms, root);
+ }
+}
+
+TermMetadata PopRootTerm(std::vector<TermMetadata>& scored_terms) {
+ if (scored_terms.empty()) {
+ // Return an invalid TermMetadata as a sentinel value.
+ return TermMetadata(/*content_in=*/"", /*hit_count_in=*/-1);
+ }
+
+ // Steps to extract root from heap:
+ // 1. copy out root
+ TermMetadata root = scored_terms.at(0);
+ const size_t last_node_index = scored_terms.size() - 1;
+ // 2. swap root and the last node
+ std::swap(scored_terms.at(0), scored_terms.at(last_node_index));
+ // 3. remove last node
+ scored_terms.pop_back();
+ // 4. heapify root
+ HeapifyTermDown(scored_terms, /*target_subtree_root_index=*/0);
+ return root;
+}
+
// Helper function to extract the root from the heap. The heap structure will be
// maintained.
//
@@ -115,6 +190,19 @@ void BuildHeapInPlace(
}
}
+void PushToTermHeap(TermMetadata term, int number_to_return,
+ std::vector<TermMetadata>& scored_terms_heap) {
+ if (scored_terms_heap.size() < number_to_return) {
+ scored_terms_heap.push_back(std::move(term));
+ // We insert at end, so we should heapify bottom up.
+ HeapifyTermUp(scored_terms_heap, scored_terms_heap.size() - 1);
+ } else if (scored_terms_heap.at(0).hit_count < term.hit_count) {
+ scored_terms_heap.at(0) = std::move(term);
+ // We insert at root, so we should heapify top down.
+ HeapifyTermDown(scored_terms_heap, /*target_subtree_root_index=*/0);
+ }
+}
+
std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
std::vector<ScoredDocumentHit>* scored_document_hits_heap, int num_results,
const ScoredDocumentHitComparator& scored_document_hit_comparator) {
@@ -134,5 +222,15 @@ std::vector<ScoredDocumentHit> PopTopResultsFromHeap(
return scored_document_hit_result;
}
+std::vector<TermMetadata> PopAllTermsFromHeap(
+ std::vector<TermMetadata>& scored_terms_heap) {
+ std::vector<TermMetadata> top_term_result;
+ top_term_result.reserve(scored_terms_heap.size());
+ while (!scored_terms_heap.empty()) {
+ top_term_result.push_back(PopRootTerm(scored_terms_heap));
+ }
+ return top_term_result;
+}
+
} // namespace lib
} // namespace icing