1 files changed, 80 insertions, 14 deletions
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index f215d63..597f5b5 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -14,7 +14,9 @@
 
 #include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
 
+#include <array>
 #include <cstdint>
+#include <numeric>
 
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/absl_ports/canonical_errors.h"
@@ -30,9 +32,9 @@ namespace lib {
 namespace {
 
 std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
-  std::string mask(kMaxSectionId + 1, '0');
+  std::string mask(kTotalNumSections, '0');
   for (SectionId i = kMaxSectionId; i >= 0; --i) {
-    if (section_id_mask & (1U << i)) {
+    if (section_id_mask & (UINT64_C(1) << i)) {
       mask[kMaxSectionId - i] = '1';
     }
   }
@@ -76,9 +78,11 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() {
   ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->GetTermId(term_));
   ICING_ASSIGN_OR_RETURN(uint32_t term_id,
                          term_id_codec_->EncodeTvi(tvi, TviType::LITE));
-  lite_index_->AppendHits(term_id, section_restrict_mask_,
-                          /*only_from_prefix_sections=*/false,
-                          /*namespace_checker=*/nullptr, &cached_hits_);
+  lite_index_->AppendHits(
+      term_id, section_restrict_mask_,
+      /*only_from_prefix_sections=*/false,
+      /*namespace_checker=*/nullptr, &cached_hits_,
+      need_hit_term_frequency_ ? &cached_hit_term_frequency_ : nullptr);
   cached_hits_idx_ = 0;
   return libtextclassifier3::Status::OK;
 }
@@ -99,9 +103,11 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() {
     ICING_ASSIGN_OR_RETURN(
         uint32_t term_id,
         term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE));
-    lite_index_->AppendHits(term_id, section_restrict_mask_,
-                            /*only_from_prefix_sections=*/!exact_match,
-                            /*namespace_checker=*/nullptr, &cached_hits_);
+    lite_index_->AppendHits(
+        term_id, section_restrict_mask_,
+        /*only_from_prefix_sections=*/!exact_match,
+        /*namespace_checker=*/nullptr, &cached_hits_,
+        need_hit_term_frequency_ ? &cached_hit_term_frequency_ : nullptr);
     ++terms_matched;
   }
   if (terms_matched > 1) {
@@ -111,23 +117,83 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() {
   return libtextclassifier3::Status::OK;
 }
 
-void DocHitInfoIteratorTermLitePrefix::SortAndDedupeDocumentIds() {
+void DocHitInfoIteratorTermLitePrefix::SortDocumentIds() {
   // Re-sort cached document_ids and merge sections.
-  sort(cached_hits_.begin(), cached_hits_.end());
+  if (!need_hit_term_frequency_) {
+    // If we don't need to also sort cached_hit_term_frequency_ along with
+    // cached_hits_, then just simply sort cached_hits_.
+    sort(cached_hits_.begin(), cached_hits_.end());
+  } else {
+    // Sort cached_hit_term_frequency_ along with cached_hits_.
+    std::vector<int> indices(cached_hits_.size());
+    std::iota(indices.begin(), indices.end(), 0);
+    std::sort(indices.begin(), indices.end(), [this](int i, int j) {
+      return cached_hits_[i] < cached_hits_[j];
+    });
+    // Now indices is a map from sorted index to current index. In other words,
+    // the sorted cached_hits_[i] should be the current cached_hits_[indices[i]]
+    // for every valid i.
+    std::vector<bool> done(indices.size());
+    // Apply permutation
+    for (int i = 0; i < indices.size(); ++i) {
+      if (done[i]) {
+        continue;
+      }
+      done[i] = true;
+      int curr = i;
+      int next = indices[i];
+      // Since every finite permutation is formed by disjoint cycles, we can
+      // start with the current element, at index i, and swap the element at
+      // this position with whatever element that *should* be here. Then,
+      // continue to swap the original element, at its updated positions, with
+      // the element that should be occupying that position until the original
+      // element has reached *its* correct position. This completes applying the
+      // single cycle in the permutation.
+      while (next != i) {
+        std::swap(cached_hits_[curr], cached_hits_[next]);
+        std::swap(cached_hit_term_frequency_[curr],
+                  cached_hit_term_frequency_[next]);
+        done[next] = true;
+        curr = next;
+        next = indices[next];
+      }
+    }
+  }
+}
 
+void DocHitInfoIteratorTermLitePrefix::SortAndDedupeDocumentIds() {
+  SortDocumentIds();
   int idx = 0;
   for (int i = 1; i < cached_hits_.size(); ++i) {
-    const DocHitInfo& hit_info = cached_hits_.at(i);
-    DocHitInfo& collapsed_hit_info = cached_hits_.at(idx);
+    const DocHitInfo& hit_info = cached_hits_[i];
+    DocHitInfo& collapsed_hit_info = cached_hits_[idx];
     if (collapsed_hit_info.document_id() == hit_info.document_id()) {
-      collapsed_hit_info.MergeSectionsFrom(hit_info);
+      SectionIdMask curr_mask = hit_info.hit_section_ids_mask();
+      collapsed_hit_info.MergeSectionsFrom(curr_mask);
+      if (need_hit_term_frequency_) {
+        Hit::TermFrequencyArray& collapsed_term_frequency =
+            cached_hit_term_frequency_[idx];
+        while (curr_mask) {
+          SectionId section_id = __builtin_ctzll(curr_mask);
+          collapsed_term_frequency[section_id] =
+              cached_hit_term_frequency_[i][section_id];
+          curr_mask &= ~(UINT64_C(1) << section_id);
+        }
+      }
     } else {
       // New document_id.
-      cached_hits_.at(++idx) = hit_info;
+      ++idx;
+      cached_hits_[idx] = hit_info;
+      if (need_hit_term_frequency_) {
+        cached_hit_term_frequency_[idx] = cached_hit_term_frequency_[i];
+      }
     }
   }
   // idx points to last doc hit info.
   cached_hits_.resize(idx + 1);
+  if (need_hit_term_frequency_) {
+    cached_hit_term_frequency_.resize(idx + 1);
+  }
 }
 
 std::string DocHitInfoIteratorTermLitePrefix::ToString() const {