aboutsummaryrefslogtreecommitdiff
path: root/icing/index/main/doc-hit-info-iterator-term-main.h
diff options
context:
space:
mode:
Diffstat (limited to 'icing/index/main/doc-hit-info-iterator-term-main.h')
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.h37
1 files changed, 26 insertions, 11 deletions
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index 08a385c..1987e12 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -17,10 +17,14 @@
#include <cstdint>
#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/main/main-index.h"
#include "icing/index/main/posting-list-hit-accessor.h"
@@ -31,6 +35,19 @@ namespace lib {
class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
public:
+ struct DocHitInfoAndTermFrequencyArray {
+ DocHitInfo doc_hit_info;
+ std::optional<Hit::TermFrequencyArray> term_frequency_array;
+
+ explicit DocHitInfoAndTermFrequencyArray() = default;
+
+ explicit DocHitInfoAndTermFrequencyArray(
+ DocHitInfo doc_hit_info_in,
+ std::optional<Hit::TermFrequencyArray> term_frequency_array_in)
+ : doc_hit_info(std::move(doc_hit_info_in)),
+ term_frequency_array(std::move(term_frequency_array_in)) {}
+ };
+
explicit DocHitInfoIteratorTermMain(MainIndex* main_index,
const std::string& term,
int term_start_index,
@@ -74,8 +91,9 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
while (section_mask_copy) {
SectionId section_id = __builtin_ctzll(section_mask_copy);
if (need_hit_term_frequency_) {
- section_term_frequencies.at(section_id) = cached_hit_term_frequency_.at(
- cached_doc_hit_infos_idx_)[section_id];
+ section_term_frequencies.at(section_id) =
+ (*cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_)
+ .term_frequency_array)[section_id];
}
section_mask_copy &= ~(UINT64_C(1) << section_id);
}
@@ -106,12 +124,13 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
std::unique_ptr<PostingListHitAccessor> posting_list_accessor_;
MainIndex* main_index_;
- // Stores hits retrieved from the index. This may only be a subset of the hits
- // that are present in the index. Current value pointed to by the Iterator is
- // tracked by cached_doc_hit_infos_idx_.
- std::vector<DocHitInfo> cached_doc_hit_infos_;
- std::vector<Hit::TermFrequencyArray> cached_hit_term_frequency_;
+ // Stores hits and optional term frequency arrays retrieved from the index.
+ // This may only be a subset of the hits that are present in the index.
+ // Current value pointed to by the Iterator is tracked by
+ // cached_doc_hit_infos_idx_.
+ std::vector<DocHitInfoAndTermFrequencyArray> cached_doc_hit_infos_;
int cached_doc_hit_infos_idx_;
+
int num_advance_calls_;
int num_blocks_inspected_;
bool all_pages_consumed_;
@@ -168,10 +187,6 @@ class DocHitInfoIteratorTermMainPrefix : public DocHitInfoIteratorTermMain {
libtextclassifier3::Status RetrieveMoreHits() override;
private:
- // After retrieving DocHitInfos from the index, a DocHitInfo for docid 1 and
- // "foo" and a DocHitInfo for docid 1 and "fool". These DocHitInfos should be
- // merged.
- void SortAndDedupeDocumentIds();
// Whether or not posting_list_accessor_ holds a posting list chain for
// 'term' or for a term for which 'term' is a prefix. This is necessary to
// determine whether to return hits that are not from a prefix section (hits