diff options
Diffstat (limited to 'icing/index/main/doc-hit-info-iterator-term-main.h')
-rw-r--r-- | icing/index/main/doc-hit-info-iterator-term-main.h | 37 |
1 files changed, 26 insertions, 11 deletions
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h index 08a385c..1987e12 100644 --- a/icing/index/main/doc-hit-info-iterator-term-main.h +++ b/icing/index/main/doc-hit-info-iterator-term-main.h @@ -17,10 +17,14 @@ #include <cstdint> #include <memory> +#include <optional> +#include <string> +#include <utility> #include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/index/hit/doc-hit-info.h" +#include "icing/index/hit/hit.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/main/main-index.h" #include "icing/index/main/posting-list-hit-accessor.h" @@ -31,6 +35,19 @@ namespace lib { class DocHitInfoIteratorTermMain : public DocHitInfoIterator { public: + struct DocHitInfoAndTermFrequencyArray { + DocHitInfo doc_hit_info; + std::optional<Hit::TermFrequencyArray> term_frequency_array; + + explicit DocHitInfoAndTermFrequencyArray() = default; + + explicit DocHitInfoAndTermFrequencyArray( + DocHitInfo doc_hit_info_in, + std::optional<Hit::TermFrequencyArray> term_frequency_array_in) + : doc_hit_info(std::move(doc_hit_info_in)), + term_frequency_array(std::move(term_frequency_array_in)) {} + }; + explicit DocHitInfoIteratorTermMain(MainIndex* main_index, const std::string& term, int term_start_index, @@ -74,8 +91,9 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator { while (section_mask_copy) { SectionId section_id = __builtin_ctzll(section_mask_copy); if (need_hit_term_frequency_) { - section_term_frequencies.at(section_id) = cached_hit_term_frequency_.at( - cached_doc_hit_infos_idx_)[section_id]; + section_term_frequencies.at(section_id) = + (*cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_) + .term_frequency_array)[section_id]; } section_mask_copy &= ~(UINT64_C(1) << section_id); } @@ -106,12 +124,13 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator { std::unique_ptr<PostingListHitAccessor> posting_list_accessor_; MainIndex* main_index_; - // Stores hits retrieved from the index. This may only be a subset of the hits - // that are present in the index. Current value pointed to by the Iterator is - // tracked by cached_doc_hit_infos_idx_. - std::vector<DocHitInfo> cached_doc_hit_infos_; - std::vector<Hit::TermFrequencyArray> cached_hit_term_frequency_; + // Stores hits and optional term frequency arrays retrieved from the index. + // This may only be a subset of the hits that are present in the index. + // Current value pointed to by the Iterator is tracked by + // cached_doc_hit_infos_idx_. + std::vector<DocHitInfoAndTermFrequencyArray> cached_doc_hit_infos_; int cached_doc_hit_infos_idx_; + int num_advance_calls_; int num_blocks_inspected_; bool all_pages_consumed_; @@ -168,10 +187,6 @@ class DocHitInfoIteratorTermMainPrefix : public DocHitInfoIteratorTermMain { libtextclassifier3::Status RetrieveMoreHits() override; private: - // After retrieving DocHitInfos from the index, a DocHitInfo for docid 1 and - // "foo" and a DocHitInfo for docid 1 and "fool". These DocHitInfos should be - // merged. - void SortAndDedupeDocumentIds(); // Whether or not posting_list_accessor_ holds a posting list chain for // 'term' or for a term for which 'term' is a prefix. This is necessary to // determine whether to return hits that are not from a prefix section (hits |