diff options
Diffstat (limited to 'icing/index/hit/doc-hit-info.h')
-rw-r--r-- | icing/index/hit/doc-hit-info.h | 53 |
1 files changed, 26 insertions, 27 deletions
diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h index 0be87d6..2770de2 100644 --- a/icing/index/hit/doc-hit-info.h +++ b/icing/index/hit/doc-hit-info.h @@ -26,19 +26,15 @@ namespace icing { namespace lib { // DocHitInfo provides a collapsed view of all hits for a specific doc. -// Hits contain a document_id, section_id and a term frequency. The -// information in multiple hits is collapse into a DocHitInfo by providing a -// SectionIdMask of all sections that contained a hit for this term as well as -// the highest term frequency of any hit for each section. +// Hits contain a document_id and section_id. The information in multiple hits +// is collapsed into a DocHitInfo by providing a SectionIdMask of all sections +// that contained a hit for this term. class DocHitInfo { public: explicit DocHitInfo(DocumentId document_id_in = kInvalidDocumentId, SectionIdMask hit_section_ids_mask = kSectionIdMaskNone) : document_id_(document_id_in), - hit_section_ids_mask_(hit_section_ids_mask) { - memset(hit_term_frequency_, Hit::kNoTermFrequency, - sizeof(hit_term_frequency_)); - } + hit_section_ids_mask_(hit_section_ids_mask) {} DocumentId document_id() const { return document_id_; } @@ -50,41 +46,44 @@ class DocHitInfo { hit_section_ids_mask_ = section_id_mask; } - Hit::TermFrequency hit_term_frequency(SectionId section_id) const { - return hit_term_frequency_[section_id]; + bool operator<(const DocHitInfo& other) const { + if (document_id() != other.document_id()) { + // Sort by document_id descending. This mirrors how the individual hits + // that are collapsed into this DocHitInfo would sort with other hits - + // document_ids are inverted when encoded in hits. Hits are encoded this + // way because they are appended to posting lists and the most recent + // value appended to a posting list must have the smallest encoded value + // of any hit on the posting list. 
+ return document_id() > other.document_id(); + } + return hit_section_ids_mask() < other.hit_section_ids_mask(); } - - bool operator<(const DocHitInfo& other) const; bool operator==(const DocHitInfo& other) const { - return (*this < other) == (other < *this); + return document_id_ == other.document_id_ && + hit_section_ids_mask_ == other.hit_section_ids_mask_; } - // Updates the hit_section_ids_mask and hit_term_frequency for the - // section, if necessary. - void UpdateSection(SectionId section_id, - Hit::TermFrequency hit_term_frequency); + // Updates the hit_section_ids_mask for the section, if necessary. + void UpdateSection(SectionId section_id) { + hit_section_ids_mask_ |= (UINT64_C(1) << section_id); + } - // Merges the sections of other into this. The hit_section_ids_masks are or'd; - // if this.hit_term_frequency_[sectionId] has already been defined, - // other.hit_term_frequency_[sectionId] value is ignored. + // Merges the sections of other into this. The hit_section_ids_masks are or'd. // // This does not affect the DocumentId of this or other. If callers care about // only merging sections for DocHitInfos with the same DocumentId, callers // should check this themselves. - void MergeSectionsFrom(const DocHitInfo& other); + void MergeSectionsFrom(const SectionIdMask& other_hit_section_ids_mask) { + hit_section_ids_mask_ |= other_hit_section_ids_mask; + } private: DocumentId document_id_; SectionIdMask hit_section_ids_mask_; - Hit::TermFrequency hit_term_frequency_[kMaxSectionId + 1]; } __attribute__((packed)); -static_assert(sizeof(DocHitInfo) == 22, ""); +static_assert(sizeof(DocHitInfo) == 12, ""); // TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions. static_assert(icing_is_packed_pod<DocHitInfo>::value, "go/icing-ubsan"); -static_assert( - sizeof(Hit::TermFrequency) == 1, - "Change how hit_term_frequency_ is initialized if changing the type " - "of Hit::TermFrequency"); } // namespace lib } // namespace icing |