aboutsummaryrefslogtreecommitdiff
path: root/icing/index/main/doc-hit-info-iterator-term-main.cc
diff options
context:
space:
mode:
author Tim Barron <tjbarron@google.com> 2023-08-30 08:37:02 -0700
committer Tim Barron <tjbarron@google.com> 2023-08-30 08:40:39 -0700
commit 3cb375c9f19cd6c7ef1f8a90e2cdcb66b80b407e (patch)
tree 62f2a3f1d688b7ba688ed591d6322bca126663fd /icing/index/main/doc-hit-info-iterator-term-main.cc
parent ec9c4f473d9b5b6d316405f5057eeeddbaa27ff5 (diff)
parent 8c71e61d02944611249c892236e67c6acace8a2d (diff)
download icing-3cb375c9f19cd6c7ef1f8a90e2cdcb66b80b407e.tar.gz
Merge remote-tracking branch 'aosp/upstream-master' into androidx-main
* aosp/upstream-master: Update Icing from upstream. Descriptions: ======================================================================== Fix term frequency bug ======================================================================== Delete dead JNI functions. ======================================================================== Switch Icing JNI implementation to use RegisterNatives ======================================================================== Avoid unnecessary GetObjectClass and GetFieldID calls. ======================================================================== Bug: 296938196 Change-Id: Idaaf78068bcfa8d9a34efd9b892c20049646874f
Diffstat (limited to 'icing/index/main/doc-hit-info-iterator-term-main.cc')
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.cc86
1 files changed, 49 insertions, 37 deletions
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index 8f0d3f5..5cf6a4c 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -14,16 +14,20 @@
#include "icing/index/main/doc-hit-info-iterator-term-main.h"
-#include <cstdint>
#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
-#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/main/posting-list-hit-accessor.h"
-#include "icing/legacy/core/icing-string-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/util/logging.h"
@@ -44,6 +48,30 @@ std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
return mask;
}
+// Folds `hit` into `cached_doc_hit_infos_out`.
+//
+// If the cache is empty, or its last entry belongs to a different document
+// than `hit`, a new entry for hit.document_id() is appended first. That entry
+// carries a value-initialized term frequency array only when
+// `need_hit_term_frequency` is true; otherwise its optional stays empty.
+//
+// The last entry is then updated with the hit's section id and, when
+// `need_hit_term_frequency` is true, the hit's term frequency is written at
+// the index given by that section id.
+void MergeNewHitIntoCachedDocHitInfos(
+    const Hit& hit, bool need_hit_term_frequency,
+    std::vector<DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray>&
+        cached_doc_hit_infos_out) {
+  using CachedEntry =
+      DocHitInfoIteratorTermMain::DocHitInfoAndTermFrequencyArray;
+
+  const bool starts_new_document =
+      cached_doc_hit_infos_out.empty() ||
+      cached_doc_hit_infos_out.back().doc_hit_info.document_id() !=
+          hit.document_id();
+  if (starts_new_document) {
+    std::optional<Hit::TermFrequencyArray> frequencies;
+    if (need_hit_term_frequency) {
+      frequencies.emplace();
+    }
+    cached_doc_hit_infos_out.push_back(
+        CachedEntry(DocHitInfo(hit.document_id()), std::move(frequencies)));
+  }
+
+  CachedEntry& current = cached_doc_hit_infos_out.back();
+  current.doc_hit_info.UpdateSection(hit.section_id());
+  if (need_hit_term_frequency) {
+    (*current.term_frequency_array)[hit.section_id()] = hit.term_frequency();
+  }
+}
+
} // namespace
libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
@@ -76,7 +104,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
- doc_hit_info_ = cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_);
+ doc_hit_info_ =
+ cached_doc_hit_infos_.at(cached_doc_hit_infos_idx_).doc_hit_info;
hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
return libtextclassifier3::Status::OK;
}
@@ -90,16 +119,16 @@ DocHitInfoIteratorTermMain::TrimRightMostNode() && {
}
libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
- DocHitInfo last_doc_hit_info;
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
if (!cached_doc_hit_infos_.empty()) {
- last_doc_hit_info = cached_doc_hit_infos_.back();
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
}
cached_doc_hit_infos_idx_ = 0;
cached_doc_hit_infos_.clear();
- if (last_doc_hit_info.document_id() != kInvalidDocumentId) {
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
// Carry over the last hit. It might need to be merged with the first hit of
// the next posting list in the chain.
- cached_doc_hit_infos_.push_back(last_doc_hit_info);
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
}
if (posting_list_accessor_ == nullptr) {
ICING_ASSIGN_OR_RETURN(posting_list_accessor_,
@@ -112,8 +141,7 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
all_pages_consumed_ = true;
}
++num_blocks_inspected_;
- cached_doc_hit_infos_.reserve(hits.size() + 1);
- cached_hit_term_frequency_.reserve(hits.size() + 1);
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
// Check sections.
if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
@@ -123,13 +151,9 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
if (hit.is_prefix_hit()) {
continue;
}
- if (cached_doc_hit_infos_.empty() ||
- hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
- cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
- cached_hit_term_frequency_.push_back(Hit::TermFrequencyArray());
- }
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id());
- cached_hit_term_frequency_.back()[hit.section_id()] = hit.term_frequency();
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
}
return libtextclassifier3::Status::OK;
}
@@ -141,16 +165,16 @@ std::string DocHitInfoIteratorTermMainExact::ToString() const {
libtextclassifier3::Status
DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
- DocHitInfo last_doc_hit_info;
+ DocHitInfoAndTermFrequencyArray last_doc_hit_info;
if (!cached_doc_hit_infos_.empty()) {
- last_doc_hit_info = cached_doc_hit_infos_.back();
+ last_doc_hit_info = std::move(cached_doc_hit_infos_.back());
}
cached_doc_hit_infos_idx_ = 0;
cached_doc_hit_infos_.clear();
- if (last_doc_hit_info.document_id() != kInvalidDocumentId) {
+ if (last_doc_hit_info.doc_hit_info.document_id() != kInvalidDocumentId) {
// Carry over the last hit. It might need to be merged with the first hit of
// the next posting list in the chain.
- cached_doc_hit_infos_.push_back(last_doc_hit_info);
+ cached_doc_hit_infos_.push_back(std::move(last_doc_hit_info));
}
++num_blocks_inspected_;
@@ -165,10 +189,7 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
if (hits.empty()) {
all_pages_consumed_ = true;
}
- cached_doc_hit_infos_.reserve(hits.size());
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.reserve(hits.size());
- }
+ cached_doc_hit_infos_.reserve(cached_doc_hit_infos_.size() + hits.size());
for (const Hit& hit : hits) {
// Check sections.
if (((UINT64_C(1) << hit.section_id()) & section_restrict_mask_) == 0) {
@@ -178,18 +199,9 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
if (!exact_ && !hit.is_in_prefix_section()) {
continue;
}
- if (cached_doc_hit_infos_.empty() ||
- hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
- cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.push_back(Hit::TermFrequencyArray());
- }
- }
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id());
- if (need_hit_term_frequency_) {
- cached_hit_term_frequency_.back()[hit.section_id()] =
- hit.term_frequency();
- }
+
+ MergeNewHitIntoCachedDocHitInfos(hit, need_hit_term_frequency_,
+ cached_doc_hit_infos_);
}
return libtextclassifier3::Status::OK;
}