aboutsummaryrefslogtreecommitdiff
path: root/icing/index/iterator/doc-hit-info-iterator-or.cc
diff options
context:
space:
mode:
authorCassie Wang <cassiewang@google.com>2019-12-20 15:11:45 -0800
committerCassie Wang <cassiewang@google.com>2019-12-20 16:18:05 -0800
commit128c9db88925c8425f2ad81e1d8985461d7ba21a (patch)
treef97ee47cc99d2c162eb30a5e051c606823dfd1ec /icing/index/iterator/doc-hit-info-iterator-or.cc
parent1897505cb34f3d53e848da13fafe7691c17417ea (diff)
downloadicing-128c9db88925c8425f2ad81e1d8985461d7ba21a.tar.gz
Port over Icing c++ code from upstream
Change-Id: Ia3981fed7e0e70589efc027d4123f306cdfbe990
Diffstat (limited to 'icing/index/iterator/doc-hit-info-iterator-or.cc')
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or.cc239
1 files changed, 239 insertions, 0 deletions
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
new file mode 100644
index 0000000..b4dc86a
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -0,0 +1,239 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+
+#include <cstdint>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// When combining Or iterators, n-ary operator has better performance when
+// number of operands > 2 according to benchmark cl/243321264
+// TODO (samzheng): Tune this number when it's necessary, e.g. implementation
+// changes.
+constexpr int kBinaryOrIteratorPerformanceThreshold = 2;
+
+} // namespace
+
+std::unique_ptr<DocHitInfoIterator> CreateOrIterator(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
+ if (iterators.size() == 1) {
+ return std::move(iterators.at(0));
+ }
+
+ std::unique_ptr<DocHitInfoIterator> iterator;
+ if (iterators.size() == kBinaryOrIteratorPerformanceThreshold) {
+ iterator = std::make_unique<DocHitInfoIteratorOr>(std::move(iterators[0]),
+ std::move(iterators[1]));
+ } else {
+ // If the vector is too small, the OrNary iterator can handle it and return
+ // an error on the Advance call
+ iterator = std::make_unique<DocHitInfoIteratorOrNary>(std::move(iterators));
+ }
+
+ return iterator;
+}
+
+DocHitInfoIteratorOr::DocHitInfoIteratorOr(
+ std::unique_ptr<DocHitInfoIterator> left_it,
+ std::unique_ptr<DocHitInfoIterator> right_it)
+ : left_(std::move(left_it)), right_(std::move(right_it)) {}
+
+libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
+ // Cache the document_id of the left iterator for comparison to the right.
+ DocumentId orig_left_document_id = left_document_id_;
+
+ // Advance the left iterator if necessary.
+ if (left_document_id_ != kInvalidDocumentId) {
+ if (right_document_id_ == kInvalidDocumentId ||
+ left_document_id_ >= right_document_id_) {
+ if (left_->Advance().ok()) {
+ left_document_id_ = left_->doc_hit_info().document_id();
+ } else {
+ left_document_id_ = kInvalidDocumentId;
+ }
+ }
+ }
+
+ // Advance the right iterator if necessary, by comparing to the original
+ // left document_id (not the one which may have been updated).
+ if (right_document_id_ != kInvalidDocumentId) {
+ if (orig_left_document_id == kInvalidDocumentId ||
+ right_document_id_ >= orig_left_document_id) {
+ if (right_->Advance().ok()) {
+ right_document_id_ = right_->doc_hit_info().document_id();
+ } else {
+ right_document_id_ = kInvalidDocumentId;
+ }
+ }
+ }
+
+ // Done, we either found a match or we reached the end of potential
+ // DocHitInfos
+ if (left_document_id_ == kInvalidDocumentId &&
+ right_document_id_ == kInvalidDocumentId) {
+ // Reached the end, set these to invalid values and return
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+
+ // Now chose the best one that is not invalid.
+ DocHitInfoIterator* chosen;
+ if (left_document_id_ == kInvalidDocumentId) {
+ chosen = right_.get();
+ } else if (right_document_id_ == kInvalidDocumentId) {
+ chosen = left_.get();
+ } else if (left_document_id_ < right_document_id_) {
+ chosen = right_.get();
+ } else {
+ chosen = left_.get();
+ }
+
+ doc_hit_info_ = chosen->doc_hit_info();
+ hit_intersect_section_ids_mask_ = chosen->hit_intersect_section_ids_mask();
+
+ // If equal, combine.
+ if (left_document_id_ == right_document_id_) {
+ doc_hit_info_.MergeSectionsFrom(right_->doc_hit_info());
+ hit_intersect_section_ids_mask_ &= right_->hit_intersect_section_ids_mask();
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+int32_t DocHitInfoIteratorOr::GetNumBlocksInspected() const {
+ return left_->GetNumBlocksInspected() + right_->GetNumBlocksInspected();
+}
+
+int32_t DocHitInfoIteratorOr::GetNumLeafAdvanceCalls() const {
+ return left_->GetNumLeafAdvanceCalls() + right_->GetNumLeafAdvanceCalls();
+}
+
+std::string DocHitInfoIteratorOr::ToString() const {
+ return absl_ports::StrCat("(", left_->ToString(), " OR ", right_->ToString(),
+ ")");
+}
+
+DocHitInfoIteratorOrNary::DocHitInfoIteratorOrNary(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
+ : iterators_(std::move(iterators)) {}
+
+libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
+ if (iterators_.size() < 2) {
+ return absl_ports::InvalidArgumentError(
+ "Not enough iterators to OR together");
+ }
+
+ if (doc_hit_info_.document_id() == 0) {
+ // 0 is the smallest (last) DocumentId, can't advance further. Reset to
+ // invalid values and return directly
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+ // The maximum possible doc id for the current Advance() call.
+ const DocumentId next_document_id_max = doc_hit_info_.document_id() - 1;
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ DocumentId next_document_id = kInvalidDocumentId;
+ // Go through the iterators and try to find the maximum document_id that is
+ // equal to or smaller than next_document_id_max
+ for (const auto& iterator : iterators_) {
+ if (iterator->doc_hit_info().document_id() > next_document_id_max) {
+ // Advance the iterator until its value is equal to or smaller than
+ // next_document_id_max
+ if (ABSL_PREDICT_FALSE(
+ !AdvanceTo(iterator.get(), next_document_id_max).ok())) {
+ continue;
+ }
+ }
+ // Now iterator->get_document_id() <= next_document_id_max
+ if (next_document_id == kInvalidDocumentId) {
+ next_document_id = iterator->doc_hit_info().document_id();
+ } else {
+ next_document_id =
+ std::max(next_document_id, iterator->doc_hit_info().document_id());
+ }
+ }
+ if (next_document_id == kInvalidDocumentId) {
+ // None of the iterators had a next document_id, reset to invalid values and
+ // return
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+
+ // Found the next hit DocumentId, now calculate the section info.
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ for (const auto& iterator : iterators_) {
+ if (iterator->doc_hit_info().document_id() == next_document_id) {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ doc_hit_info_ = iterator->doc_hit_info();
+ hit_intersect_section_ids_mask_ =
+ iterator->hit_intersect_section_ids_mask();
+ } else {
+ doc_hit_info_.MergeSectionsFrom(iterator->doc_hit_info());
+ hit_intersect_section_ids_mask_ &=
+ iterator->hit_intersect_section_ids_mask();
+ }
+ }
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+int32_t DocHitInfoIteratorOrNary::GetNumBlocksInspected() const {
+ int32_t blockCount = 0;
+ for (const auto& iter : iterators_) {
+ blockCount += iter->GetNumBlocksInspected();
+ }
+ return blockCount;
+}
+
+int32_t DocHitInfoIteratorOrNary::GetNumLeafAdvanceCalls() const {
+ int32_t leafCount = 0;
+ for (const auto& iter : iterators_) {
+ leafCount += iter->GetNumLeafAdvanceCalls();
+ }
+ return leafCount;
+}
+
+std::string DocHitInfoIteratorOrNary::ToString() const {
+ std::string ret = "(";
+
+ for (int i = 0; i < iterators_.size(); ++i) {
+ absl_ports::StrAppend(&ret, iterators_.at(i)->ToString());
+ if (i != iterators_.size() - 1) {
+ // Not the last element in vector
+ absl_ports::StrAppend(&ret, " OR ");
+ }
+ }
+
+ absl_ports::StrAppend(&ret, ")");
+ return ret;
+}
+
+} // namespace lib
+} // namespace icing