about | summary | refs | log | tree | commit | diff
path: root/icing/index/lite
diff options
context:
space:
mode:
author Dan Saadati <dsaadati@google.com> 2021-12-28 13:43:34 -0800
committer Dan Saadati <dsaadati@google.com> 2021-12-28 15:12:47 -0800
commit beff93fe1f5165aeeb871d9711963aa1846299ae (patch)
tree 247c43adc7dc69fb846fd92c2da96654fbe818ea /icing/index/lite
parent da1b8986e7c873efa45529b8adc4a32490eb9c3c (diff)
download icing-beff93fe1f5165aeeb871d9711963aa1846299ae.tar.gz
Sync from upstream.
Descriptions:
================
Normalize Tokens by Token type when retrieving snippets
================
Rename max_window_bytes to max_window_utf32_length, Delete the max_tokens_per_doc field in IcingSearchEngineOptions.
================
Handle suggestion namespace ownership.
================
Fix OkStatus() is not a valid argument to StatusOr in Main_index.RetrieveMoreHits.
================
Allow advancing when current indices are negative in CharacterIterator
================
Adds support for verbatim tokenization and indexing in IcingLib
================
Renames TokenizerIterator Reset functions
================
Add term_match_type to SuggestionSpec proto
================
Unify the C++ proto enum style
================
Allow zero property weights in IcingLib
================

Bug: 204333391
Bug: 152934343
Bug: 205209589
Bug: 206147728
Bug: 209993976
Change-Id: Id94a377fd37c5eb7ebc3d7547cf8ff0ad4152620
Diffstat (limited to 'icing/index/lite')
-rw-r--r-- icing/index/lite/doc-hit-info-iterator-term-lite.cc 5
-rw-r--r-- icing/index/lite/lite-index.cc 15
-rw-r--r-- icing/index/lite/lite-index.h 9
3 files changed, 23 insertions, 6 deletions
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index 08df4fc..f215d63 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -77,7 +77,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() {
ICING_ASSIGN_OR_RETURN(uint32_t term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
lite_index_->AppendHits(term_id, section_restrict_mask_,
- /*only_from_prefix_sections=*/false, &cached_hits_);
+ /*only_from_prefix_sections=*/false,
+ /*namespace_checker=*/nullptr, &cached_hits_);
cached_hits_idx_ = 0;
return libtextclassifier3::Status::OK;
}
@@ -100,7 +101,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() {
term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE));
lite_index_->AppendHits(term_id, section_restrict_mask_,
/*only_from_prefix_sections=*/!exact_match,
- &cached_hits_);
+ /*namespace_checker=*/nullptr, &cached_hits_);
++terms_matched;
}
if (terms_matched > 1) {
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 9e4ac28..e7a8cb3 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -336,6 +336,7 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
+ const NamespaceChecker* namespace_checker,
std::vector<DocHitInfo>* hits_out) {
int count = 0;
DocumentId last_document_id = kInvalidDocumentId;
@@ -355,11 +356,18 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
}
DocumentId document_id = hit.document_id();
if (document_id != last_document_id) {
+ last_document_id = document_id;
+ // Check does current document belongs to the given namespaces.
+ if (namespace_checker != nullptr &&
+ !namespace_checker->BelongsToTargetNamespaces(document_id)) {
+ // The document is removed or expired or not belongs to target
+ // namespaces.
+ continue;
+ }
++count;
if (hits_out != nullptr) {
hits_out->push_back(DocHitInfo(document_id));
}
- last_document_id = document_id;
}
if (hits_out != nullptr) {
hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency());
@@ -368,9 +376,10 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
return count;
}
-int LiteIndex::CountHits(uint32_t term_id) {
+libtextclassifier3::StatusOr<int> LiteIndex::CountHits(
+ uint32_t term_id, const NamespaceChecker* namespace_checker) {
return AppendHits(term_id, kSectionIdMaskAll,
- /*only_from_prefix_sections=*/false,
+ /*only_from_prefix_sections=*/false, namespace_checker,
/*hits_out=*/nullptr);
}
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index b134aba..890980c 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -41,6 +41,7 @@
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
+#include "icing/store/namespace-checker.h"
#include "icing/store/namespace-id.h"
#include "icing/util/bit-util.h"
#include "icing/util/crc32.h"
@@ -140,13 +141,19 @@ class LiteIndex {
// skipping hits in non-prefix sections if only_from_prefix_sections is true,
// to hits_out. If hits_out is nullptr, no hits will be added.
//
+ // Only those hits which belongs to the given namespaces will be counted and
+ // appended. A nullptr namespace checker will disable this check.
+ //
// Returns the number of hits that would be added to hits_out.
int AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
bool only_from_prefix_sections,
+ const NamespaceChecker* namespace_checker,
std::vector<DocHitInfo>* hits_out);
// Returns the hit count of the term.
- int CountHits(uint32_t term_id);
+ // Only those hits which belongs to the given namespaces will be counted.
+ libtextclassifier3::StatusOr<int> CountHits(
+ uint32_t term_id, const NamespaceChecker* namespace_checker);
// Check if buffer has reached its capacity.
bool is_full() const;