diff options
author | Dan Saadati <dsaadati@google.com> | 2021-12-28 13:43:34 -0800 |
---|---|---|
committer | Dan Saadati <dsaadati@google.com> | 2021-12-28 15:12:47 -0800 |
commit | beff93fe1f5165aeeb871d9711963aa1846299ae (patch) | |
tree | 247c43adc7dc69fb846fd92c2da96654fbe818ea /icing/index/lite | |
parent | da1b8986e7c873efa45529b8adc4a32490eb9c3c (diff) | |
download | icing-beff93fe1f5165aeeb871d9711963aa1846299ae.tar.gz |
Sync from upstream.
Descriptions:
================
Normalize Tokens by Token type when retrieving snippets
================
Rename max_window_bytes to max_window_utf32_length.
Delete the max_tokens_per_doc field in IcingSearchEngineOptions.
================
Handle suggestion namespace ownership.
================
Fix the "OkStatus() is not a valid argument to StatusOr" error in
Main_index.RetrieveMoreHits.
================
Allow advancing when current indices are negative in CharacterIterator
================
Adds support for verbatim tokenization and indexing in IcingLib
================
Renames TokenizerIterator Reset functions
================
Add term_match_type to SuggestionSpec proto
================
Unify the C++ proto enum style
================
Allow zero property weights in IcingLib
================
Bug: 204333391
Bug: 152934343
Bug: 205209589
Bug: 206147728
Bug: 209993976
Change-Id: Id94a377fd37c5eb7ebc3d7547cf8ff0ad4152620
Diffstat (limited to 'icing/index/lite')
-rw-r--r-- | icing/index/lite/doc-hit-info-iterator-term-lite.cc | 5 | ||||
-rw-r--r-- | icing/index/lite/lite-index.cc | 15 | ||||
-rw-r--r-- | icing/index/lite/lite-index.h | 9 |
3 files changed, 23 insertions, 6 deletions
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index 08df4fc..f215d63 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -77,7 +77,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() { ICING_ASSIGN_OR_RETURN(uint32_t term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); lite_index_->AppendHits(term_id, section_restrict_mask_, - /*only_from_prefix_sections=*/false, &cached_hits_); + /*only_from_prefix_sections=*/false, + /*namespace_checker=*/nullptr, &cached_hits_); cached_hits_idx_ = 0; return libtextclassifier3::Status::OK; } @@ -100,7 +101,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() { term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE)); lite_index_->AppendHits(term_id, section_restrict_mask_, /*only_from_prefix_sections=*/!exact_match, - &cached_hits_); + /*namespace_checker=*/nullptr, &cached_hits_); ++terms_matched; } if (terms_matched > 1) { diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index 9e4ac28..e7a8cb3 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -336,6 +336,7 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId( int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, + const NamespaceChecker* namespace_checker, std::vector<DocHitInfo>* hits_out) { int count = 0; DocumentId last_document_id = kInvalidDocumentId; @@ -355,11 +356,18 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, } DocumentId document_id = hit.document_id(); if (document_id != last_document_id) { + last_document_id = document_id; + // Check does current document belongs to the given namespaces. 
+ if (namespace_checker != nullptr && + !namespace_checker->BelongsToTargetNamespaces(document_id)) { + // The document is removed or expired or not belongs to target + // namespaces. + continue; + } ++count; if (hits_out != nullptr) { hits_out->push_back(DocHitInfo(document_id)); } - last_document_id = document_id; } if (hits_out != nullptr) { hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency()); @@ -368,9 +376,10 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, return count; } -int LiteIndex::CountHits(uint32_t term_id) { +libtextclassifier3::StatusOr<int> LiteIndex::CountHits( + uint32_t term_id, const NamespaceChecker* namespace_checker) { return AppendHits(term_id, kSectionIdMaskAll, - /*only_from_prefix_sections=*/false, + /*only_from_prefix_sections=*/false, namespace_checker, /*hits_out=*/nullptr); } diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index b134aba..890980c 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -41,6 +41,7 @@ #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" +#include "icing/store/namespace-checker.h" #include "icing/store/namespace-id.h" #include "icing/util/bit-util.h" #include "icing/util/crc32.h" @@ -140,13 +141,19 @@ class LiteIndex { // skipping hits in non-prefix sections if only_from_prefix_sections is true, // to hits_out. If hits_out is nullptr, no hits will be added. // + // Only those hits which belongs to the given namespaces will be counted and + // appended. A nullptr namespace checker will disable this check. + // // Returns the number of hits that would be added to hits_out. int AppendHits(uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, + const NamespaceChecker* namespace_checker, std::vector<DocHitInfo>* hits_out); // Returns the hit count of the term. 
- int CountHits(uint32_t term_id); + // Only those hits which belongs to the given namespaces will be counted. + libtextclassifier3::StatusOr<int> CountHits( + uint32_t term_id, const NamespaceChecker* namespace_checker); // Check if buffer has reached its capacity. bool is_full() const; |