Sync from upstream.

Descriptions: ================ Normalize Tokens by Token type when retrieving snippets ================ Rename max_window_bytes to max_window_utf32_length, Delete the max_tokens_per_doc field in IcingSearchEngineOptions. ================ Handle suggestion namespace ownership. ================ Fix OkStatus() is not a valid argument to StatusOr in Main_index.RetrieveMoreHits. ================ Allow advancing when current indices are negative in CharacterIterator ================ Adds support for verbatim tokenization and indexing in IcingLib ================ Renames TokenizerIterator Reset functions ================ Add term_match_type to SuggestionSpec proto ================ Unify the C++ proto enum style ================ Allow zero property weights in IcingLib ================ Bug: 204333391 Bug: 152934343 Bug: 205209589 Bug: 206147728 Bug: 209993976 Change-Id: Id94a377fd37c5eb7ebc3d7547cf8ff0ad4152620
author: Dan Saadati <dsaadati@google.com> 2021-12-28 13:43:34 -0800
committer: Dan Saadati <dsaadati@google.com> 2021-12-28 15:12:47 -0800
commit: beff93fe1f5165aeeb871d9711963aa1846299ae (patch)
tree: 247c43adc7dc69fb846fd92c2da96654fbe818ea /icing/index/lite
parent: da1b8986e7c873efa45529b8adc4a32490eb9c3c (diff)
download: icing-beff93fe1f5165aeeb871d9711963aa1846299ae.tar.gz
3 files changed, 23 insertions, 6 deletions
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index 08df4fc..f215d63 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -77,7 +77,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() {
   ICING_ASSIGN_OR_RETURN(uint32_t term_id,
                          term_id_codec_->EncodeTvi(tvi, TviType::LITE));
   lite_index_->AppendHits(term_id, section_restrict_mask_,
-                          /*only_from_prefix_sections=*/false, &cached_hits_);
+                          /*only_from_prefix_sections=*/false,
+                          /*namespace_checker=*/nullptr, &cached_hits_);
   cached_hits_idx_ = 0;
   return libtextclassifier3::Status::OK;
 }
@@ -100,7 +101,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() {
         term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE));
     lite_index_->AppendHits(term_id, section_restrict_mask_,
                             /*only_from_prefix_sections=*/!exact_match,
-                            &cached_hits_);
+                            /*namespace_checker=*/nullptr, &cached_hits_);
     ++terms_matched;
   }
   if (terms_matched > 1) {
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 9e4ac28..e7a8cb3 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -336,6 +336,7 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId(
 
 int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
                           bool only_from_prefix_sections,
+                          const NamespaceChecker* namespace_checker,
                           std::vector<DocHitInfo>* hits_out) {
   int count = 0;
   DocumentId last_document_id = kInvalidDocumentId;
@@ -355,11 +356,18 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
     }
     DocumentId document_id = hit.document_id();
     if (document_id != last_document_id) {
+      last_document_id = document_id;
+      // Check whether the current document belongs to the given namespaces.
+      if (namespace_checker != nullptr &&
+          !namespace_checker->BelongsToTargetNamespaces(document_id)) {
+        // The document is removed, expired, or does not belong to the target
+        // namespaces.
+        continue;
+      }
       ++count;
       if (hits_out != nullptr) {
         hits_out->push_back(DocHitInfo(document_id));
       }
-      last_document_id = document_id;
     }
     if (hits_out != nullptr) {
       hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency());
@@ -368,9 +376,10 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
   return count;
 }
 
-int LiteIndex::CountHits(uint32_t term_id) {
+libtextclassifier3::StatusOr<int> LiteIndex::CountHits(
+    uint32_t term_id, const NamespaceChecker* namespace_checker) {
   return AppendHits(term_id, kSectionIdMaskAll,
-                    /*only_from_prefix_sections=*/false,
+                    /*only_from_prefix_sections=*/false, namespace_checker,
                     /*hits_out=*/nullptr);
 }
 
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index b134aba..890980c 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -41,6 +41,7 @@
 #include "icing/proto/term.pb.h"
 #include "icing/schema/section.h"
 #include "icing/store/document-id.h"
+#include "icing/store/namespace-checker.h"
 #include "icing/store/namespace-id.h"
 #include "icing/util/bit-util.h"
 #include "icing/util/crc32.h"
@@ -140,13 +141,19 @@ class LiteIndex {
   // skipping hits in non-prefix sections if only_from_prefix_sections is true,
   // to hits_out. If hits_out is nullptr, no hits will be added.
   //
+  // Only hits that belong to the given namespaces will be counted and
+  // appended. A nullptr namespace_checker disables this check.
+  //
   // Returns the number of hits that would be added to hits_out.
   int AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
                  bool only_from_prefix_sections,
+                 const NamespaceChecker* namespace_checker,
                  std::vector<DocHitInfo>* hits_out);
 
   // Returns the hit count of the term.
-  int CountHits(uint32_t term_id);
+  // Only hits that belong to the given namespaces will be counted.
+  libtextclassifier3::StatusOr<int> CountHits(
+      uint32_t term_id, const NamespaceChecker* namespace_checker);
 
   // Check if buffer has reached its capacity.
   bool is_full() const;
author	Dan Saadati <dsaadati@google.com>	2021-12-28 13:43:34 -0800
committer	Dan Saadati <dsaadati@google.com>	2021-12-28 15:12:47 -0800
commit	beff93fe1f5165aeeb871d9711963aa1846299ae (patch)
tree	247c43adc7dc69fb846fd92c2da96654fbe818ea /icing/index/lite
parent	da1b8986e7c873efa45529b8adc4a32490eb9c3c (diff)
download	icing-beff93fe1f5165aeeb871d9711963aa1846299ae.tar.gz