aboutsummaryrefslogtreecommitdiff
path: root/icing/index
diff options
context:
space:
mode:
authorCassie Wang <cassiewang@google.com>2021-02-26 08:04:01 -0800
committerCassie Wang <cassiewang@google.com>2021-03-02 15:29:44 -0800
commit85fd8c8521e338d2bab69f5482e3cc2cf312fd4e (patch)
tree929e118124b203997be393e4a1c5f5ee6da2de40 /icing/index
parenta34db390d80f862bfaaa49dea3605c5fec3bca3d (diff)
downloadicing-85fd8c8521e338d2bab69f5482e3cc2cf312fd4e.tar.gz
Sync from upstream.
Descriptions: ========== Add last optimized time to GetOptimizeInfo. ========== Update the implementation of snippeting to return property paths with value indices and remove the values_index field. ========== Create builders for SchemaProto, SchemaTypeConfigProto and PropertyConfigProto. ========== Rename some protos with the rules: - Remove "Native" prefix - Add "Proto" suffix for consistency with other protos ========== Upgrade your minimum iOS version to 11.4. ========== Fix PersistToDisk definitions to ensure that they properly call datasync. This change is meant to address the first part of ptd doc - that certain functions that claim to persist data don't actually explicitly flush. ========== Change function call from has_field() to field() ========== Add IcingStorageInfo. ========== Add IndexStorageStats. ========== Add SchemaStoreStorageStats. ========== Add DocumentStoreStorageStats. ========== Implement OptimizeStats. ========== Remove the max number of results per query limit (1000) and replace it with a more flexible way to limit memory use by the result-state-manager. ========== Add a test case to ensure we don't add UsageStore's checksum in DocumentStore's ComputeChecksum. ========== Account for UsageStore in GetDiskUsage. ========== Ensure that SchemaStore properly handles function calls when the schema isn't set. ========== Remove jlpl_strict_deps feature from package declarations. ========== Qualifies std::string in 3p directories ========== Section restricts should influence the relevance score. ========== Apply fixes upstream that were necessary to sync changes downstream. Also added a METADATA check to prevent any accidental adds of foo.proto.h includes. ========== Remove the 'com.google.protobuf' to 'com.google.android.icing.protobuf' translation in the export_to_aosp script. ========== Include usage store size in GetOptimizeInfo. This helps clients get a better idea of what savings they could get back if they called Optimize. 
Change-Id: Ia2339c7987267a73c49dadf1ced4a0a8ef001d4c
Diffstat (limited to 'icing/index')
-rw-r--r--icing/index/index-processor.cc2
-rw-r--r--icing/index/index-processor.h2
-rw-r--r--icing/index/index.cc14
-rw-r--r--icing/index/index.h13
-rw-r--r--icing/index/index_test.cc29
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-and.h15
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-filter.h6
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-or.h15
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.cc3
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.h20
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc67
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-test-util.h18
-rw-r--r--icing/index/iterator/doc-hit-info-iterator.h6
-rw-r--r--icing/index/lite/doc-hit-info-iterator-term-lite.h15
-rw-r--r--icing/index/lite/lite-index.cc40
-rw-r--r--icing/index/lite/lite-index.h9
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.h15
-rw-r--r--icing/index/main/main-index.cc30
-rw-r--r--icing/index/main/main-index.h9
19 files changed, 262 insertions, 66 deletions
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index d2f9d41..09dda41 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -55,7 +55,7 @@ IndexProcessor::Create(const Normalizer* normalizer, Index* index,
libtextclassifier3::Status IndexProcessor::IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (index_->last_added_document_id() != kInvalidDocumentId &&
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 9fc7c46..6b07c98 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -81,7 +81,7 @@ class IndexProcessor {
// INTERNAL_ERROR if any other errors occur
libtextclassifier3::Status IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
private:
IndexProcessor(const Normalizer* normalizer, Index* index,
diff --git a/icing/index/index.cc b/icing/index/index.cc
index bd41b51..db59ad2 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -164,7 +164,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
icing_filesystem));
return std::unique_ptr<Index>(new Index(options, std::move(term_id_codec),
std::move(lite_index),
- std::move(main_index)));
+ std::move(main_index), filesystem));
}
libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
@@ -277,6 +277,18 @@ Index::FindTermsByPrefix(const std::string& prefix,
std::move(main_term_metadata_list), num_to_return);
}
+IndexStorageInfoProto Index::GetStorageInfo() const {
+ IndexStorageInfoProto storage_info;
+ int64_t directory_size = filesystem_->GetDiskUsage(options_.base_dir.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_index_size(directory_size);
+ } else {
+ storage_info.set_index_size(-1);
+ }
+ storage_info = lite_index_->GetStorageInfo(std::move(storage_info));
+ return main_index_->GetStorageInfo(std::move(storage_info));
+}
+
libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
// Step 1: See if this term is already in the lexicon
uint32_t tvi;
diff --git a/icing/index/index.h b/icing/index/index.h
index a4ea719..b7021ca 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -32,6 +32,7 @@
#include "icing/index/term-id-codec.h"
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -151,6 +152,12 @@ class Index {
return lite_index_size + main_index_size;
}
+ // Calculates the StorageInfo for the Index.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo() const;
+
// Create an iterator to iterate through all doc hit infos in the index that
// match the term. section_id_mask can be set to ignore hits from sections not
// listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits
@@ -242,11 +249,12 @@ class Index {
private:
Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
std::unique_ptr<LiteIndex> lite_index,
- std::unique_ptr<MainIndex> main_index)
+ std::unique_ptr<MainIndex> main_index, const Filesystem* filesystem)
: lite_index_(std::move(lite_index)),
main_index_(std::move(main_index)),
options_(options),
- term_id_codec_(std::move(term_id_codec)) {}
+ term_id_codec_(std::move(term_id_codec)),
+ filesystem_(filesystem) {}
libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix(
const std::string& prefix, const std::vector<NamespaceId>& namespace_ids,
@@ -256,6 +264,7 @@ class Index {
std::unique_ptr<MainIndex> main_index_;
const Options options_;
std::unique_ptr<TermIdCodec> term_id_codec_;
+ const Filesystem* filesystem_;
};
} // namespace lib
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 3479ab1..de4edf8 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -31,6 +31,7 @@
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -46,6 +47,7 @@ namespace {
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::IsEmpty;
using ::testing::IsTrue;
@@ -1636,6 +1638,33 @@ TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
+TEST_F(IndexTest, IndexStorageInfoProto) {
+ // Add two documents to the lite index and merge them into main.
+ {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+ }
+
+ IndexStorageInfoProto storage_info = index_->GetStorageInfo();
+ EXPECT_THAT(storage_info.index_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_hit_buffer_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_storage_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_block_size(), Ge(0));
+ // There should be 1 block for the header and 1 block for two posting lists.
+ EXPECT_THAT(storage_info.num_blocks(), Eq(2));
+ EXPECT_THAT(storage_info.min_free_fraction(), Ge(0));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index faca785..8ceff44 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -47,13 +47,16 @@ class DocHitInfoIteratorAnd : public DocHitInfoIterator {
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- short_->PopulateMatchedTermsStats(matched_terms_stats);
- long_->PopulateMatchedTermsStats(matched_terms_stats);
+ short_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ long_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
private:
@@ -78,13 +81,15 @@ class DocHitInfoIteratorAndNary : public DocHitInfoIterator {
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
for (size_t i = 0; i < iterators_.size(); ++i) {
- iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+ iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index fb60e38..9cee74c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -68,8 +68,10 @@ class DocHitInfoIteratorFilter : public DocHitInfoIterator {
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
- delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
private:
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 2f49430..2dae68d 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -43,15 +43,18 @@ class DocHitInfoIteratorOr : public DocHitInfoIterator {
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- current_->PopulateMatchedTermsStats(matched_terms_stats);
+ current_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
// If equal, then current_ == left_. Combine with results from right_.
if (left_document_id_ == right_document_id_) {
- right_->PopulateMatchedTermsStats(matched_terms_stats);
+ right_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
}
@@ -83,13 +86,15 @@ class DocHitInfoIteratorOrNary : public DocHitInfoIterator {
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
for (size_t i = 0; i < current_iterators_.size(); i++) {
- current_iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+ current_iterators_.at(i)->PopulateMatchedTermsStats(
+ matched_terms_stats, filtering_section_mask);
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 8acb91a..e6ee8e3 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -84,8 +84,7 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
if (section_metadata->path == target_section_) {
// The hit was in the target section name, return OK/found
doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- delegate_->hit_intersect_section_ids_mask();
+ hit_intersect_section_ids_mask_ = 1u << section_id;
return libtextclassifier3::Status::OK;
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index ba74384..52b243a 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -52,13 +52,21 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
std::string ToString() const override;
- // NOTE: currently, section restricts does decide which documents to
- // return, but doesn't impact the relevance score of a document.
- // TODO(b/173156803): decide whether we want to filter the matched_terms_stats
- // for the restricted sections.
+ // Note that DocHitInfoIteratorSectionRestrict is the only iterator that
+ // should set filtering_section_mask, hence the received
+ // filtering_section_mask is ignored and the filtering_section_mask passed to
+ // the delegate is set to hit_intersect_section_ids_mask_. This allows the
+ // delegate to filter the matching sections.
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
- delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ delegate_->PopulateMatchedTermsStats(
+ matched_terms_stats,
+ /*filtering_section_mask=*/hit_intersect_section_ids_mask_);
}
private:
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 91e0cbe..21b3f8f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -43,6 +43,7 @@ namespace lib {
namespace {
using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::IsEmpty;
@@ -101,6 +102,57 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
FakeClock fake_clock_;
};
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ PopulateMatchedTermsStats_IncludesHitWithMatchingSection) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document_));
+
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2
+
+ DocHitInfo doc_hit_info1 = DocHitInfo(document_id);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1};
+
+ auto original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
+ original_iterator->set_hit_intersect_section_ids_mask(
+ original_section_id_mask);
+
+ // Filtering for the indexed section name (which has a section id of 0) should
+ // get a result.
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_section=*/indexed_property_);
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(section_restrict_iterator.Advance());
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info().document_id(),
+ Eq(document_id));
+ SectionIdMask expected_section_id_mask = 0b00000001; // hits in sections 0
+ EXPECT_EQ(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ expected_section_id_mask);
+
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+ std::array<Hit::TermFrequency, kMaxSectionId> expected_term_frequencies{
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+ ElementsAreArray(expected_term_frequencies));
+ EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask,
+ expected_section_id_mask);
+
+ EXPECT_FALSE(section_restrict_iterator.Advance().ok());
+}
+
TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
@@ -110,6 +162,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
schema_store_.get(), /*target_section=*/"");
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ filtered_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
@@ -148,6 +203,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
/*target_section=*/"");
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -171,6 +229,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
"some_section_name");
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -192,6 +253,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
indexed_property_);
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -216,6 +280,9 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
indexed_property_);
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index 913696a..45acc8f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -56,23 +56,25 @@ class DocHitInfoIteratorDummy : public DocHitInfoIterator {
// Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
-
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
- section_term_frequencies);
+ TermMatchInfo term_stats(term_, section_mask,
+ std::move(section_term_frequencies));
for (auto& cur_term_stats : *matched_terms_stats) {
if (cur_term_stats.term == term_stats.term) {
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index c4d9901..afb298b 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -15,6 +15,7 @@
#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_H_
#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_H_
+#include <array>
#include <cstdint>
#include <string>
#include <string_view>
@@ -93,11 +94,14 @@ class DocHitInfoIterator {
// For the last hit docid, retrieves all the matched query terms and other
// stats, see TermMatchInfo.
+ // filtering_section_mask filters the matching sections and should be set only
+ // by DocHitInfoIteratorSectionRestrict.
// If Advance() wasn't called after construction, Advance() returned false or
// the concrete HitIterator didn't override this method, the vectors aren't
// populated.
virtual void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const {}
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const {}
protected:
DocHitInfo doc_hit_info_;
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index ac5e97f..8dbe043 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -50,21 +50,24 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+ TermMatchInfo term_stats(term_, section_mask,
std::move(section_term_frequencies));
for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index e0379b8..69138e1 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -394,26 +394,36 @@ void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const {
}
libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
- int64_t header_and_hit_buffer_file_size =
- filesystem_->GetFileSize(hit_buffer_fd_.get());
-
- if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of the LiteIndex's header and hit buffer");
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.lite_index_hit_buffer_size() == -1 ||
+ storage_info.lite_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of LiteIndex's members.");
}
-
- int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
- if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of LiteIndex's lexicon");
- }
-
// On initialization, we grow the file to a padded size first. So this size
// won't count towards the size taken up by elements
size_t header_padded_size = IcingMMapper::page_aligned_size(header_size());
+ return storage_info.lite_index_hit_buffer_size() - header_padded_size +
+ storage_info.lite_index_lexicon_size();
+}
- return header_and_hit_buffer_file_size - header_padded_size +
- lexicon_disk_usage;
+// Populates the lite-index fields of storage_info and returns it. A size that
+// cannot be read is reported as -1 for that field only.
+IndexStorageInfoProto LiteIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
+ int64_t header_and_hit_buffer_file_size =
+ filesystem_->GetFileSize(hit_buffer_fd_.get());
+ if (header_and_hit_buffer_file_size != Filesystem::kBadFileSize) {
+ storage_info.set_lite_index_hit_buffer_size(
+ header_and_hit_buffer_file_size);
+ } else {
+ storage_info.set_lite_index_hit_buffer_size(-1);
+ }
+ int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
+ // The lexicon size is checked against IcingFilesystem's sentinel, matching
+ // the check this function replaces and the one used by MainIndex.
+ if (lexicon_disk_usage != IcingFilesystem::kBadFileSize) {
+ storage_info.set_lite_index_lexicon_size(lexicon_disk_usage);
+ } else {
+ storage_info.set_lite_index_lexicon_size(-1);
+ }
+ return storage_info;
}
uint32_t LiteIndex::Seek(uint32_t term_id) {
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 7b51aa4..90c6fbc 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -37,6 +37,7 @@
#include "icing/legacy/index/icing-lite-index-header.h"
#include "icing/legacy/index/icing-lite-index-options.h"
#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -240,6 +241,14 @@ class LiteIndex {
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ // Takes the provided storage_info, populates the fields related to the lite
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(
+ IndexStorageInfoProto storage_info) const;
+
private:
static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index d626d7a..f3cf701 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -50,21 +50,24 @@ class DocHitInfoIteratorTermMain : public DocHitInfoIterator {
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+ TermMatchInfo term_stats(term_, section_mask,
std::move(section_term_frequencies));
for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 636f631..8ae6b27 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -121,14 +121,34 @@ libtextclassifier3::Status MainIndex::Init(
}
libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const {
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.main_index_storage_size() == -1 ||
+ storage_info.main_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of MainIndex's members.");
+ }
+ return storage_info.main_index_storage_size() +
+ storage_info.main_index_lexicon_size();
+}
+
+// Populates the main-index fields of storage_info and returns it. A size that
+// cannot be read is reported as -1 for that field only.
+IndexStorageInfoProto MainIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
int64_t lexicon_elt_size = main_lexicon_->GetElementsSize();
+ if (lexicon_elt_size != IcingFilesystem::kBadFileSize) {
+ storage_info.set_main_index_lexicon_size(lexicon_elt_size);
+ } else {
+ storage_info.set_main_index_lexicon_size(-1);
+ }
int64_t index_elt_size = flash_index_storage_->GetElementsSize();
- if (lexicon_elt_size == IcingFilesystem::kBadFileSize ||
- index_elt_size == IcingFilesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of LiteIndex's lexicon");
+ // Validate the storage size itself (not the lexicon size) so that a failed
+ // read is reported as -1 rather than being stored as a real size.
+ if (index_elt_size != IcingFilesystem::kBadFileSize) {
+ storage_info.set_main_index_storage_size(index_elt_size);
+ } else {
+ storage_info.set_main_index_storage_size(-1);
}
- return lexicon_elt_size + index_elt_size;
+ storage_info.set_main_index_block_size(flash_index_storage_->block_size());
+ storage_info.set_num_blocks(flash_index_storage_->num_blocks());
+ storage_info.set_min_free_fraction(flash_index_storage_->min_free_fraction());
+ return storage_info;
}
libtextclassifier3::StatusOr<std::unique_ptr<PostingListAccessor>>
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index 7403b8c..43635ca 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -27,6 +27,7 @@
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/store/namespace-id.h"
#include "icing/util/status-macros.h"
@@ -172,6 +173,14 @@ class MainIndex {
// - INTERNAL on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ // Takes the provided storage_info, populates the fields related to the main
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(
+ IndexStorageInfoProto storage_info) const;
+
// Returns debug information for the main index in out.
// verbosity <= 0, simplest debug information - just the lexicon
// verbosity > 0, more detailed debug information including raw postings