diff options
author | Cassie Wang <cassiewang@google.com> | 2021-02-26 08:04:01 -0800 |
---|---|---|
committer | Cassie Wang <cassiewang@google.com> | 2021-03-02 15:29:44 -0800 |
commit | 85fd8c8521e338d2bab69f5482e3cc2cf312fd4e (patch) | |
tree | 929e118124b203997be393e4a1c5f5ee6da2de40 /icing/index/lite | |
parent | a34db390d80f862bfaaa49dea3605c5fec3bca3d (diff) | |
download | icing-85fd8c8521e338d2bab69f5482e3cc2cf312fd4e.tar.gz |
Sync from upstream.
Descriptions:
==========
Add last optimized time to GetOptimizeInfo.
==========
Update the implementation of snippeting to return property paths with value indices and remove the values_index field.
==========
Create builders for SchemaProto, SchemaTypeConfigProto and PropertyConfigProto.
==========
Rename some protos according to the following rules:
- Remove "Native" prefix
- Add "Proto" suffix for consistency with other protos
==========
Upgrade the minimum iOS version to 11.4.
==========
Fix PersistToDisk definitions to ensure that they properly call datasync. This change is meant to address the first part of the ptd doc: certain functions that claim to persist data don't actually flush explicitly.
==========
Change function call from has_field() to field()
==========
Add IcingStorageInfo.
==========
Add IndexStorageStats.
==========
Add SchemaStoreStorageStats.
==========
Add DocumentStoreStorageStats.
==========
Implement OptimizeStats.
==========
Remove the max number of results per query limit (1000) and replace it with a more flexible way to limit memory use by the result-state-manager.
==========
Add a test case to ensure we don't add UsageStore's checksum in DocumentStore's ComputeChecksum.
==========
Account for UsageStore in GetDiskUsage.
==========
Ensure that SchemaStore properly handles function calls when the schema isn't set.
==========
Remove jlpl_strict_deps feature from package declarations.
==========
Qualify std::string in third-party (3p) directories.
==========
Section restricts should influence the relevance score.
==========
Apply fixes upstream that were necessary to sync changes downstream. Also add a METADATA check to prevent any accidental additions of foo.proto.h includes.
==========
Remove the 'com.google.protobuf' to 'com.google.android.icing.protobuf' translation in the export_to_aosp script.
==========
Include usage store size in GetOptimizeInfo. This helps clients get a better idea of what savings they could get back if they called Optimize.
Change-Id: Ia2339c7987267a73c49dadf1ced4a0a8ef001d4c
Diffstat (limited to 'icing/index/lite')
-rw-r--r-- | icing/index/lite/doc-hit-info-iterator-term-lite.h | 15 | ||||
-rw-r--r-- | icing/index/lite/lite-index.cc | 40 | ||||
-rw-r--r-- | icing/index/lite/lite-index.h | 9 |
3 files changed, 43 insertions, 21 deletions
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h index ac5e97f..8dbe043 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.h +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h @@ -50,21 +50,24 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator { int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; } void PopulateMatchedTermsStats( - std::vector<TermMatchInfo>* matched_terms_stats) const override { + std::vector<TermMatchInfo>* matched_terms_stats, + SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override { if (doc_hit_info_.document_id() == kInvalidDocumentId) { // Current hit isn't valid, return. return; } - SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask(); + SectionIdMask section_mask = + doc_hit_info_.hit_section_ids_mask() & filtering_section_mask; + SectionIdMask section_mask_copy = section_mask; std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = { Hit::kNoTermFrequency}; - while (section_mask) { - SectionId section_id = __builtin_ctz(section_mask); + while (section_mask_copy) { + SectionId section_id = __builtin_ctz(section_mask_copy); section_term_frequencies.at(section_id) = doc_hit_info_.hit_term_frequency(section_id); - section_mask &= ~(1u << section_id); + section_mask_copy &= ~(1u << section_id); } - TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(), + TermMatchInfo term_stats(term_, section_mask, std::move(section_term_frequencies)); for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) { diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index e0379b8..69138e1 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -394,26 +394,36 @@ void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const { } libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const { - int64_t 
header_and_hit_buffer_file_size = - filesystem_->GetFileSize(hit_buffer_fd_.get()); - - if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) { - return absl_ports::InternalError( - "Failed to get element size of the LiteIndex's header and hit buffer"); + IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto()); + if (storage_info.lite_index_hit_buffer_size() == -1 || + storage_info.lite_index_lexicon_size() == -1) { + return absl_ports::AbortedError( + "Failed to get size of LiteIndex's members."); } - - int64_t lexicon_disk_usage = lexicon_.GetElementsSize(); - if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) { - return absl_ports::InternalError( - "Failed to get element size of LiteIndex's lexicon"); - } - // On initialization, we grow the file to a padded size first. So this size // won't count towards the size taken up by elements size_t header_padded_size = IcingMMapper::page_aligned_size(header_size()); + return storage_info.lite_index_hit_buffer_size() - header_padded_size + + storage_info.lite_index_lexicon_size(); +} - return header_and_hit_buffer_file_size - header_padded_size + - lexicon_disk_usage; +IndexStorageInfoProto LiteIndex::GetStorageInfo( + IndexStorageInfoProto storage_info) const { + int64_t header_and_hit_buffer_file_size = + filesystem_->GetFileSize(hit_buffer_fd_.get()); + if (header_and_hit_buffer_file_size != Filesystem::kBadFileSize) { + storage_info.set_lite_index_hit_buffer_size( + header_and_hit_buffer_file_size); + } else { + storage_info.set_lite_index_hit_buffer_size(-1); + } + int64_t lexicon_disk_usage = lexicon_.GetElementsSize(); + if (lexicon_disk_usage != Filesystem::kBadFileSize) { + storage_info.set_lite_index_lexicon_size(lexicon_disk_usage); + } else { + storage_info.set_lite_index_lexicon_size(-1); + } + return storage_info; } uint32_t LiteIndex::Seek(uint32_t term_id) { diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index 7b51aa4..90c6fbc 100644 --- 
a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -37,6 +37,7 @@ #include "icing/legacy/index/icing-lite-index-header.h" #include "icing/legacy/index/icing-lite-index-options.h" #include "icing/legacy/index/icing-mmapper.h" +#include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" @@ -240,6 +241,14 @@ class LiteIndex { // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<int64_t> GetElementsSize() const; + // Takes the provided storage_info, populates the fields related to the lite + // index and returns that storage_info. + // + // If an IO error occurs while trying to calculate the value for a field, then + // that field will be set to -1. + IndexStorageInfoProto GetStorageInfo( + IndexStorageInfoProto storage_info) const; + private: static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions(); |