diff options
author | Jiayu Hu <hujiayu@google.com> | 2023-11-30 14:10:57 -0800 |
---|---|---|
committer | Jiayu Hu <hujiayu@google.com> | 2023-11-30 18:24:06 -0800 |
commit | cb6ac3ede1d2ad050895b588417ea353c75953fe (patch) | |
tree | 431aae8a813d3b3e229077175c1363901b870d53 /icing/index/lite | |
parent | be04186537a2e78ef1f27ba646676133d7e83c9a (diff) | |
download | icing-cb6ac3ede1d2ad050895b588417ea353c75953fe.tar.gz |
Update Icing from upstream.
Descriptions:
========================================================================
[Icing][version 3] Bump kVersion to 3
========================================================================
Make lite index magic dependent on `IcingSearchEngineOptions::build_property_existence_metadata_hits`
========================================================================
Add a flag in IcingSearchEngineOptions to control whether to build property existence metadata hits
========================================================================
Support `hasProperty(property_path)` in the advanced query language
========================================================================
Add PropertyExistenceIndexingHandler to index property existence metadata hits
========================================================================
[JoinIndex Improvement][11/x] Add IcingSearchEngine initialization unit test for switching join index
========================================================================
[JoinIndex Improvement][10/x] Change/Add IcingSearchEngine unit tests
========================================================================
[JoinIndex Improvement][9/x] Integrate QualifiedIdJoinIndexImplV2 with IcingSearchEngine
========================================================================
[JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with JoinProcessor
========================================================================
[JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with QualifiedIdJoinIndexingHandler
========================================================================
[JoinIndex Improvement][7/x] Create QualifiedIdJoinIndex interface
========================================================================
[JoinIndex Improvement][6.1/x] Unit test (Optimize)
========================================================================
[JoinIndex Improvement][6.0/x] Unit test (General, Put, GetIterator)
========================================================================
[JoinIndex Improvement][5.3/x] Implement Optimize
========================================================================
Remove accents from Greek letters in normalizer
========================================================================
Make arm emulator tests build-only.
========================================================================
[JoinIndex Improvement][5.2/x] Implement GetIterator
========================================================================
[JoinIndex Improvement][5.1/x] Implement Put
========================================================================
[JoinIndex Improvement][5.0/x] Branch QualifiedIdJoinIndex to QualifiedIdJoinIndexImplV2
========================================================================
[JoinIndex Improvement][4/x] Implement PostingListJoinDataAccessor
========================================================================
[JoinIndex Improvement][3/x] Implement PostingListJoinDataSerializer and DocumentIdToJoinInfo data type
========================================================================
[JoinIndex Improvement][2/x] Create NamespaceFingerprintIdentifier
========================================================================
[JoinIndex Improvement][1/x] Implement namespace_id_old_to_new in Compaction
========================================================================
Update test to also handle ICU 74 segmentation rules.
========================================================================
[Icing][Expand QueryStats][3/x] Add new fields into QueryStats (1)
========================================================================
[Icing][Expand QueryStats][2/x] Refactor QueryStatsProto
========================================================================
[Icing][Expand QueryStats][1/x] Publish DocHitInfoIterator CallStats
========================================================================
Add additional property filter tests
========================================================================
Deprecate hit_intersect_section_ids_mask in DocHitInfoIterator
========================================================================
Change default requires_full_emulation to False for portable_cc_test (third_party/icing/testing)
========================================================================
Cleanup: Set requires_full_emulation to True for selective tests
========================================================================
Fix monkey test failures
========================================================================
Complete monkey test logic to change schema during monkey test runtime
========================================================================
Refactor monkey test to prepare for schema update
========================================================================
Fix the schema bug found by monkey test with seed 2551429844
========================================================================
Move set query stats to the very top of InternalSearch()
========================================================================
Apply section restriction only on leaf nodes
========================================================================
[6/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Advanced query parser)
========================================================================
[5/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PersistentHashMap)
========================================================================
[4/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListIntegerIndexSerializer)
========================================================================
[3/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListHitSerializer)
========================================================================
[2/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Posting list storage)
========================================================================
[1/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Non-functional changes)
========================================================================
Decouple section restriction data from iterators
========================================================================
Fix the crash when a schema type gets more indexable properties than allowed
========================================================================
Add a checker to verify the property data type matches the schema.
========================================================================
Change global std::string in i18n-utils to constexpr std::string_view.
========================================================================
Adjust LiteIndex sort at indexing check conditions.
========================================================================
Bug: 305098009
Bug: 307508735
Bug: 291130542
Bug: 275121148
Bug: 303239901
Bug: 301116242
Bug: 299321977
Bug: 300135897
Bug: 297549761
Bug: 309826655
Bug: 296349369
Bug: 302192690
Bug: 302609704
Bug: 301566713
NO_IFTTT="False Alarm: The path is only valid in G3. kVersion is changed to 3, and schema is compatible with version 1."
Change-Id: I8c4c3cd9b93e5240bd774f0a3d6d812f7a9ec198
Diffstat (limited to 'icing/index/lite')
-rw-r--r-- | icing/index/lite/doc-hit-info-iterator-term-lite.cc | 3 | ||||
-rw-r--r-- | icing/index/lite/doc-hit-info-iterator-term-lite.h | 12 | ||||
-rw-r--r-- | icing/index/lite/lite-index-header.h | 25 | ||||
-rw-r--r-- | icing/index/lite/lite-index-options.cc | 16 | ||||
-rw-r--r-- | icing/index/lite/lite-index-options.h | 7 | ||||
-rw-r--r-- | icing/index/lite/lite-index.cc | 6 |
6 files changed, 52 insertions, 17 deletions
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index acf3b33..21eecb6 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -65,12 +65,11 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() { // Nothing more for the iterator to return. Set these members to invalid // values. doc_hit_info_ = DocHitInfo(); - hit_intersect_section_ids_mask_ = kSectionIdMaskNone; return absl_ports::ResourceExhaustedError( "No more DocHitInfos in iterator"); } + ++num_advance_calls_; doc_hit_info_ = cached_hits_.at(cached_hits_idx_); - hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask(); return libtextclassifier3::Status::OK; } diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h index 873ea89..7facd88 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.h +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h @@ -28,7 +28,7 @@ namespace icing { namespace lib { -class DocHitInfoIteratorTermLite : public DocHitInfoIterator { +class DocHitInfoIteratorTermLite : public DocHitInfoLeafIterator { public: explicit DocHitInfoIteratorTermLite(const TermIdCodec* term_id_codec, LiteIndex* lite_index, @@ -51,8 +51,14 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator { libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override; - int32_t GetNumBlocksInspected() const override { return 0; } - int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; } + CallStats GetCallStats() const override { + return CallStats( + /*num_leaf_advance_calls_lite_index_in=*/num_advance_calls_, + /*num_leaf_advance_calls_main_index_in=*/0, + /*num_leaf_advance_calls_integer_index_in=*/0, + /*num_leaf_advance_calls_no_index_in=*/0, + /*num_blocks_inspected_in=*/0); + } void PopulateMatchedTermsStats( 
std::vector<TermMatchInfo>* matched_terms_stats, diff --git a/icing/index/lite/lite-index-header.h b/icing/index/lite/lite-index-header.h index 58379d6..75de8fa 100644 --- a/icing/index/lite/lite-index-header.h +++ b/icing/index/lite/lite-index-header.h @@ -15,6 +15,9 @@ #ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_ #define ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_ +#include <cstddef> +#include <cstdint> + #include "icing/legacy/core/icing-string-util.h" #include "icing/store/document-id.h" @@ -50,7 +53,14 @@ class LiteIndex_Header { class LiteIndex_HeaderImpl : public LiteIndex_Header { public: struct HeaderData { - static const uint32_t kMagic = 0xb4fb8792; + static uint32_t GetCurrentMagic( + bool include_property_existence_metadata_hits) { + if (!include_property_existence_metadata_hits) { + return 0x01c61418; + } else { + return 0x56e07d5b; + } + } uint32_t lite_index_crc; uint32_t magic; @@ -66,10 +76,15 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header { uint32_t searchable_end; }; - explicit LiteIndex_HeaderImpl(HeaderData *hdr) : hdr_(hdr) {} + explicit LiteIndex_HeaderImpl(HeaderData *hdr, + bool include_property_existence_metadata_hits) + : hdr_(hdr), + include_property_existence_metadata_hits_( + include_property_existence_metadata_hits) {} bool check_magic() const override { - return hdr_->magic == HeaderData::kMagic; + return hdr_->magic == HeaderData::GetCurrentMagic( + include_property_existence_metadata_hits_); } uint32_t lite_index_crc() const override { return hdr_->lite_index_crc; } @@ -96,7 +111,8 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header { void Reset() override { hdr_->lite_index_crc = 0; - hdr_->magic = HeaderData::kMagic; + hdr_->magic = + HeaderData::GetCurrentMagic(include_property_existence_metadata_hits_); hdr_->last_added_docid = kInvalidDocumentId; hdr_->cur_size = 0; hdr_->searchable_end = 0; @@ -104,6 +120,7 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header { private: HeaderData *hdr_; + bool 
include_property_existence_metadata_hits_; }; static_assert(24 == sizeof(LiteIndex_HeaderImpl::HeaderData), "sizeof(HeaderData) != 24"); diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc index 8780d45..7e6c076 100644 --- a/icing/index/lite/lite-index-options.cc +++ b/icing/index/lite/lite-index-options.cc @@ -14,9 +14,13 @@ #include "icing/index/lite/lite-index-options.h" +#include <algorithm> +#include <cstddef> #include <cstdint> +#include <string> #include "icing/index/lite/term-id-hit-pair.h" +#include "icing/legacy/index/icing-dynamic-trie.h" namespace icing { namespace lib { @@ -65,14 +69,16 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) { } // namespace -LiteIndexOptions::LiteIndexOptions(const std::string& filename_base, - uint32_t hit_buffer_want_merge_bytes, - bool hit_buffer_sort_at_indexing, - uint32_t hit_buffer_sort_threshold_bytes) +LiteIndexOptions::LiteIndexOptions( + const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes, + bool hit_buffer_sort_at_indexing, uint32_t hit_buffer_sort_threshold_bytes, + bool include_property_existence_metadata_hits) : filename_base(filename_base), hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes), hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing), - hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) { + hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes), + include_property_existence_metadata_hits( + include_property_existence_metadata_hits) { hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes); lexicon_options = CalculateTrieOptions(hit_buffer_size); display_mappings_options = CalculateTrieOptions(hit_buffer_size); diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h index 9f8452c..8b03449 100644 --- a/icing/index/lite/lite-index-options.h +++ b/icing/index/lite/lite-index-options.h @@ -15,6 +15,9 @@ #ifndef 
ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_ #define ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_ +#include <cstdint> +#include <string> + #include "icing/legacy/index/icing-dynamic-trie.h" namespace icing { @@ -29,7 +32,8 @@ struct LiteIndexOptions { LiteIndexOptions(const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes, bool hit_buffer_sort_at_indexing, - uint32_t hit_buffer_sort_threshold_bytes); + uint32_t hit_buffer_sort_threshold_bytes, + bool include_property_existence_metadata_hits = false); IcingDynamicTrie::Options lexicon_options; IcingDynamicTrie::Options display_mappings_options; @@ -39,6 +43,7 @@ struct LiteIndexOptions { uint32_t hit_buffer_size = 0; bool hit_buffer_sort_at_indexing = false; uint32_t hit_buffer_sort_threshold_bytes = 0; + bool include_property_existence_metadata_hits = false; }; } // namespace lib diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index ec7141a..3f9cc93 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -168,7 +168,8 @@ libtextclassifier3::Status LiteIndex::Initialize() { header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size()); header_ = std::make_unique<LiteIndex_HeaderImpl>( reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>( - header_mmap_.address())); + header_mmap_.address()), + options_.include_property_existence_metadata_hits); header_->Reset(); if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true, @@ -183,7 +184,8 @@ libtextclassifier3::Status LiteIndex::Initialize() { header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size()); header_ = std::make_unique<LiteIndex_HeaderImpl>( reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>( - header_mmap_.address())); + header_mmap_.address()), + options_.include_property_existence_metadata_hits); if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true, sizeof(TermIdHitPair::Value), header_->cur_size(), |