aboutsummaryrefslogtreecommitdiff
path: root/icing/index/lite
diff options
context:
space:
mode:
authorJiayu Hu <hujiayu@google.com>2023-11-30 14:10:57 -0800
committerJiayu Hu <hujiayu@google.com>2023-11-30 18:24:06 -0800
commitcb6ac3ede1d2ad050895b588417ea353c75953fe (patch)
tree431aae8a813d3b3e229077175c1363901b870d53 /icing/index/lite
parentbe04186537a2e78ef1f27ba646676133d7e83c9a (diff)
downloadicing-cb6ac3ede1d2ad050895b588417ea353c75953fe.tar.gz
Update Icing from upstream.
Descriptions: ======================================================================== [Icing][version 3] Bump kVersion to 3 ======================================================================== Make lite index magic dependent on `IcingSearchEngineOptions::build_property_existence_metadata_hits` ======================================================================== Add a flag in IcingSearchEngineOptions to control whether to build property existence metadata hits ======================================================================== Support `hasProperty(property_path)` in the advanced query language ======================================================================== Add PropertyExistenceIndexingHandler to index property existence metadata hit ======================================================================== [JoinIndex Improvement][11/x] Add IcingSearchEngine initialization unit test for switching join index ======================================================================== [JoinIndex Improvement][10/x] Change/Add IcingSearchEngine unit tests ======================================================================== [JoinIndex Improvement][9/x] Integrate QualifiedIdJoinIndexImplV2 with IcingSearchEngine ======================================================================== [JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with JoinProcessor ======================================================================== [JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with QualifiedIdJoinIndexingHandler ======================================================================== [JoinIndex Improvement][7/x] Create QualifiedIdJoinIndex interface ======================================================================== [JoinIndex Improvement][6.1/x] Unit test (Optimize) ======================================================================== [JoinIndex Improvement][6.0/x] Unit test (General, Put, GetIterator) ======================================================================== [JoinIndex Improvement][5.3/x] Implement Optimize ======================================================================== Remove accents from Greek letters in normalizer ======================================================================== Make arm emulator tests build-only. ======================================================================== [JoinIndex Improvement][5.2/x] Implement GetIterator ======================================================================== [JoinIndex Improvement][5.1/x] Implement Put ======================================================================== [JoinIndex Improvement][5.0/x] Branch QualifiedIdJoinIndex to QualifiedIdJoinIndexImplV2 ======================================================================== [JoinIndex Improvement][4/x] Implement PostingListJoinDataAccessor ======================================================================== [JoinIndex Improvement][3/x] Implement PostingListJoinDataSerializer and DocumentIdToJoinInfo data type ======================================================================== [JoinIndex Improvement][2/x] Create NamespaceFingerprintIdentifier ======================================================================== [JoinIndex Improvement][1/x] Implement namespace_id_old_to_new in Compaction ======================================================================== Update test to also handle ICU 74 segmentation rules. ======================================================================== [Icing][Expand QueryStats][3/x] Add new fields into QueryStats (1) ======================================================================== [Icing][Expand QueryStats][2/x] Refactor QueryStatsProto ======================================================================== [Icing][Expand QueryStats][1/x] Publish DocHitInfoIterator CallStats ======================================================================== Add additional property filter tests ======================================================================== Deprecate hit_intersect_section_ids_mask in DocHitInfoIterator ======================================================================== Change default requires_full_emulation to False for portable_cc_test (third_party/icing/testing) ======================================================================== Cleanup Set requires_full_emulation to True for selective tests ======================================================================== Fix monkey test failures ======================================================================== Complete monkey test logic to change schema during monkey test runtime ======================================================================== Refactor monkey test to prepare for schema update ======================================================================== Fix the schema bug found by monkey test with seed 2551429844 ======================================================================== Move set query stats to the very top of InternalSearch() ======================================================================== Apply section restriction only on leaf nodes ======================================================================== [6/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Advanced query parser) ======================================================================== [5/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PersistentHashMap) ======================================================================== [4/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListIntegerIndexSerializer) ======================================================================== [3/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListHitSerializer) ======================================================================== [2/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Posting list storage) ======================================================================== [1/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Non-functional changes) ======================================================================== Decouple section restriction data from iterators ======================================================================== Fix the crash when a schema type gets more indexable properties than allowed ======================================================================== Add a checker to verify the property data type matches the schema. ======================================================================== Change global std::string in i18n-utils to constexpr std::string_view. ======================================================================== Adjust LiteIndex sort at indexing check conditions. ======================================================================== Bug: 305098009 Bug: 307508735 Bug: 291130542 Bug: 275121148 Bug: 303239901 Bug: 301116242 Bug: 299321977 Bug: 300135897 Bug: 297549761 Bug: 309826655 Bug: 296349369 Bug: 302192690 Bug: 302609704 Bug: 301566713 NO_IFTTT="False Alarm: The path is only valid in G3. kVersion is changed to 3, and schema is compatible with version 1." Change-Id: I8c4c3cd9b93e5240bd774f0a3d6d812f7a9ec198
Diffstat (limited to 'icing/index/lite')
-rw-r--r--icing/index/lite/doc-hit-info-iterator-term-lite.cc3
-rw-r--r--icing/index/lite/doc-hit-info-iterator-term-lite.h12
-rw-r--r--icing/index/lite/lite-index-header.h25
-rw-r--r--icing/index/lite/lite-index-options.cc16
-rw-r--r--icing/index/lite/lite-index-options.h7
-rw-r--r--icing/index/lite/lite-index.cc6
6 files changed, 52 insertions, 17 deletions
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index acf3b33..21eecb6 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -65,12 +65,11 @@ libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() {
// Nothing more for the iterator to return. Set these members to invalid
// values.
doc_hit_info_ = DocHitInfo();
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
+ ++num_advance_calls_;
doc_hit_info_ = cached_hits_.at(cached_hits_idx_);
- hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
return libtextclassifier3::Status::OK;
}
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index 873ea89..7facd88 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -28,7 +28,7 @@
namespace icing {
namespace lib {
-class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
+class DocHitInfoIteratorTermLite : public DocHitInfoLeafIterator {
public:
explicit DocHitInfoIteratorTermLite(const TermIdCodec* term_id_codec,
LiteIndex* lite_index,
@@ -51,8 +51,14 @@ class DocHitInfoIteratorTermLite : public DocHitInfoIterator {
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
- int32_t GetNumBlocksInspected() const override { return 0; }
- int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
+ CallStats GetCallStats() const override {
+ return CallStats(
+ /*num_leaf_advance_calls_lite_index_in=*/num_advance_calls_,
+ /*num_leaf_advance_calls_main_index_in=*/0,
+ /*num_leaf_advance_calls_integer_index_in=*/0,
+ /*num_leaf_advance_calls_no_index_in=*/0,
+ /*num_blocks_inspected_in=*/0);
+ }
void PopulateMatchedTermsStats(
std::vector<TermMatchInfo>* matched_terms_stats,
diff --git a/icing/index/lite/lite-index-header.h b/icing/index/lite/lite-index-header.h
index 58379d6..75de8fa 100644
--- a/icing/index/lite/lite-index-header.h
+++ b/icing/index/lite/lite-index-header.h
@@ -15,6 +15,9 @@
#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
+#include <cstddef>
+#include <cstdint>
+
#include "icing/legacy/core/icing-string-util.h"
#include "icing/store/document-id.h"
@@ -50,7 +53,14 @@ class LiteIndex_Header {
class LiteIndex_HeaderImpl : public LiteIndex_Header {
public:
struct HeaderData {
- static const uint32_t kMagic = 0xb4fb8792;
+ static uint32_t GetCurrentMagic(
+ bool include_property_existence_metadata_hits) {
+ if (!include_property_existence_metadata_hits) {
+ return 0x01c61418;
+ } else {
+ return 0x56e07d5b;
+ }
+ }
uint32_t lite_index_crc;
uint32_t magic;
@@ -66,10 +76,15 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
uint32_t searchable_end;
};
- explicit LiteIndex_HeaderImpl(HeaderData *hdr) : hdr_(hdr) {}
+ explicit LiteIndex_HeaderImpl(HeaderData *hdr,
+ bool include_property_existence_metadata_hits)
+ : hdr_(hdr),
+ include_property_existence_metadata_hits_(
+ include_property_existence_metadata_hits) {}
bool check_magic() const override {
- return hdr_->magic == HeaderData::kMagic;
+ return hdr_->magic == HeaderData::GetCurrentMagic(
+ include_property_existence_metadata_hits_);
}
uint32_t lite_index_crc() const override { return hdr_->lite_index_crc; }
@@ -96,7 +111,8 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
void Reset() override {
hdr_->lite_index_crc = 0;
- hdr_->magic = HeaderData::kMagic;
+ hdr_->magic =
+ HeaderData::GetCurrentMagic(include_property_existence_metadata_hits_);
hdr_->last_added_docid = kInvalidDocumentId;
hdr_->cur_size = 0;
hdr_->searchable_end = 0;
@@ -104,6 +120,7 @@ class LiteIndex_HeaderImpl : public LiteIndex_Header {
private:
HeaderData *hdr_;
+ bool include_property_existence_metadata_hits_;
};
static_assert(24 == sizeof(LiteIndex_HeaderImpl::HeaderData),
"sizeof(HeaderData) != 24");
diff --git a/icing/index/lite/lite-index-options.cc b/icing/index/lite/lite-index-options.cc
index 8780d45..7e6c076 100644
--- a/icing/index/lite/lite-index-options.cc
+++ b/icing/index/lite/lite-index-options.cc
@@ -14,9 +14,13 @@
#include "icing/index/lite/lite-index-options.h"
+#include <algorithm>
+#include <cstddef>
#include <cstdint>
+#include <string>
#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
namespace icing {
namespace lib {
@@ -65,14 +69,16 @@ IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
} // namespace
-LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
- uint32_t hit_buffer_want_merge_bytes,
- bool hit_buffer_sort_at_indexing,
- uint32_t hit_buffer_sort_threshold_bytes)
+LiteIndexOptions::LiteIndexOptions(
+ const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes,
+ bool hit_buffer_sort_at_indexing, uint32_t hit_buffer_sort_threshold_bytes,
+ bool include_property_existence_metadata_hits)
: filename_base(filename_base),
hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
- hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
+ hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes),
+ include_property_existence_metadata_hits(
+ include_property_existence_metadata_hits) {
hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
lexicon_options = CalculateTrieOptions(hit_buffer_size);
display_mappings_options = CalculateTrieOptions(hit_buffer_size);
diff --git a/icing/index/lite/lite-index-options.h b/icing/index/lite/lite-index-options.h
index 9f8452c..8b03449 100644
--- a/icing/index/lite/lite-index-options.h
+++ b/icing/index/lite/lite-index-options.h
@@ -15,6 +15,9 @@
#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
+#include <cstdint>
+#include <string>
+
#include "icing/legacy/index/icing-dynamic-trie.h"
namespace icing {
@@ -29,7 +32,8 @@ struct LiteIndexOptions {
LiteIndexOptions(const std::string& filename_base,
uint32_t hit_buffer_want_merge_bytes,
bool hit_buffer_sort_at_indexing,
- uint32_t hit_buffer_sort_threshold_bytes);
+ uint32_t hit_buffer_sort_threshold_bytes,
+ bool include_property_existence_metadata_hits = false);
IcingDynamicTrie::Options lexicon_options;
IcingDynamicTrie::Options display_mappings_options;
@@ -39,6 +43,7 @@ struct LiteIndexOptions {
uint32_t hit_buffer_size = 0;
bool hit_buffer_sort_at_indexing = false;
uint32_t hit_buffer_sort_threshold_bytes = 0;
+ bool include_property_existence_metadata_hits = false;
};
} // namespace lib
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index ec7141a..3f9cc93 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -168,7 +168,8 @@ libtextclassifier3::Status LiteIndex::Initialize() {
header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()));
+ header_mmap_.address()),
+ options_.include_property_existence_metadata_hits);
header_->Reset();
if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
@@ -183,7 +184,8 @@ libtextclassifier3::Status LiteIndex::Initialize() {
header_mmap_.Remap(hit_buffer_fd_.get(), kHeaderFileOffset, header_size());
header_ = std::make_unique<LiteIndex_HeaderImpl>(
reinterpret_cast<LiteIndex_HeaderImpl::HeaderData*>(
- header_mmap_.address()));
+ header_mmap_.address()),
+ options_.include_property_existence_metadata_hits);
if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
sizeof(TermIdHitPair::Value), header_->cur_size(),