aboutsummaryrefslogtreecommitdiff
path: root/icing/monkey_test/in-memory-icing-search-engine.cc
diff options
context:
space:
mode:
author Jiayu Hu <hujiayu@google.com> 2023-12-01 16:38:12 -0800
committer Jiayu Hu <hujiayu@google.com> 2023-12-01 16:40:46 -0800
commit 77a3d7acfd36b403ebec87d7cbc7cf6486b71941 (patch)
tree 0d83b3880be27e055a77217411e8c3bb327cf5d9 /icing/monkey_test/in-memory-icing-search-engine.cc
parent ea81bd0613730609fcf3c6ffa7d2e52bcc10a9ac (diff)
parent 680dbbd97ed411c19591e4c35616f0b418a56dc9 (diff)
download icing-77a3d7acfd36b403ebec87d7cbc7cf6486b71941.tar.gz
Merge remote-tracking branch 'aosp/upstream-master' into androidx-main
* aosp/upstream-master: Update Icing from upstream. Update Icing from upstream. Update Icing from upstream. Descriptions: ======================================================================== Fix the build error in aosp/2856025 ======================================================================== [Icing][version 3] Bump kVersion to 3 ======================================================================== Make lite index magic dependent on `IcingSearchEngineOptions::build_property_existence_metadata_hits` ======================================================================== Add a flag in IcingSearchEngineOptions to control whether to build property existence metadata hits ======================================================================== Support `hasProperty(property_path)` in the advanced query language ======================================================================== Add PropertyExistenceIndexingHandler to index property existence metadata hit ======================================================================== [JoinIndex Improvement][11/x] Add IcingSearchEngine initialization unit test for switching join index ======================================================================== [JoinIndex Improvement][10/x] Change/Add IcingSearchEngine unit tests ======================================================================== [JoinIndex Improvement][9/x] Integrate QualifiedIdJoinIndexImplV2 with IcingSearchEngine ======================================================================== [JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with JoinProcessor ======================================================================== [JoinIndex Improvement][8/x] Integrate QualifiedIdJoinIndexImplV2 with QualifiedIdJoinIndexingHandler ======================================================================== [JoinIndex Improvement][7/x] Create QualifiedIdJoinIndex interface 
======================================================================== [JoinIndex Improvement][6.1/x] Unit test (Optimize) ======================================================================== [JoinIndex Improvement][6.0/x] Unit test (General, Put, GetIterator) ======================================================================== [JoinIndex Improvement][5.3/x] Implement Optimize ======================================================================== Remove accents from Greek letters in normalizer ======================================================================== Make arm emulator tests build-only. ======================================================================== [JoinIndex Improvement][5.2/x] Implement GetIterator ======================================================================== [JoinIndex Improvement][5.1/x] Implement Put ======================================================================== [JoinIndex Improvement][5.0/x] Branch QualifiedIdJoinIndex to QualifiedIdJoinIndexImplV2 ======================================================================== [JoinIndex Improvement][4/x] Implement PostingListJoinDataAccessor ======================================================================== [JoinIndex Improvement][3/x] Implement PostingListJoinDataSerializer and DocumentIdToJoinInfo data type ======================================================================== [JoinIndex Improvement][2/x] Create NamespaceFingerprintIdentifier ======================================================================== [JoinIndex Improvement][1/x] Implement namespace_id_old_to_new in Compaction ======================================================================== Update test to also handle ICU 74 segmentation rules. 
======================================================================== [Icing][Expand QueryStats][3/x] Add new fields into QueryStats (1) ======================================================================== [Icing][Expand QueryStats][2/x] Refactor QueryStatsProto ======================================================================== [Icing][Expand QueryStats][1/x] Publish DocHitInfoIterator CallStats ======================================================================== Add additional property filter tests ======================================================================== Deprecate hit_intersect_section_ids_mask in DocHitInfoIterator ======================================================================== Change default requires_full_emulation to False for portable_cc_test (third_party/icing/testing) ======================================================================== Cleanup Set requires_full_emulation to True for selective tests ======================================================================== Fix monkey test failures ======================================================================== Complete monkey test logic to change schema during monkey test runtime ======================================================================== Refactor monkey test to prepare for schema update ======================================================================== Fix the schema bug found by monkey test with seed 2551429844 ======================================================================== Move set query stats to the very top of InternalSearch() ======================================================================== Apply section restriction only on leaf nodes ======================================================================== [6/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Advanced query parser) ======================================================================== [5/n] Fix callsites in Icing that forgot 
to check libtextclassifier3::Status (PersistentHashMap) ======================================================================== [4/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListIntegerIndexSerializer) ======================================================================== [3/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (PostingListHitSerializer) ======================================================================== [2/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Posting list storage) ======================================================================== [1/n] Fix callsites in Icing that forgot to check libtextclassifier3::Status (Non-functional changes) ======================================================================== Decouple section restriction data from iterators ======================================================================== Fix the crash when a schema type gets more indexable properties than allowed ======================================================================== Add a checker to verify the property data type matches the schema. ======================================================================== Change global std::string in i18n-utils to constexpr std::string_view. ======================================================================== Adjust LiteIndex sort at indexing check conditions. ======================================================================== Add @UsedByNative annotations to native* methods in IcingSearchEngineImpl. ======================================================================== Bug: 305098009 Bug: 307508735 Bug: 291130542 Bug: 275121148 Bug: 303239901 Bug: 301116242 Bug: 299321977 Bug: 300135897 Bug: 297549761 Bug: 309826655 Bug: 296349369 Bug: 302192690 Bug: 302609704 Bug: 301566713 Bug: 296938196 NO_IFTTT="False Alarm: The path is only valid in G3. 
kVersion is changed to 3, and schema is compatible with version 1." Change-Id: I1d50cf70261c8977e52047c8051e8d143f62ba2c
Diffstat (limited to 'icing/monkey_test/in-memory-icing-search-engine.cc')
-rw-r--r--icing/monkey_test/in-memory-icing-search-engine.cc133
1 files changed, 120 insertions, 13 deletions
diff --git a/icing/monkey_test/in-memory-icing-search-engine.cc b/icing/monkey_test/in-memory-icing-search-engine.cc
index 405a7b0..7baa06e 100644
--- a/icing/monkey_test/in-memory-icing-search-engine.cc
+++ b/icing/monkey_test/in-memory-icing-search-engine.cc
@@ -14,15 +14,27 @@
#include "icing/monkey_test/in-memory-icing-search-engine.h"
+#include <algorithm>
#include <cstdint>
+#include <memory>
+#include <random>
+#include <string>
#include <string_view>
#include <unordered_set>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/store/document-id.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -38,9 +50,80 @@ bool IsPrefix(std::string_view s1, std::string_view s2) {
return s1 == s2.substr(0, s1.length());
}
-bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
- const std::string &query,
- TermMatchType::Code term_match_type) {
+} // namespace
+
+// Looks up the config of `property_name` inside `schema_type` using
+// property_config_map_.
+//
+// Returns:
+//   - A pointer to the PropertyConfigProto stored in property_config_map_.
+//   - NOT_FOUND if `schema_type` has no entry in the map, or if that entry has
+//     no property called `property_name`.
+libtextclassifier3::StatusOr<const PropertyConfigProto *>
+InMemoryIcingSearchEngine::GetPropertyConfig(
+    const std::string &schema_type, const std::string &property_name) const {
+  const auto type_entry = property_config_map_.find(schema_type);
+  if (type_entry == property_config_map_.end()) {
+    return absl_ports::NotFoundError(
+        absl_ports::StrCat("Schema type: ", schema_type, " is not found."));
+  }
+
+  const auto &properties_of_type = type_entry->second;
+  const auto property_entry = properties_of_type.find(property_name);
+  if (property_entry != properties_of_type.end()) {
+    return &property_entry->second;
+  }
+  return absl_ports::NotFoundError(
+      absl_ports::StrCat("Property: ", property_name, " is not found."));
+}
+
+// Determines which term match type governs the section at `section.path`,
+// starting from `schema_type` and walking the dot-separated path one property
+// at a time.
+//
+// Returns:
+//   - The string indexing config's term match type of the first STRING
+//     property reached on the path.
+//   - TermMatchType::UNKNOWN if the path reaches a property that is neither
+//     STRING nor DOCUMENT, if the first DOCUMENT property with
+//     index_nested_properties=false does not list the remainder of the path
+//     in its indexable_nested_properties_list, or if the path ends on a
+//     DOCUMENT property.
+//   - INVALID_ARGUMENT if splitting the path yields no components.
+//   - Any error from GetPropertyConfig (NOT_FOUND) if a schema type or
+//     property on the path is unknown.
+libtextclassifier3::StatusOr<TermMatchType::Code>
+InMemoryIcingSearchEngine::GetTermMatchType(
+    const std::string &schema_type,
+    const MonkeyTokenizedSection &section) const {
+  const std::vector<std::string_view> properties_in_path =
+      absl_ports::StrSplit(section.path, ".");
+  if (properties_in_path.empty()) {
+    return absl_ports::InvalidArgumentError("Got empty path.");
+  }
+
+  // True once the remainder of the path has been found in the
+  // indexable_nested_properties_list of the first level that has
+  // index_nested_properties=false.
+  bool in_indexable_properties_list = false;
+  // True while every DOCUMENT property visited so far has
+  // index_nested_properties=true.
+  bool all_indexable_from_top = true;
+
+  std::string curr_schema_type = schema_type;
+  for (size_t i = 0; i < properties_in_path.size(); ++i) {
+    ICING_ASSIGN_OR_RETURN(
+        const PropertyConfigProto *prop,
+        GetPropertyConfig(curr_schema_type,
+                          std::string(properties_in_path[i])));
+    if (prop->data_type() == PropertyConfigProto::DataType::STRING) {
+      return prop->string_indexing_config().term_match_type();
+    }
+
+    if (prop->data_type() != PropertyConfigProto::DataType::DOCUMENT) {
+      return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+    }
+
+    const bool old_all_indexable_from_top = all_indexable_from_top;
+    all_indexable_from_top &=
+        prop->document_indexing_config().index_nested_properties();
+    if (!all_indexable_from_top && !in_indexable_properties_list) {
+      // Only try to update in_indexable_properties_list if this is the first
+      // level with index_nested_properties=false.
+      if (old_all_indexable_from_top) {
+        const auto &indexable_properties =
+            prop->document_indexing_config().indexable_nested_properties_list();
+        // Rebuild the path relative to this document property. The components
+        // must be re-joined with '.', otherwise a multi-level remainder such
+        // as "b.c" would become "bc" and never match a list entry.
+        std::string relative_path;
+        for (size_t j = i + 1; j < properties_in_path.size(); ++j) {
+          if (j > i + 1) {
+            relative_path.push_back('.');
+          }
+          relative_path.append(properties_in_path[j].data(),
+                               properties_in_path[j].size());
+        }
+        in_indexable_properties_list =
+            std::find(indexable_properties.begin(), indexable_properties.end(),
+                      relative_path) != indexable_properties.end();
+      }
+      // Check in_indexable_properties_list again.
+      if (!in_indexable_properties_list) {
+        return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+      }
+    }
+    // Descend into the nested document's schema type. The property config's
+    // schema_type field names it; GetTypeName() on the indexing config would
+    // return the protobuf message-type name instead.
+    curr_schema_type = prop->schema_type();
+  }
+  return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+}
+
+libtextclassifier3::StatusOr<bool>
+InMemoryIcingSearchEngine::DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const {
std::vector<std::string_view> strs = absl_ports::StrSplit(query, ":");
std::string_view query_term;
std::string_view section_restrict;
@@ -54,8 +137,15 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
if (!section_restrict.empty() && section.path != section_restrict) {
continue;
}
+ ICING_ASSIGN_OR_RETURN(
+ TermMatchType::Code section_term_match_type,
+ GetTermMatchType(document.document.schema(), section));
+ if (section_term_match_type == TermMatchType::UNKNOWN) {
+ // Skip non-indexable property.
+ continue;
+ }
for (const std::string &token : section.token_sequence) {
- if (section.term_match_type == TermMatchType::EXACT_ONLY ||
+ if (section_term_match_type == TermMatchType::EXACT_ONLY ||
term_match_type == TermMatchType::EXACT_ONLY) {
if (token == query_term) {
return true;
@@ -68,7 +158,18 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
return false;
}
-} // namespace
+// Replaces the stored schema with `schema` and rebuilds property_config_map_
+// from it: one entry per schema type, each mapping property name to a copy of
+// that property's config. A type with no properties still gets an entry.
+void InMemoryIcingSearchEngine::SetSchema(SchemaProto &&schema) {
+  schema_ = std::make_unique<SchemaProto>(std::move(schema));
+
+  // Drop every entry derived from the previous schema before re-populating.
+  property_config_map_.clear();
+  for (const SchemaTypeConfigProto &schema_type_config : schema_->types()) {
+    auto &properties_by_name =
+        property_config_map_[schema_type_config.schema_type()];
+    for (const PropertyConfigProto &config : schema_type_config.properties()) {
+      properties_by_name.emplace(config.property_name(), config);
+    }
+  }
+}
InMemoryIcingSearchEngine::PickDocumentResult
InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
@@ -121,7 +222,7 @@ InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
void InMemoryIcingSearchEngine::Put(const MonkeyTokenizedDocument &document) {
// Delete the old one if existing.
- Delete(document.document.namespace_(), document.document.uri());
+ Delete(document.document.namespace_(), document.document.uri()).IgnoreError();
existing_doc_ids_.push_back(documents_.size());
namespace_uri_docid_map[document.document.namespace_()]
[document.document.uri()] = documents_.size();
@@ -192,7 +293,8 @@ InMemoryIcingSearchEngine::DeleteBySchemaType(const std::string &schema_type) {
libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
const SearchSpecProto &search_spec) {
- std::vector<DocumentId> doc_ids_to_delete = InternalSearch(search_spec);
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> doc_ids_to_delete,
+ InternalSearch(search_spec));
for (DocumentId doc_id : doc_ids_to_delete) {
const DocumentProto &document = documents_[doc_id].document;
if (!Delete(document.namespace_(), document.uri()).ok()) {
@@ -204,9 +306,10 @@ libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
return doc_ids_to_delete.size();
}
-std::vector<DocumentProto> InMemoryIcingSearchEngine::Search(
- const SearchSpecProto &search_spec) const {
- std::vector<DocumentId> matched_doc_ids = InternalSearch(search_spec);
+libtextclassifier3::StatusOr<std::vector<DocumentProto>>
+InMemoryIcingSearchEngine::Search(const SearchSpecProto &search_spec) const {
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> matched_doc_ids,
+ InternalSearch(search_spec));
std::vector<DocumentProto> result;
result.reserve(matched_doc_ids.size());
for (DocumentId doc_id : matched_doc_ids) {
@@ -229,12 +332,16 @@ libtextclassifier3::StatusOr<DocumentId> InMemoryIcingSearchEngine::InternalGet(
" is not found by InMemoryIcingSearchEngine::InternalGet."));
}
-std::vector<DocumentId> InMemoryIcingSearchEngine::InternalSearch(
+libtextclassifier3::StatusOr<std::vector<DocumentId>>
+InMemoryIcingSearchEngine::InternalSearch(
const SearchSpecProto &search_spec) const {
std::vector<DocumentId> matched_doc_ids;
for (DocumentId doc_id : existing_doc_ids_) {
- if (DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
- search_spec.term_match_type())) {
+ ICING_ASSIGN_OR_RETURN(
+ bool match,
+ DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
+ search_spec.term_match_type()));
+ if (match) {
matched_doc_ids.push_back(doc_id);
}
}