about | summary | refs | log | tree | commit | diff
path: root/icing/monkey_test/in-memory-icing-search-engine.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/monkey_test/in-memory-icing-search-engine.cc')
-rw-r--r-- icing/monkey_test/in-memory-icing-search-engine.cc | 133
1 files changed, 120 insertions, 13 deletions
diff --git a/icing/monkey_test/in-memory-icing-search-engine.cc b/icing/monkey_test/in-memory-icing-search-engine.cc
index 405a7b0..7baa06e 100644
--- a/icing/monkey_test/in-memory-icing-search-engine.cc
+++ b/icing/monkey_test/in-memory-icing-search-engine.cc
@@ -14,15 +14,27 @@
#include "icing/monkey_test/in-memory-icing-search-engine.h"
+#include <algorithm>
#include <cstdint>
+#include <memory>
+#include <random>
+#include <string>
#include <string_view>
#include <unordered_set>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/store/document-id.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -38,9 +50,80 @@ bool IsPrefix(std::string_view s1, std::string_view s2) {
return s1 == s2.substr(0, s1.length());
}
-bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
- const std::string &query,
- TermMatchType::Code term_match_type) {
+} // namespace
+
+// Looks up the config of `property_name` inside `schema_type` in
+// property_config_map_.
+//
+// Returns:
+//   - A pointer to the stored PropertyConfigProto on success. The pointer
+//     refers to an entry owned by property_config_map_.
+//   - NOT_FOUND if either the schema type or the property is not in the map.
+libtextclassifier3::StatusOr<const PropertyConfigProto *>
+InMemoryIcingSearchEngine::GetPropertyConfig(
+    const std::string &schema_type, const std::string &property_name) const {
+  const auto type_entry = property_config_map_.find(schema_type);
+  if (type_entry != property_config_map_.end()) {
+    const auto &properties = type_entry->second;
+    const auto property_entry = properties.find(property_name);
+    if (property_entry != properties.end()) {
+      return &property_entry->second;
+    }
+    return absl_ports::NotFoundError(
+        absl_ports::StrCat("Property: ", property_name, " is not found."));
+  }
+  return absl_ports::NotFoundError(
+      absl_ports::StrCat("Schema type: ", schema_type, " is not found."));
+}
+
+// Resolves the term match type that applies to `section` for a document whose
+// top-level type is `schema_type`, by walking the section's dot-separated
+// property path through the configs returned by GetPropertyConfig().
+//
+// Returns:
+//   - The string indexing config's term match type as soon as the walk reaches
+//     a STRING property.
+//   - TermMatchType::UNKNOWN if the walk reaches a property that is neither
+//     STRING nor DOCUMENT, if a DOCUMENT level has
+//     index_nested_properties=false and the remaining path is not listed in
+//     that level's indexable_nested_properties_list, or if the path ends on a
+//     DOCUMENT property.
+//   - INVALID_ARGUMENT if splitting the path yields no components.
+//   - Any error from GetPropertyConfig() (e.g. NOT_FOUND) for an unknown
+//     schema type or property along the path.
+libtextclassifier3::StatusOr<TermMatchType::Code>
+InMemoryIcingSearchEngine::GetTermMatchType(
+    const std::string &schema_type,
+    const MonkeyTokenizedSection &section) const {
+  bool in_indexable_properties_list = false;
+  bool all_indexable_from_top = true;
+
+  std::vector<std::string_view> properties_in_path =
+      absl_ports::StrSplit(section.path, ".");
+  if (properties_in_path.empty()) {
+    return absl_ports::InvalidArgumentError("Got empty path.");
+  }
+  std::string curr_schema_type = schema_type;
+  for (size_t i = 0; i < properties_in_path.size(); ++i) {
+    ICING_ASSIGN_OR_RETURN(
+        const PropertyConfigProto *prop,
+        GetPropertyConfig(curr_schema_type,
+                          std::string(properties_in_path[i])));
+    if (prop->data_type() == PropertyConfigProto::DataType::STRING) {
+      return prop->string_indexing_config().term_match_type();
+    }
+
+    if (prop->data_type() != PropertyConfigProto::DataType::DOCUMENT) {
+      return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+    }
+
+    const bool old_all_indexable_from_top = all_indexable_from_top;
+    all_indexable_from_top &=
+        prop->document_indexing_config().index_nested_properties();
+    if (!all_indexable_from_top && !in_indexable_properties_list) {
+      // Only try to update in_indexable_properties_list if this is the first
+      // level with index_nested_properties=false.
+      if (old_all_indexable_from_top) {
+        const auto &indexable_properties =
+            prop->document_indexing_config().indexable_nested_properties_list();
+        // Rebuild the path relative to this document property (e.g. "b.c" for
+        // "a.b.c" when standing on "a"). The components must be re-joined
+        // with "." so that multi-level paths can match the list entries;
+        // plain concatenation would produce "bc".
+        std::string relative_path;
+        for (size_t j = i + 1; j < properties_in_path.size(); ++j) {
+          if (j > i + 1) {
+            relative_path.push_back('.');
+          }
+          relative_path.append(properties_in_path[j]);
+        }
+        in_indexable_properties_list =
+            std::find(indexable_properties.begin(), indexable_properties.end(),
+                      relative_path) != indexable_properties.end();
+      }
+      // Check in_indexable_properties_list again.
+      if (!in_indexable_properties_list) {
+        return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+      }
+    }
+    // Descend into the nested document's schema type. This must be the
+    // property's schema_type field; GetTypeName() on the indexing config
+    // would return the protobuf message's own type name instead.
+    curr_schema_type = prop->schema_type();
+  }
+  return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+}
+
+libtextclassifier3::StatusOr<bool>
+InMemoryIcingSearchEngine::DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const {
std::vector<std::string_view> strs = absl_ports::StrSplit(query, ":");
std::string_view query_term;
std::string_view section_restrict;
@@ -54,8 +137,15 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
if (!section_restrict.empty() && section.path != section_restrict) {
continue;
}
+ ICING_ASSIGN_OR_RETURN(
+ TermMatchType::Code section_term_match_type,
+ GetTermMatchType(document.document.schema(), section));
+ if (section_term_match_type == TermMatchType::UNKNOWN) {
+ // Skip non-indexable property.
+ continue;
+ }
for (const std::string &token : section.token_sequence) {
- if (section.term_match_type == TermMatchType::EXACT_ONLY ||
+ if (section_term_match_type == TermMatchType::EXACT_ONLY ||
term_match_type == TermMatchType::EXACT_ONLY) {
if (token == query_term) {
return true;
@@ -68,7 +158,18 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
return false;
}
-} // namespace
+// Stores `schema` and rebuilds property_config_map_ from it, mapping each
+// schema type name to its properties keyed by property name. Any previous
+// contents of the map are discarded. If a type lists the same property name
+// more than once, the first occurrence is kept.
+void InMemoryIcingSearchEngine::SetSchema(SchemaProto &&schema) {
+  schema_ = std::make_unique<SchemaProto>(std::move(schema));
+  property_config_map_.clear();
+  for (const SchemaTypeConfigProto &schema_type_config : schema_->types()) {
+    // operator[] creates the per-type entry even when the type has no
+    // properties.
+    auto &properties_by_name =
+        property_config_map_[schema_type_config.schema_type()];
+    for (const PropertyConfigProto &property :
+         schema_type_config.properties()) {
+      properties_by_name.emplace(property.property_name(), property);
+    }
+  }
+}
InMemoryIcingSearchEngine::PickDocumentResult
InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
@@ -121,7 +222,7 @@ InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
void InMemoryIcingSearchEngine::Put(const MonkeyTokenizedDocument &document) {
// Delete the old one if existing.
- Delete(document.document.namespace_(), document.document.uri());
+ Delete(document.document.namespace_(), document.document.uri()).IgnoreError();
existing_doc_ids_.push_back(documents_.size());
namespace_uri_docid_map[document.document.namespace_()]
[document.document.uri()] = documents_.size();
@@ -192,7 +293,8 @@ InMemoryIcingSearchEngine::DeleteBySchemaType(const std::string &schema_type) {
libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
const SearchSpecProto &search_spec) {
- std::vector<DocumentId> doc_ids_to_delete = InternalSearch(search_spec);
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> doc_ids_to_delete,
+ InternalSearch(search_spec));
for (DocumentId doc_id : doc_ids_to_delete) {
const DocumentProto &document = documents_[doc_id].document;
if (!Delete(document.namespace_(), document.uri()).ok()) {
@@ -204,9 +306,10 @@ libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
return doc_ids_to_delete.size();
}
-std::vector<DocumentProto> InMemoryIcingSearchEngine::Search(
- const SearchSpecProto &search_spec) const {
- std::vector<DocumentId> matched_doc_ids = InternalSearch(search_spec);
+libtextclassifier3::StatusOr<std::vector<DocumentProto>>
+InMemoryIcingSearchEngine::Search(const SearchSpecProto &search_spec) const {
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> matched_doc_ids,
+ InternalSearch(search_spec));
std::vector<DocumentProto> result;
result.reserve(matched_doc_ids.size());
for (DocumentId doc_id : matched_doc_ids) {
@@ -229,12 +332,16 @@ libtextclassifier3::StatusOr<DocumentId> InMemoryIcingSearchEngine::InternalGet(
" is not found by InMemoryIcingSearchEngine::InternalGet."));
}
-std::vector<DocumentId> InMemoryIcingSearchEngine::InternalSearch(
+libtextclassifier3::StatusOr<std::vector<DocumentId>>
+InMemoryIcingSearchEngine::InternalSearch(
const SearchSpecProto &search_spec) const {
std::vector<DocumentId> matched_doc_ids;
for (DocumentId doc_id : existing_doc_ids_) {
- if (DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
- search_spec.term_match_type())) {
+ ICING_ASSIGN_OR_RETURN(
+ bool match,
+ DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
+ search_spec.term_match_type()));
+ if (match) {
matched_doc_ids.push_back(doc_id);
}
}