diff options
Diffstat (limited to 'icing/monkey_test/in-memory-icing-search-engine.cc')
-rw-r--r-- | icing/monkey_test/in-memory-icing-search-engine.cc | 133 |
1 files changed, 120 insertions, 13 deletions
diff --git a/icing/monkey_test/in-memory-icing-search-engine.cc b/icing/monkey_test/in-memory-icing-search-engine.cc index 405a7b0..7baa06e 100644 --- a/icing/monkey_test/in-memory-icing-search-engine.cc +++ b/icing/monkey_test/in-memory-icing-search-engine.cc @@ -14,15 +14,27 @@ #include "icing/monkey_test/in-memory-icing-search-engine.h" +#include <algorithm> #include <cstdint> +#include <memory> +#include <random> +#include <string> #include <string_view> #include <unordered_set> +#include <utility> #include <vector> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/absl_ports/str_join.h" +#include "icing/monkey_test/monkey-tokenized-document.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/store/document-id.h" #include "icing/util/status-macros.h" namespace icing { @@ -38,9 +50,80 @@ bool IsPrefix(std::string_view s1, std::string_view s2) { return s1 == s2.substr(0, s1.length()); } -bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document, - const std::string &query, - TermMatchType::Code term_match_type) { +} // namespace + +libtextclassifier3::StatusOr<const PropertyConfigProto *> +InMemoryIcingSearchEngine::GetPropertyConfig( + const std::string &schema_type, const std::string &property_name) const { + auto schema_iter = property_config_map_.find(schema_type); + if (schema_iter == property_config_map_.end()) { + return absl_ports::NotFoundError( + absl_ports::StrCat("Schema type: ", schema_type, " is not found.")); + } + auto property_iter = schema_iter->second.find(property_name); + if (property_iter == schema_iter->second.end()) { + return absl_ports::NotFoundError( + absl_ports::StrCat("Property: ", property_name, " is not found.")); + } + return &property_iter->second; +} + +libtextclassifier3::StatusOr<TermMatchType::Code> +InMemoryIcingSearchEngine::GetTermMatchType( + const std::string &schema_type, + const MonkeyTokenizedSection §ion) const { + bool in_indexable_properties_list = false; + bool all_indexable_from_top = true; + + std::vector<std::string_view> properties_in_path = + absl_ports::StrSplit(section.path, "."); + if (properties_in_path.empty()) { + return absl_ports::InvalidArgumentError("Got empty path."); + } + std::string curr_schema_type = schema_type; + for (int i = 0; i < properties_in_path.size(); ++i) { + ICING_ASSIGN_OR_RETURN( + const PropertyConfigProto *prop, + GetPropertyConfig(curr_schema_type, + std::string(properties_in_path[i]))); + if (prop->data_type() == PropertyConfigProto::DataType::STRING) { + return prop->string_indexing_config().term_match_type(); + } + + if (prop->data_type() != PropertyConfigProto::DataType::DOCUMENT) { + return TermMatchType::Code::TermMatchType_Code_UNKNOWN; + } + + bool old_all_indexable_from_top = all_indexable_from_top; + all_indexable_from_top &= + prop->document_indexing_config().index_nested_properties(); + if (!all_indexable_from_top && !in_indexable_properties_list) { + // Only try to update in_indexable_properties_list if this is the first + // level with index_nested_properties=false. + if (old_all_indexable_from_top) { + auto &indexable_properties = + prop->document_indexing_config().indexable_nested_properties_list(); + std::string relative_path = + absl_ports::StrCatPieces(std::vector<std::string_view>( + properties_in_path.begin() + i + 1, properties_in_path.end())); + in_indexable_properties_list = + std::find(indexable_properties.begin(), indexable_properties.end(), + relative_path) != indexable_properties.end(); + } + // Check in_indexable_properties_list again. + if (!in_indexable_properties_list) { + return TermMatchType::Code::TermMatchType_Code_UNKNOWN; + } + } + curr_schema_type = prop->document_indexing_config().GetTypeName(); + } + return TermMatchType::Code::TermMatchType_Code_UNKNOWN; +} + +libtextclassifier3::StatusOr<bool> +InMemoryIcingSearchEngine::DoesDocumentMatchQuery( + const MonkeyTokenizedDocument &document, const std::string &query, + TermMatchType::Code term_match_type) const { std::vector<std::string_view> strs = absl_ports::StrSplit(query, ":"); std::string_view query_term; std::string_view section_restrict; @@ -54,8 +137,15 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document, if (!section_restrict.empty() && section.path != section_restrict) { continue; } + ICING_ASSIGN_OR_RETURN( + TermMatchType::Code section_term_match_type, + GetTermMatchType(document.document.schema(), section)); + if (section_term_match_type == TermMatchType::UNKNOWN) { + // Skip non-indexable property. + continue; + } for (const std::string &token : section.token_sequence) { - if (section.term_match_type == TermMatchType::EXACT_ONLY || + if (section_term_match_type == TermMatchType::EXACT_ONLY || term_match_type == TermMatchType::EXACT_ONLY) { if (token == query_term) { return true; @@ -68,7 +158,18 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document, return false; } -} // namespace +void InMemoryIcingSearchEngine::SetSchema(SchemaProto &&schema) { + schema_ = std::make_unique<SchemaProto>(std::move(schema)); + property_config_map_.clear(); + for (const SchemaTypeConfigProto &type_config : schema_->types()) { + auto &curr_property_map = property_config_map_[type_config.schema_type()]; + for (const PropertyConfigProto &property_config : + type_config.properties()) { + curr_property_map.insert( + {property_config.property_name(), property_config}); + } + } +} InMemoryIcingSearchEngine::PickDocumentResult InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all, @@ -121,7 +222,7 @@ InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all, void InMemoryIcingSearchEngine::Put(const MonkeyTokenizedDocument &document) { // Delete the old one if existing. - Delete(document.document.namespace_(), document.document.uri()); + Delete(document.document.namespace_(), document.document.uri()).IgnoreError(); existing_doc_ids_.push_back(documents_.size()); namespace_uri_docid_map[document.document.namespace_()] [document.document.uri()] = documents_.size(); @@ -192,7 +293,8 @@ InMemoryIcingSearchEngine::DeleteBySchemaType(const std::string &schema_type) { libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery( const SearchSpecProto &search_spec) { - std::vector<DocumentId> doc_ids_to_delete = InternalSearch(search_spec); + ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> doc_ids_to_delete, + InternalSearch(search_spec)); for (DocumentId doc_id : doc_ids_to_delete) { const DocumentProto &document = documents_[doc_id].document; if (!Delete(document.namespace_(), document.uri()).ok()) { @@ -204,9 +306,10 @@ libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery( return doc_ids_to_delete.size(); } -std::vector<DocumentProto> InMemoryIcingSearchEngine::Search( - const SearchSpecProto &search_spec) const { - std::vector<DocumentId> matched_doc_ids = InternalSearch(search_spec); +libtextclassifier3::StatusOr<std::vector<DocumentProto>> +InMemoryIcingSearchEngine::Search(const SearchSpecProto &search_spec) const { + ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> matched_doc_ids, + InternalSearch(search_spec)); std::vector<DocumentProto> result; result.reserve(matched_doc_ids.size()); for (DocumentId doc_id : matched_doc_ids) { @@ -229,12 +332,16 @@ libtextclassifier3::StatusOr<DocumentId> InMemoryIcingSearchEngine::InternalGet( " is not found by InMemoryIcingSearchEngine::InternalGet.")); } -std::vector<DocumentId> InMemoryIcingSearchEngine::InternalSearch( +libtextclassifier3::StatusOr<std::vector<DocumentId>> +InMemoryIcingSearchEngine::InternalSearch( const SearchSpecProto &search_spec) const { std::vector<DocumentId> matched_doc_ids; for (DocumentId doc_id : existing_doc_ids_) { - if (DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(), - search_spec.term_match_type())) { + ICING_ASSIGN_OR_RETURN( + bool match, + DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(), + search_spec.term_match_type())); + if (match) { matched_doc_ids.push_back(doc_id); } } |