// Copyright (C) 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_QUERY_VISITOR_H_ #define ICING_QUERY_ADVANCED_QUERY_PARSER_QUERY_VISITOR_H_ #include #include #include #include #include #include #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator-filter.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/index/numeric/numeric-index.h" #include "icing/query/advanced_query_parser/abstract-syntax-tree.h" #include "icing/query/advanced_query_parser/function.h" #include "icing/query/advanced_query_parser/pending-value.h" #include "icing/query/query-features.h" #include "icing/query/query-results.h" #include "icing/schema/schema-store.h" #include "icing/store/document-store.h" #include "icing/tokenization/tokenizer.h" #include "icing/transform/normalizer.h" namespace icing { namespace lib { // The Visitor used to create the DocHitInfoIterator tree from the AST output by // the parser. class QueryVisitor : public AbstractSyntaxTreeVisitor { public: explicit QueryVisitor( Index* index, const NumericIndex* numeric_index, const DocumentStore* document_store, const SchemaStore* schema_store, const Normalizer* normalizer, const Tokenizer* tokenizer, DocHitInfoIteratorFilter::Options filter_options, TermMatchType::Code match_type, bool needs_term_frequency_info) : QueryVisitor(index, numeric_index, document_store, schema_store, normalizer, tokenizer, filter_options, match_type, needs_term_frequency_info, PendingPropertyRestricts(), /*processing_not=*/false) {} void VisitFunctionName(const FunctionNameNode* node) override; void VisitString(const StringNode* node) override; void VisitText(const TextNode* node) override; void VisitMember(const MemberNode* node) override; void VisitFunction(const FunctionNode* node) override; void VisitUnaryOperator(const UnaryOperatorNode* node) override; void VisitNaryOperator(const NaryOperatorNode* node) override; // RETURNS: // - the QueryResults reflecting the AST that was visited // - INVALID_ARGUMENT if the AST does not conform to supported expressions // - NOT_FOUND if the AST refers to a property that does not exist libtextclassifier3::StatusOr ConsumeResults() &&; private: // An internal class to help manage property restricts being applied at // different levels. class PendingPropertyRestricts { public: // Add another set of property restricts. Elements of new_restricts that are // not present in active_property_rest void AddValidRestricts(std::set new_restricts); // Pops the most recently added set of property restricts. void PopRestricts() { if (has_active_property_restricts()) { pending_property_restricts_.pop_back(); } } bool has_active_property_restricts() const { return !pending_property_restricts_.empty(); } // The set of all property restrictions that are currently being applied. const std::set& active_property_restricts() const { return pending_property_restricts_.back(); } private: std::vector> pending_property_restricts_; }; explicit QueryVisitor( Index* index, const NumericIndex* numeric_index, const DocumentStore* document_store, const SchemaStore* schema_store, const Normalizer* normalizer, const Tokenizer* tokenizer, DocHitInfoIteratorFilter::Options filter_options, TermMatchType::Code match_type, bool needs_term_frequency_info, PendingPropertyRestricts pending_property_restricts, bool processing_not) : index_(*index), numeric_index_(*numeric_index), document_store_(*document_store), schema_store_(*schema_store), normalizer_(*normalizer), tokenizer_(*tokenizer), filter_options_(std::move(filter_options)), match_type_(match_type), needs_term_frequency_info_(needs_term_frequency_info), pending_property_restricts_(std::move(pending_property_restricts)), processing_not_(processing_not) { RegisterFunctions(); } bool has_pending_error() const { return !pending_error_.ok(); } // Creates a DocHitInfoIterator reflecting the provided term. Also populates, // property_query_terms_map_ and query_term_iterators_ as appropriate. // Returns: // - On success, a DocHitInfoIterator for the provided term // - INVALID_ARGUMENT if unable to create an iterator for the term. libtextclassifier3::StatusOr> CreateTermIterator(const std::string& term); // Processes the PendingValue at the top of pending_values_, parses it into a // int64_t and pops the top. // Returns: // - On success, the int value stored in the text at the top // - INVALID_ARGUMENT if pending_values_ is empty, doesn't hold a text or // can't be parsed as an int. libtextclassifier3::StatusOr PopPendingIntValue(); // Processes the PendingValue at the top of pending_values_ and pops the top. // Returns: // - On success, the string value stored in the text at the top // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a string. libtextclassifier3::StatusOr PopPendingStringValue(); // Processes the PendingValue at the top of pending_values_ and pops the top. // Returns: // - On success, the string value stored in the text at the top // - INVALID_ARGUMENT if pending_values_ is empty or doesn't hold a text. libtextclassifier3::StatusOr PopPendingTextValue(); // Processes the PendingValue at the top of pending_values_ and pops the top. // Returns: // - On success, a DocHitInfoIterator representing for the term at the top // - INVALID_ARGUMENT if pending_values_ is empty or if unable to create an // iterator for the term. libtextclassifier3::StatusOr> PopPendingIterator(); // Processes all PendingValues at the top of pending_values_ until the first // placeholder is encounter. // Returns: // - On success, a vector containing all DocHitInfoIterators representing // the values at the top of pending_values_ // - INVALID_ARGUMENT if pending_values_is empty or if unable to create an // iterator for any of the terms at the top of pending_values_ libtextclassifier3::StatusOr>> PopAllPendingIterators(); // Processes the NumericComparator represented by node. This must be called // *after* this node's children have been visited. The PendingValues added by // this node's children will be consumed by this function and the PendingValue // for this node will be returned. // Returns: // - On success, then PendingValue representing this node and it's children. // - INVALID_ARGUMENT if unable to retrieve string value or int value // - NOT_FOUND if there is no entry in the numeric index for the property libtextclassifier3::StatusOr ProcessNumericComparator( const NaryOperatorNode* node); // Processes the AND and OR operators represented by the node. This must be // called *after* this node's children have been visited. The PendingValues // added by this node's children will be consumed by this function and the // PendingValue for this node will be returned. // Returns: // - On success, then PendingValue representing this node and it's children. // - INVALID_ARGUMENT if unable to retrieve iterators for any of this node's // children. libtextclassifier3::StatusOr ProcessAndOperator( const NaryOperatorNode* node); // Processes the OR operator represented by the node. This must be called // *after* this node's children have been visited. The PendingValues added by // this node's children will be consumed by this function and the PendingValue // for this node will be returned. // Returns: // - On success, then PendingValue representing this node and it's children. // - INVALID_ARGUMENT if unable to retrieve iterators for any of this node's // children. libtextclassifier3::StatusOr ProcessOrOperator( const NaryOperatorNode* node); // Populates registered_functions with the currently supported set of // functions. void RegisterFunctions(); // Implementation of `search` custom function in the query language. // Returns: // - a PendingValue holding the DocHitInfoIterator reflecting the query // provided to SearchFunction // - any errors returned by Lexer::ExtractTokens, Parser::ConsumeQuery or // QueryVisitor::ConsumeResults. libtextclassifier3::StatusOr SearchFunction( std::vector&& args); // Handles a NaryOperatorNode where the operator is HAS (':') and pushes an // iterator with the proper section filter applied. If the current property // restriction represented by pending_property_restricts and the first child // of this node is unsatisfiable (ex. `prop1:(prop2:foo)`), then a NONE // iterator is returned immediately and subtree represented by the second // child is not traversed. // // Returns: // - OK on success // - INVALID_ARGUMENT node does not have exactly two children or the two // children cannot be resolved to a MEMBER or an iterator respectively. libtextclassifier3::Status ProcessHasOperator(const NaryOperatorNode* node); std::stack pending_values_; libtextclassifier3::Status pending_error_; // A map from function name to Function instance. std::unordered_map registered_functions_; SectionRestrictQueryTermsMap property_query_terms_map_; QueryTermIteratorsMap query_term_iterators_; // Set of features invoked in the query. std::unordered_set features_; Index& index_; // Does not own! const NumericIndex& numeric_index_; // Does not own! const DocumentStore& document_store_; // Does not own! const SchemaStore& schema_store_; // Does not own! const Normalizer& normalizer_; // Does not own! const Tokenizer& tokenizer_; // Does not own! DocHitInfoIteratorFilter::Options filter_options_; TermMatchType::Code match_type_; // Whether or not term_frequency information is needed. This affects: // - how DocHitInfoIteratorTerms are constructed // - whether the QueryTermIteratorsMap is populated in the QueryResults. bool needs_term_frequency_info_; // The stack of property restricts currently being processed by the visitor. PendingPropertyRestricts pending_property_restricts_; bool processing_not_; }; } // namespace lib } // namespace icing #endif // ICING_QUERY_ADVANCED_QUERY_PARSER_QUERY_VISITOR_H_