//
// Copyright (C) 2018 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

include "actions/actions-entity-data.fbs";
include "annotator/model.fbs";
include "utils/codepoint-range.fbs";
include "utils/flatbuffers/flatbuffers.fbs";
include "utils/grammar/rules.fbs";
include "utils/intents/intent-config.fbs";
include "utils/normalization.fbs";
include "utils/resources.fbs";
include "utils/tokenizer.fbs";
include "utils/zlib/buffer.fbs";

file_identifier "TC3A";

// Prediction type for a multi-task model.
namespace libtextclassifier3;
enum PredictionType : int {
  UNSUPPORTED = 0,
  NEXT_MESSAGE_PREDICTION = 1,
  INTENT_TRIGGERING = 2,
  ENTITY_ANNOTATION = 3,
}

namespace libtextclassifier3;
enum RankingOptionsSortType : int {
  SORT_TYPE_UNSPECIFIED = 0,

  // Rank results (or groups) by score, then type.
  SORT_TYPE_SCORE = 1,

  // Rank results (or groups) by priority score, then score, then type.
  SORT_TYPE_PRIORITY_SCORE = 2,
}

// Prediction metadata for an arbitrary task.
namespace libtextclassifier3;
table PredictionMetadata {
  prediction_type:PredictionType;
  task_spec:ActionSuggestionSpec;
  output_suggestions:int;
  output_suggestions_scores:int;
  output_suggestions_spans:int;
}

namespace libtextclassifier3.TensorflowLiteModelSpec_;
table InputNameIndexEntry {
  key:string (key, shared);
  value:int;
}

// TensorFlow Lite model for suggesting actions.
namespace libtextclassifier3;
table TensorflowLiteModelSpec {
  // TensorFlow Lite model for suggesting actions.
  tflite_model:[ubyte] (force_align: 16);

  // Input specification.
  // (num messages,) int32 tensor, the user id per message.
  input_user_id:int = 0;

  // (num messages,) string tensor, each message of the conversation.
  input_context:int = 1;

  // int, the number of messages in the conversation.
  input_context_length:int = 2;

  // (num messages,) float tensor, the time difference in seconds of the
  // messages in the conversation.
  input_time_diffs:int = 3;

  // int, the number of smart replies to produce.
  input_num_suggestions:int = 4;

  reserved_7:int (deprecated);
  reserved_8:int (deprecated);
  reserved_9:int (deprecated);

  // Input port for hashed and embedded tokens, a (num messages, max tokens,
  // embedding size) float tensor specifying the embeddings of each token of
  // each message in the conversation.
  input_token_embeddings:int = -1;

  // Input port for the number of tokens per message.
  // (num messages) int32 tensor specifying the number of tokens in each
  // message in the conversation.
  input_num_tokens:int = -1;

  // Output specification.
  output_replies:int = 0;
  output_replies_scores:int = 1;
  output_sensitive_topic_score:int = 3;
  output_triggering_score:int = 4;
  output_actions_scores:int = 5;

  // Model setup.
  // When true, the inputs are resized to the concrete input sizes before
  // inference; otherwise, it's assumed that the model has the correct input
  // shapes set.
  resize_inputs:bool = false;

  // Input port for the hashed, embedded and flattened/concatenated tokens.
  // A (max tokens, embedding_size) float tensor specifying the embeddings of
  // each token.
  input_flattened_token_embeddings:int = -1;

  // Generalized output specification that handles an arbitrary number of
  // prediction tasks.
  prediction_metadata:[PredictionMetadata];

  // Map of additional input tensor name to its index.
  input_name_index:[TensorflowLiteModelSpec_.InputNameIndexEntry];

  // If greater than 0, pad or truncate the input_user_id and input_context
  // tensors to the length of input_length_to_pad.
  input_length_to_pad:int = 0;
}
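
// Illustrative sketch (not part of the schema): a hypothetical
// TensorflowLiteModelSpec instance in FlatBuffers JSON text form. The tensor
// indices and the task spec values below are made-up examples that only show
// how prediction_metadata routes one prediction task to its output tensors.
//
//   {
//     "input_context": 1,
//     "input_context_length": 2,
//     "input_num_suggestions": 4,
//     "resize_inputs": true,
//     "prediction_metadata": [
//       {
//         "prediction_type": "NEXT_MESSAGE_PREDICTION",
//         "task_spec": { "type": "text_reply" },
//         "output_suggestions": 0,
//         "output_suggestions_scores": 1
//       }
//     ]
//   }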

// Configuration for the tokenizer.
namespace libtextclassifier3;
table ActionsTokenizerOptions {
  type:TokenizationType = INTERNAL_TOKENIZER;

  // If true, white space tokens will be kept when using the icu tokenizer.
  icu_preserve_whitespace_tokens:bool = false;

  // Codepoint ranges that determine what role the different codepoints play
  // during tokenization. The ranges must not overlap.
  tokenization_codepoint_config:[TokenizationCodepointRange];

  // A set of codepoint ranges to use in the mixed tokenization mode to
  // identify stretches of tokens to re-tokenize using the internal tokenizer.
  internal_tokenizer_codepoint_ranges:[CodepointRange];

  // If true, tokens will also be split when the codepoint's script_id changes
  // as defined in TokenizationCodepointRange.
  tokenize_on_script_change:bool = false;
}

// Configuration for the feature processor.
namespace libtextclassifier3;
table ActionsTokenFeatureProcessorOptions {
  // Tokenizer options.
  tokenizer_options:ActionsTokenizerOptions;

  // Serialized TensorFlow Lite model with weights for the token embeddings.
  embedding_model:[ubyte] (force_align: 16);

  // Size of the embedding.
  embedding_size:int = -1;

  // Number of bits for quantization for embeddings.
  embedding_quantization_bits:int = 8;

  // Number of buckets used for hashing charactergrams.
  num_buckets:int = -1;

  // Orders of charactergrams to extract, e.g. 2 means character bigrams, 3
  // character trigrams etc.
  chargram_orders:[int];

  // Whether to extract the token case feature.
  extract_case_feature:bool;

  // If true, will use the unicode-aware functionality for extracting
  // features.
  unicode_aware_features:bool;

  // Regexp features to extract.
  regexp_features:[string];

  // Whether to remap digits to a single number.
  remap_digits:bool;

  // Whether to lowercase all tokens.
  lowercase_tokens:bool;

  // Maximum length of a word.
  max_token_length:int = 20;

  // The `max_num_tokens_per_message` and `min_num_tokens_per_message` are
  // applied when tokens are embedded per message.
  // If set and the number of tokens of a message is bigger than this limit,
  // tokens at the beginning of the message are dropped to fit the limit.
  max_num_tokens_per_message:int = -1;

  // If set, the tokens of each message will be padded to this fixed number of
  // tokens.
  min_num_tokens_per_message:int = -1;

  // If set and the total number of concatenated tokens is bigger than this
  // limit, tokens at the start of the conversation are dropped.
  max_num_total_tokens:int = -1;

  // If set and the total number of concatenated tokens is smaller than this
  // limit, the conversation is padded with padding tokens.
  min_num_total_tokens:int = -1;

  // Id that is used as encoding of the padding token.
  padding_token_id:int = 0;

  // Id that is used as encoding of the start of message token.
  start_token_id:int = 1;

  // Id that is used as encoding of the end of message token.
  end_token_id:int = 2;
}
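
// Illustrative sketch of a feature processor configuration in FlatBuffers
// JSON text form. The concrete values (bucket count, chargram orders of 1, 2
// and 3 for unigrams, bigrams and trigrams, token limits) are assumptions
// chosen only to show how the fields fit together, not shipped configuration.
//
//   {
//     "tokenizer_options": {
//       "type": "ICU",
//       "icu_preserve_whitespace_tokens": false
//     },
//     "embedding_size": 16,
//     "num_buckets": 10000,
//     "chargram_orders": [1, 2, 3],
//     "extract_case_feature": true,
//     "max_token_length": 20,
//     "max_num_tokens_per_message": 30,
//     "padding_token_id": 0,
//     "start_token_id": 1,
//     "end_token_id": 2
//   }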

// N-Gram based linear regression model.
namespace libtextclassifier3;
table NGramLinearRegressionModel {
  // A flat list of all the hashed n-grams concatenated back to back. Elements
  // should only ever be accessed via the offset table below.
  hashed_ngram_tokens:[uint];

  // Offsets to the start of the n-grams in hashed_ngram_tokens. The last
  // element in this array is the length of hashed_ngram_tokens to make it
  // easier to compute n-gram lengths.
  ngram_start_offsets:[ushort];

  // Weights of the n-grams.
  ngram_weights:[float];

  // The default weight assigned to n-grams that weren't matched.
  default_token_weight:float;

  // Maximum n-gram length to consider when calculating the denominator.
  // This should usually be the same as max_ngram_length but can diverge
  // if additional (longer) n-grams are added to a model as part of a minor
  // update.
  max_denom_ngram_length:int;

  // If non-zero, the order of the skip-gram to match.
  max_skips:int;

  // The threshold above which the model output is considered positive.
  threshold:float;

  // Model specific tokenizer options.
  // If not specified, will reuse the feature processor tokenizer.
  tokenizer_options:ActionsTokenizerOptions;
}
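
// A worked layout sketch for the fields above (all values are made up). With
// three n-grams of lengths 2, 1 and 3, the flat token array and the offsets
// could look like:
//
//   hashed_ngram_tokens: [17, 83,  5,  42, 7, 19]
//   ngram_start_offsets: [0, 2, 3, 6]      (final element = total length, 6)
//   ngram_weights:       [0.8, -0.3, 1.2]
//
// The i-th n-gram spans hashed_ngram_tokens[ngram_start_offsets[i] ..
// ngram_start_offsets[i + 1]), so each n-gram's length is the difference of
// adjacent offsets; the trailing sentinel makes the last length computable in
// the same way.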

// TFLite based sensitive topic classifier model.
namespace libtextclassifier3;
table TFLiteSensitiveClassifierConfig {
  // Specification of the model.
  model_spec:TensorflowLiteModelSpec;

  // Triggering threshold: if a sensitive topic has a score higher than this
  // value, it triggers the classifier.
  threshold:float;
}

namespace libtextclassifier3;
table TriggeringPreconditions {
  // Lower bound thresholds for the smart reply model prediction output.
  min_smart_reply_triggering_score:float;

  // Maximum sensitive score for which actions and smart replies are shown.
  max_sensitive_topic_score:float = 1;

  // Whether to suppress all model output when a conversation is classified as
  // sensitive.
  suppress_on_sensitive_topic:bool = true;

  // Thresholds on the model prediction input.
  // The minimal length of input to consider for prediction.
  min_input_length:int = 0;

  // The maximal length of input to consider for prediction, -1 if unbounded.
  max_input_length:int = -1;

  // Minimal fraction of messages in the input conversation that need to match
  // a locale that the model can handle.
  min_locale_match_fraction:float = 0.75;

  handle_missing_locale_as_supported:bool = false;

  handle_unknown_locale_as_supported:bool = false;

  // Filter input with low-confidence triggers.
  suppress_on_low_confidence_input:bool = true;

  // Same as low_confidence_rules in ActionsModel.
  // NOTE: Only fill this when the TriggeringPreconditions are pushed
  // separately as a flag value (i.e. as overlay).
  low_confidence_rules:RulesModel;

  reserved_11:float (deprecated);
  reserved_12:float (deprecated);
  reserved_13:float (deprecated);

  // Smart reply thresholds.
  min_reply_score_threshold:float = 0;
}

// This proto handles model outputs that are concepts, such as emoji concept
// suggestion models. Each concept maps to a list of candidates. One of
// the candidates is chosen randomly as the final suggestion.
namespace libtextclassifier3;
table ActionConceptToSuggestion {
  concept_name:string (shared);
  candidates:[string];
}

namespace libtextclassifier3;
table ActionSuggestionSpec {
  // Type of the action suggestion.
  type:string (shared);

  // Text of a smart reply action.
  response_text:string (shared);

  // Score.
  score:float;

  // Additional entity information.
  serialized_entity_data:string (shared);

  // For ranking and internal conflict resolution.
  priority_score:float = 0;

  entity_data:ActionsEntityData;

  response_text_blocklist:[string];

  // If provided, map the response as concept to one of the corresponding
  // candidates.
  concept_mappings:[ActionConceptToSuggestion];
}

// Options to specify triggering behaviour per action class.
namespace libtextclassifier3;
table ActionTypeOptions {
  // The name of the predicted action.
  name:string (shared);

  // Triggering behaviour.
  // Whether the action class is considered in the model output or not.
  enabled:bool = true;

  // Minimal output score threshold.
  min_triggering_score:float = 0;

  // The action to trigger.
  action:ActionSuggestionSpec;
}

namespace libtextclassifier3.AnnotationActionsSpec_;
table AnnotationMapping {
  // The annotation collection.
  annotation_collection:string (shared);

  // The action name to use.
  action:ActionSuggestionSpec;

  // Whether to use the score of the annotation as the action score.
  use_annotation_score:bool = true;

  // Minimum threshold for the annotation score for filtering.
  min_annotation_score:float;

  // If set, the text of the annotation will be used to set a field in the
  // action entity data.
  entity_field:FlatbufferFieldPath;

  // If set, normalization to apply to the annotation text.
  normalization_options:NormalizationOptions;
}

// Configuration for actions based on annotations.
namespace libtextclassifier3;
table AnnotationActionsSpec {
  annotation_mapping:[AnnotationActionsSpec_.AnnotationMapping];

  // Whether to deduplicate annotations by type and text prior to generating
  // actions.
  deduplicate_annotations:bool = true;

  // Annotation usecase to specify for text annotation.
  annotation_usecase:AnnotationUsecase = ANNOTATION_USECASE_SMART;

  // Maximum number of recent messages to consider from any person.
  // We consider at most `max_history_from_any_person` many recent messages if
  // they were received from different users or at most the maximum of this
  // and `max_history_from_last_person` if they are all from the same user.
  max_history_from_any_person:int = 1;

  // Maximum number of recent messages to consider from the last person.
  max_history_from_last_person:int = 1;

  // Whether to include messages from the local user.
  include_local_user_messages:bool = false;

  // Whether to only consider messages up to the last one sent by the local
  // user.
  only_until_last_sent:bool = true;

  // If true, the annotator will populate serialized_entity_data in the
  // results.
  is_serialized_entity_data_enabled:bool = true;
}

// Ranking options.
namespace libtextclassifier3;
table RankingOptions {
  // When true, action suggestions are deduplicated by `type`, `response_text`
  // and associated annotations, keeping the higher scoring actions.
  deduplicate_suggestions:bool = true;

  // When true, actions are deduplicated by the span they are referring to.
  deduplicate_suggestions_by_span:bool = true;

  // Optional script to run for ranking and filtering the action suggestions.
  // The following global variables are available to the script:
  // * input: (optionally deduplicated) action suggestions, via the `actions`
  //   global
  // * output: indices of the actions to keep in the provided order.
  lua_ranking_script:string (shared);

  compressed_lua_ranking_script:CompressedBuffer;

  // If true, suppresses smart replies if other smart actions are suggested.
  suppress_smart_replies_with_actions:bool = false;

  // If true, keep actions from the same entities together for ranking.
  group_by_annotations:bool = true;

  sort_type:RankingOptionsSortType = SORT_TYPE_SCORE;
}
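
// Illustrative sketch of a ranking configuration in FlatBuffers JSON text
// form; all values are examples. A lua_ranking_script could additionally be
// supplied; as described above, it sees the suggestions through the `actions`
// global and outputs the indices of the actions to keep.
//
//   {
//     "deduplicate_suggestions": true,
//     "deduplicate_suggestions_by_span": true,
//     "suppress_smart_replies_with_actions": false,
//     "group_by_annotations": true,
//     "sort_type": "SORT_TYPE_PRIORITY_SCORE"
//   }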

// Entity data to set from capturing groups.
namespace libtextclassifier3.RulesModel_.RuleActionSpec_;
table RuleCapturingGroup {
  // The id of the group.
  group_id:int;

  // If set, the text of the capturing group will be used to set a field
  // in the action entity data.
  entity_field:FlatbufferFieldPath;

  // If set, the capturing group will be used to create a text annotation
  // with the given name and type.
  annotation_type:string (shared);
  annotation_name:string (shared);

  // If set, the capturing group text will be used to create a text
  // reply.
  text_reply:ActionSuggestionSpec;

  // If set, normalization to apply to the capturing group text.
  normalization_options:NormalizationOptions;

  // If set to true, an existing annotator annotation will be used to
  // create the action suggestion's text annotation.
  use_annotation_match:bool;

  // If set, merge in fixed entity data for a match.
  entity_data:ActionsEntityData;
}

// The actions to produce upon triggering.
namespace libtextclassifier3.RulesModel_;
table RuleActionSpec {
  // The action.
  action:ActionSuggestionSpec;

  capturing_group:[RuleActionSpec_.RuleCapturingGroup];
}

// List of regular expression matchers.
namespace libtextclassifier3.RulesModel_;
table RegexRule {
  // The regular expression pattern.
  pattern:string (shared);

  compressed_pattern:CompressedBuffer;

  actions:[RuleActionSpec];

  // Patterns for post-checking the outputs.
  output_pattern:string (shared);

  compressed_output_pattern:CompressedBuffer;
}

// Action configuration.
// Specifies the actions for a rule match.
namespace libtextclassifier3.RulesModel_.GrammarRules_;
table RuleMatch {
  // The actions to produce as part of this match.
  // These are indices into the `actions` array below.
  action_id:[uint];
}

// Configuration for actions based on context-free grammars.
namespace libtextclassifier3.RulesModel_;
table GrammarRules {
  // The tokenizer config.
  tokenizer_options:ActionsTokenizerOptions;

  // The grammar.
  rules:grammar.RulesSet;

  rule_match:[GrammarRules_.RuleMatch];

  // The action specifications used by the rule matches.
  actions:[RuleActionSpec];
}

// Rule based actions.
namespace libtextclassifier3;
table RulesModel {
  regex_rule:[RulesModel_.RegexRule];

  // If true, will compile the regexes only on first use.
  lazy_regex_compilation:bool = true;

  grammar_rules:RulesModel_.GrammarRules;
}
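
// Illustrative sketch of a rule-based action in FlatBuffers JSON text form.
// The pattern, action type and group id are hypothetical and only show how a
// regex capturing group can feed the action's entity data, as described in
// RuleCapturingGroup above; the entity_field path is elided ({ ... }) since
// its structure is defined in utils/flatbuffers/flatbuffers.fbs.
//
//   {
//     "regex_rule": [
//       {
//         "pattern": "meet at (\\d{1,2}:\\d{2})",
//         "actions": [
//           {
//             "action": { "type": "create_event", "score": 1.0 },
//             "capturing_group": [
//               { "group_id": 1, "entity_field": { ... } }
//             ]
//           }
//         ]
//       }
//     ],
//     "lazy_regex_compilation": true
//   }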

namespace libtextclassifier3;
table ActionsModel {
  // Comma-separated list of locales supported by the model as BCP 47 tags.
  locales:string (shared);

  // Version of the actions model.
  version:int;

  // A name for the model that can be used e.g. for logging.
  name:string (shared);

  tflite_model_spec:TensorflowLiteModelSpec;

  // Output classes.
  smart_reply_action_type:string (shared);

  action_type:[ActionTypeOptions];

  // Triggering conditions of the model.
  preconditions:TriggeringPreconditions;

  // Default number of smart reply predictions.
  num_smart_replies:int = 3;

  // Length of message history to consider, -1 if unbounded.
  max_conversation_history_length:int = 1;

  // Configuration for mapping annotations to action suggestions.
  annotation_actions_spec:AnnotationActionsSpec;

  // Configuration for rules.
  rules:RulesModel;

  // Configuration for intent generation on Android.
  android_intent_options:IntentFactoryModel;

  // Model resources.
  resources:ResourcePool;

  // Schema data for handling entity data.
  actions_entity_data_schema:[ubyte];

  // Action ranking options.
  ranking_options:RankingOptions;

  // Lua based actions.
  lua_actions_script:string (shared);
  compressed_lua_actions_script:CompressedBuffer;

  // Low confidence classifiers.
  low_confidence_rules:RulesModel;
  low_confidence_ngram_model:NGramLinearRegressionModel;

  // Feature processor options.
  feature_processor_options:ActionsTokenFeatureProcessorOptions;

  low_confidence_tflite_model:TFLiteSensitiveClassifierConfig;
}

root_type libtextclassifier3.ActionsModel;
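
// Illustrative sketch of a minimal ActionsModel in FlatBuffers JSON text
// form; all values are examples, not shipped configuration. A real model
// would additionally carry a tflite_model_spec, feature processor options
// and/or rules.
//
//   {
//     "locales": "en-US,en-GB",
//     "version": 1,
//     "name": "example_actions_model",
//     "smart_reply_action_type": "text_reply",
//     "num_smart_replies": 3,
//     "max_conversation_history_length": 1,
//     "preconditions": {
//       "min_smart_reply_triggering_score": 0.01,
//       "suppress_on_sensitive_topic": true
//     }
//   }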