aboutsummaryrefslogtreecommitdiff
path: root/icing/tokenization/tokenizer.h
diff options
context:
space:
mode:
Diffstat (limited to 'icing/tokenization/tokenizer.h')
-rw-r--r-- icing/tokenization/tokenizer.h 20
1 files changed, 12 insertions, 8 deletions
diff --git a/icing/tokenization/tokenizer.h b/icing/tokenization/tokenizer.h
index 24f8269..b4f0c6e 100644
--- a/icing/tokenization/tokenizer.h
+++ b/icing/tokenization/tokenizer.h
@@ -40,6 +40,14 @@ class Tokenizer {
public:
virtual ~Tokenizer() = default;
+ enum Type { // Tokenizer kinds, grouped below by index vs. query use.
+ // Index tokenizers
+ PLAIN, // Tokenizes plain text input.
+
+ // Query tokenizers
+ RAW_QUERY, // Tokenizes raw queries.
+ };
+
// An iterator helping to get tokens.
// Example usage:
//
@@ -75,26 +83,22 @@ class Tokenizer {
// offset.
// Ex.
// auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
- // iterator.ResetToTokenStartingAfter(4);
+ // iterator.ResetToTokenAfter(4);
// // The first full token starting after position 4 (the 'b' in "bar") is
// // "baz".
// PrintToken(iterator.GetToken()); // prints "baz"
- virtual bool ResetToTokenStartingAfter(int32_t utf32_offset) {
- return false;
- }
+ virtual bool ResetToTokenAfter(int32_t offset) { return false; } // Base default: ignores offset and returns false.
// Sets the tokenizer to point at the first token that *ends* *before*
// offset. Returns false if there are no valid tokens ending
// before offset.
// Ex.
// auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
- // iterator.ResetToTokenEndingBefore(4);
+ // iterator.ResetToTokenBefore(4);
// // The first full token ending before position 4 (the 'b' in "bar") is
// // "foo".
// PrintToken(iterator.GetToken()); // prints "foo"
- virtual bool ResetToTokenEndingBefore(int32_t utf32_offset) {
- return false;
- }
+ virtual bool ResetToTokenBefore(int32_t offset) { return false; } // Base default: ignores offset and returns false.
virtual bool ResetToStart() { return false; } // Base default: always returns false.
};