aboutsummaryrefslogtreecommitdiff
path: root/icing/query/advanced_query_parser/lexer_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'icing/query/advanced_query_parser/lexer_test.cc')
-rw-r--r--icing/query/advanced_query_parser/lexer_test.cc82
1 files changed, 67 insertions, 15 deletions
diff --git a/icing/query/advanced_query_parser/lexer_test.cc b/icing/query/advanced_query_parser/lexer_test.cc
index 86284fb..c6d215c 100644
--- a/icing/query/advanced_query_parser/lexer_test.cc
+++ b/icing/query/advanced_query_parser/lexer_test.cc
@@ -73,22 +73,26 @@ TEST(LexerTest, PrefixQuery) {
ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("foo*", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
lexer = std::make_unique<Lexer>("fooAND*", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("fooAND*", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("fooAND", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
lexer = std::make_unique<Lexer>("*ORfoo", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("*ORfoo", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("", Lexer::TokenType::STAR),
+ EqualsLexerToken("ORfoo", Lexer::TokenType::TEXT)));
lexer = std::make_unique<Lexer>("fooANDbar*", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
- EXPECT_THAT(tokens, ElementsAre(EqualsLexerToken("fooANDbar*",
- Lexer::TokenType::TEXT)));
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("fooANDbar", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR)));
}
TEST(LexerTest, SimpleStringQuery) {
@@ -296,7 +300,8 @@ TEST(LexerTest, ComplexQuery) {
EqualsLexerToken("sender", Lexer::TokenType::TEXT),
EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
EqualsLexerToken(Lexer::TokenType::LPAREN),
- EqualsLexerToken("foo*", Lexer::TokenType::TEXT),
+ EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR),
EqualsLexerToken(Lexer::TokenType::AND),
EqualsLexerToken("bar", Lexer::TokenType::TEXT),
EqualsLexerToken(Lexer::TokenType::OR),
@@ -376,14 +381,13 @@ TEST(LexerTest, CJKT) {
lexer = std::make_unique<Lexer>("ញុំ&&ដើរទៅ||ធ្វើការ-រាល់ថ្ងៃ",
Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
- EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("ញុំ", Lexer::TokenType::TEXT),
- EqualsLexerToken(Lexer::TokenType::AND),
- EqualsLexerToken("ដើរទៅ", Lexer::TokenType::TEXT),
- EqualsLexerToken(Lexer::TokenType::OR),
- EqualsLexerToken("ធ្វើការ", Lexer::TokenType::TEXT),
- EqualsLexerToken(Lexer::TokenType::MINUS),
- EqualsLexerToken("រាល់ថ្ងៃ", Lexer::TokenType::TEXT)));
+ EXPECT_THAT(
+ tokens,
+ ElementsAre(EqualsLexerToken("ញុំ", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::AND),
+ EqualsLexerToken("ដើរទៅ", Lexer::TokenType::TEXT),
+ EqualsLexerToken(Lexer::TokenType::OR),
+ EqualsLexerToken("ធ្វើការ-រាល់ថ្ងៃ", Lexer::TokenType::TEXT)));
lexer = std::make_unique<Lexer>(
"나는"
@@ -477,7 +481,9 @@ TEST(LexerTest, ScoringArithmetic) {
lexer = std::make_unique<Lexer>("1+2*3/4", Lexer::Language::QUERY);
ICING_ASSERT_OK_AND_ASSIGN(tokens, lexer->ExtractTokens());
EXPECT_THAT(tokens,
- ElementsAre(EqualsLexerToken("1+2*3/4", Lexer::TokenType::TEXT)));
+ ElementsAre(EqualsLexerToken("1+2", Lexer::TokenType::TEXT),
+ EqualsLexerToken("", Lexer::TokenType::STAR),
+ EqualsLexerToken("3/4", Lexer::TokenType::TEXT)));
}
// Currently, in scoring language, the lexer will view these logic operators as
@@ -609,6 +615,52 @@ TEST(LexerTest, ComplexScoring) {
EqualsLexerToken(Lexer::TokenType::RPAREN)));
}
+// foo:bar:baz is considered an invalid query as proposed in
+// http://go/appsearch-advanced-query-impl-plan#bookmark=id.yoeyepokmbc5 ; this
+// ensures that the lexer consistently tokenizes colons independently.
+TEST(LexerTest, NoAmbiguousTokenizing) {
+ // This is an invalid query; the lexer doesn't treat `bar:baz` as one token.
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("foo:bar:baz", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> invalidQueryTokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(invalidQueryTokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("baz", Lexer::TokenType::TEXT)));
+
+ lexer = std::make_unique<Lexer>("foo:\"bar:baz\"", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> validQueryTokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(
+ validQueryTokens,
+ ElementsAre(EqualsLexerToken("foo", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("bar:baz", Lexer::TokenType::STRING)));
+}
+
+TEST(LexerTest, WhiteSpacesDoNotAffectColonTokenization) {
+ std::unique_ptr<Lexer> lexer =
+ std::make_unique<Lexer>("a:b c : d e: f g :h", Lexer::Language::QUERY);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Lexer::LexerToken> tokens,
+ lexer->ExtractTokens());
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsLexerToken("a", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("b", Lexer::TokenType::TEXT),
+ EqualsLexerToken("c", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("d", Lexer::TokenType::TEXT),
+ EqualsLexerToken("e", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("f", Lexer::TokenType::TEXT),
+ EqualsLexerToken("g", Lexer::TokenType::TEXT),
+ EqualsLexerToken(":", Lexer::TokenType::COMPARATOR),
+ EqualsLexerToken("h", Lexer::TokenType::TEXT)));
+}
+
TEST(LexerTest, QueryShouldRejectTokensBeyondLimit) {
std::string query;
for (int i = 0; i < Lexer::kMaxNumTokens + 1; ++i) {