diff options
Diffstat (limited to 'icing/query/query-processor_test.cc')
-rw-r--r-- | icing/query/query-processor_test.cc | 494 |
1 files changed, 372 insertions, 122 deletions
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index d1cce87..d8b987a 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -213,8 +213,10 @@ TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) { SearchSpecProto search_spec; search_spec.set_query("()"); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); // Descending order of valid DocumentIds EXPECT_THAT(GetDocumentIds(results.root_iterator.get()), @@ -264,8 +266,10 @@ TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) { SearchSpecProto search_spec; search_spec.set_query(""); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); // Descending order of valid DocumentIds EXPECT_THAT(GetDocumentIds(results.root_iterator.get()), @@ -304,8 +308,7 @@ TEST_F(QueryProcessorTest, QueryTermNormalized) { SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; EXPECT_THAT( AddTokenToIndex(document_id, section_id, term_match_type, "hello"), @@ -325,8 +328,10 @@ TEST_F(QueryProcessorTest, QueryTermNormalized) { search_spec.set_query("hElLo WORLD"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); std::vector<TermMatchInfo> matched_terms_stats; ASSERT_THAT(results.root_iterator->Advance(), IsOk()); @@ -380,8 +385,7 @@ TEST_F(QueryProcessorTest, OneTermPrefixMatch) { SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; TermMatchType::Code term_match_type = TermMatchType::PREFIX; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; EXPECT_THAT( AddTokenToIndex(document_id, section_id, term_match_type, "hello"), @@ -398,8 +402,80 @@ TEST_F(QueryProcessorTest, OneTermPrefixMatch) { search_spec.set_query("he"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); + + std::vector<TermMatchInfo> matched_terms_stats; + ASSERT_THAT(results.root_iterator->Advance(), IsOk()); + EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id); + EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(), + section_id_mask); + results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats); + ASSERT_THAT(matched_terms_stats, SizeIs(1)); // 1 term + EXPECT_EQ(matched_terms_stats.at(0).term, "he"); + EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask); + EXPECT_THAT(matched_terms_stats.at(0).term_frequencies, + ElementsAreArray(term_frequencies)); + + EXPECT_THAT(results.query_terms, SizeIs(1)); + EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he")); + EXPECT_THAT(results.query_term_iterators, SizeIs(1)); +} + +TEST_F(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) { + // Create the schema and document store + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_, + schema_store_.get())); + document_store_ = std::move(create_result.document_store); + + // These documents don't actually match to the tokens in the index. We're + // inserting the documents to get the appropriate number of documents and + // namespaces populated. + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace1", "1") + .SetSchema("email") + .Build())); + + // Populate the index + SectionId section_id = kMaxSectionId; + SectionIdMask section_id_mask = UINT64_C(1) << section_id; + TermMatchType::Code term_match_type = TermMatchType::PREFIX; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{}; + term_frequencies[kMaxSectionId] = 1; + + EXPECT_THAT( + AddTokenToIndex(document_id, section_id, term_match_type, "hello"), + IsOk()); + + // Perform query + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QueryProcessor> query_processor, + QueryProcessor::Create(index_.get(), language_segmenter_.get(), + normalizer_.get(), document_store_.get(), + schema_store_.get())); + + SearchSpecProto search_spec; + search_spec.set_query("he"); + search_spec.set_term_match_type(term_match_type); + + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); std::vector<TermMatchInfo> matched_terms_stats; ASSERT_THAT(results.root_iterator->Advance(), IsOk()); @@ -448,8 +524,7 @@ TEST_F(QueryProcessorTest, OneTermExactMatch) { SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; EXPECT_THAT( AddTokenToIndex(document_id, section_id, term_match_type, "hello"), @@ -466,8 +541,10 @@ TEST_F(QueryProcessorTest, OneTermExactMatch) { search_spec.set_query("hello"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); std::vector<TermMatchInfo> matched_terms_stats; ASSERT_THAT(results.root_iterator->Advance(), IsOk()); @@ -516,8 +593,7 @@ TEST_F(QueryProcessorTest, AndSameTermExactMatch) { SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; EXPECT_THAT( AddTokenToIndex(document_id, section_id, term_match_type, "hello"), @@ -534,8 +610,10 @@ TEST_F(QueryProcessorTest, AndSameTermExactMatch) { search_spec.set_query("hello hello"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); std::vector<TermMatchInfo> matched_terms_stats; ASSERT_THAT(results.root_iterator->Advance(), IsOk()); @@ -586,8 +664,7 @@ TEST_F(QueryProcessorTest, AndTwoTermExactMatch) { SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; EXPECT_THAT( AddTokenToIndex(document_id, section_id, term_match_type, "hello"), @@ -607,8 +684,10 @@ TEST_F(QueryProcessorTest, AndTwoTermExactMatch) { search_spec.set_query("hello world"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); std::vector<TermMatchInfo> matched_terms_stats; ASSERT_THAT(results.root_iterator->Advance(), IsOk()); @@ -661,8 +740,7 @@ TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) { SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; TermMatchType::Code term_match_type = TermMatchType::PREFIX; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; EXPECT_THAT( AddTokenToIndex(document_id, section_id, term_match_type, "hello"), @@ -679,8 +757,10 @@ TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) { search_spec.set_query("he he"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); std::vector<TermMatchInfo> matched_terms_stats; ASSERT_THAT(results.root_iterator->Advance(), IsOk()); @@ -730,8 +810,7 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) { // Populate the index SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; TermMatchType::Code term_match_type = TermMatchType::PREFIX; EXPECT_THAT( @@ -752,8 +831,10 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) { search_spec.set_query("he wo"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds std::vector<TermMatchInfo> matched_terms_stats; @@ -806,8 +887,7 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) { // Populate the index SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; TermMatchType::Code term_match_type = TermMatchType::PREFIX; EXPECT_THAT(AddTokenToIndex(document_id, section_id, @@ -828,8 +908,10 @@ TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) { search_spec.set_query("hello wo"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds std::vector<TermMatchInfo> matched_terms_stats; @@ -887,8 +969,7 @@ TEST_F(QueryProcessorTest, OrTwoTermExactMatch) { // Populate the index SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; EXPECT_THAT( @@ -909,8 +990,10 @@ TEST_F(QueryProcessorTest, OrTwoTermExactMatch) { search_spec.set_query("hello OR world"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds std::vector<TermMatchInfo> matched_terms_stats; @@ -976,8 +1059,7 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) { // Populate the index SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; TermMatchType::Code term_match_type = TermMatchType::PREFIX; EXPECT_THAT( @@ -998,8 +1080,10 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) { search_spec.set_query("he OR wo"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds std::vector<TermMatchInfo> matched_terms_stats; @@ -1064,8 +1148,7 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) { // Populate the index SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; EXPECT_THAT(AddTokenToIndex(document_id1, section_id, TermMatchType::EXACT_ONLY, "hello"), @@ -1085,8 +1168,10 @@ TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) { search_spec.set_query("hello OR wo"); search_spec.set_term_match_type(TermMatchType::PREFIX); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds std::vector<TermMatchInfo> matched_terms_stats; @@ -1150,8 +1235,7 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { // Populate the index SectionId section_id = 0; SectionIdMask section_id_mask = 1U << section_id; - std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + std::array<Hit::TermFrequency, kTotalNumSections> term_frequencies{1}; TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; // Document 1 has content "animal puppy dog" @@ -1163,6 +1247,7 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { IsOk()); EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"), IsOk()); + index_->Merge(); // Document 2 has content "animal kitten cat" EXPECT_THAT( @@ -1188,8 +1273,10 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { search_spec.set_query("puppy OR kitten dog"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Only Document 1 matches since it has puppy AND dog std::vector<TermMatchInfo> matched_terms_stats; @@ -1222,8 +1309,10 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { search_spec.set_query("animal puppy OR kitten"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Both Document 1 and 2 match since Document 1 has animal AND puppy, and // Document 2 has animal AND kitten @@ -1275,8 +1364,10 @@ TEST_F(QueryProcessorTest, CombinedAndOrTerms) { search_spec.set_query("kitten foo OR bar OR cat"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Only Document 2 matches since it has both kitten and cat std::vector<TermMatchInfo> matched_terms_stats; @@ -1365,12 +1456,14 @@ TEST_F(QueryProcessorTest, OneGroup) { search_spec.set_query("puppy OR (kitten foo)"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo(document_id1); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -1441,14 +1534,16 @@ TEST_F(QueryProcessorTest, TwoGroups) { search_spec.set_query("(puppy dog) OR (kitten cat)"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo1(document_id1); - expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo1.UpdateSection(/*section_id=*/0); DocHitInfo expectedDocHitInfo2(document_id2); - expectedDocHitInfo2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo2.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -1519,12 +1614,14 @@ TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) { search_spec.set_query("puppy OR ((((kitten foo))))"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo(document_id1); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -1594,14 +1691,16 @@ TEST_F(QueryProcessorTest, OneLevelNestedGrouping) { search_spec.set_query("puppy OR (kitten(cat))"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo1(document_id1); - expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo1.UpdateSection(/*section_id=*/0); DocHitInfo expectedDocHitInfo2(document_id2); - expectedDocHitInfo2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo2.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -1663,8 +1762,10 @@ TEST_F(QueryProcessorTest, ExcludeTerm) { search_spec.set_query("-hello"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); // We don't know have the section mask to indicate what section "world" // came. It doesn't matter which section it was in since the query doesn't @@ -1727,8 +1828,10 @@ TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) { search_spec.set_query("-foo"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); // Descending order of valid DocumentIds EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), @@ -1799,8 +1902,10 @@ TEST_F(QueryProcessorTest, ExcludeAnd) { search_spec.set_query("-dog -cat"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // The query is interpreted as "exclude all documents that have animal, // and exclude all documents that have cat". Since both documents contain @@ -1815,8 +1920,10 @@ TEST_F(QueryProcessorTest, ExcludeAnd) { search_spec.set_query("-animal cat"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // The query is interpreted as "exclude all documents that have animal, // and include all documents that have cat". Since both documents contain @@ -1889,8 +1996,10 @@ TEST_F(QueryProcessorTest, ExcludeOr) { search_spec.set_query("-animal OR -cat"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // We don't have a section mask indicating which sections in this document // matched the query since it's not based on section-term matching. It's @@ -1906,24 +2015,141 @@ TEST_F(QueryProcessorTest, ExcludeOr) { search_spec.set_query("animal OR -cat"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo1(document_id1); - expectedDocHitInfo1.UpdateSection(/*section_id=*/0, - /*hit_term_frequency=*/1); + expectedDocHitInfo1.UpdateSection(/*section_id=*/0); DocHitInfo expectedDocHitInfo2(document_id2); - expectedDocHitInfo2.UpdateSection(/*section_id=*/0, - /*hit_term_frequency=*/1); + expectedDocHitInfo2.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1)); EXPECT_THAT(results.query_terms, SizeIs(1)); EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal")); - EXPECT_THAT(results.query_term_iterators, SizeIs(1)); } } +TEST_F(QueryProcessorTest, WithoutTermFrequency) { + // Create the schema and document store + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email")) + .Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_, + schema_store_.get())); + document_store_ = std::move(create_result.document_store); + + // These documents don't actually match to the tokens in the index. We're + // just inserting the documents so that the DocHitInfoIterators will see + // that the document exists and not filter out the DocumentId as deleted. + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema("email") + .Build())); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "2") + .SetSchema("email") + .Build())); + + // Populate the index + SectionId section_id = 0; + SectionIdMask section_id_mask = 1U << section_id; + TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; + + // Document 1 has content "animal puppy dog", which is added to the main + // index. + EXPECT_THAT( + AddTokenToIndex(document_id1, section_id, term_match_type, "animal"), + IsOk()); + EXPECT_THAT( + AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"), + IsOk()); + EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"), + IsOk()); + ASSERT_THAT(index_->Merge(), IsOk()); + + // Document 2 has content "animal kitten cat", which is added to the lite + // index. + EXPECT_THAT( + AddTokenToIndex(document_id2, section_id, term_match_type, "animal"), + IsOk()); + EXPECT_THAT( + AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"), + IsOk()); + EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"), + IsOk()); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QueryProcessor> query_processor, + QueryProcessor::Create(index_.get(), language_segmenter_.get(), + normalizer_.get(), document_store_.get(), + schema_store_.get())); + + // OR gets precedence over AND, this is parsed as (animal AND (puppy OR + // kitten)) + SearchSpecProto search_spec; + search_spec.set_query("animal puppy OR kitten"); + search_spec.set_term_match_type(term_match_type); + + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); + // Since need_hit_term_frequency is false, the expected term frequencies + // should all be 0. + Hit::TermFrequencyArray exp_term_frequencies{0}; + + // Descending order of valid DocumentIds + // The first Document to match (Document 2) matches on 'animal' AND 'kitten' + std::vector<TermMatchInfo> matched_terms_stats; + ASSERT_THAT(results.root_iterator->Advance(), IsOk()); + EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2); + EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(), + section_id_mask); + results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats); + ASSERT_THAT(matched_terms_stats, SizeIs(2)); // 2 terms + EXPECT_EQ(matched_terms_stats.at(0).term, "animal"); + EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask); + EXPECT_THAT(matched_terms_stats.at(0).term_frequencies, + ElementsAreArray(exp_term_frequencies)); + EXPECT_EQ(matched_terms_stats.at(1).term, "kitten"); + EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask); + EXPECT_THAT(matched_terms_stats.at(1).term_frequencies, + ElementsAreArray(exp_term_frequencies)); + + // The second Document to match (Document 1) matches on 'animal' AND 'puppy' + matched_terms_stats.clear(); + ASSERT_THAT(results.root_iterator->Advance(), IsOk()); + EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1); + EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(), + section_id_mask); + results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats); + ASSERT_THAT(matched_terms_stats, SizeIs(2)); // 2 terms + EXPECT_EQ(matched_terms_stats.at(0).term, "animal"); + EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask); + EXPECT_THAT(matched_terms_stats.at(0).term_frequencies, + ElementsAreArray(exp_term_frequencies)); + EXPECT_EQ(matched_terms_stats.at(1).term, "puppy"); + EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask); + EXPECT_THAT(matched_terms_stats.at(1).term_frequencies, + ElementsAreArray(exp_term_frequencies)); + + // This should be empty because ranking_strategy != RELEVANCE_SCORE + EXPECT_THAT(results.query_term_iterators, IsEmpty()); +} + TEST_F(QueryProcessorTest, DeletedFilter) { // Create the schema and document store SchemaProto schema = SchemaBuilder() @@ -1985,12 +2211,14 @@ TEST_F(QueryProcessorTest, DeletedFilter) { search_spec.set_query("animal"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo(document_id2); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -2059,12 +2287,14 @@ TEST_F(QueryProcessorTest, NamespaceFilter) { search_spec.set_term_match_type(term_match_type); search_spec.add_namespace_filters("namespace1"); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo(document_id1); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -2131,12 +2361,14 @@ TEST_F(QueryProcessorTest, SchemaTypeFilter) { search_spec.set_term_match_type(term_match_type); search_spec.add_schema_type_filters("email"); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo(document_id1); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -2197,12 +2429,14 @@ TEST_F(QueryProcessorTest, SectionFilterForOneDocument) { search_spec.set_query("subject:animal"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Descending order of valid DocumentIds DocHitInfo expectedDocHitInfo(document_id); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -2289,15 +2523,17 @@ TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) { search_spec.set_query("foo:animal"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Ordered by descending DocumentId, so message comes first since it was // inserted last DocHitInfo expectedDocHitInfo1(message_document_id); - expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo1.UpdateSection(/*section_id=*/0); DocHitInfo expectedDocHitInfo2(email_document_id); - expectedDocHitInfo2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/1); + expectedDocHitInfo2.UpdateSection(/*section_id=*/1); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -2374,13 +2610,15 @@ TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) { search_spec.add_schema_type_filters("email"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Shouldn't include the message document since we're only looking at email // types DocHitInfo expectedDocHitInfo(email_document_id); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -2459,13 +2697,15 @@ TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) { search_spec.set_query("foo:animal"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Even though the section id is the same, we should be able to tell that it // doesn't match to the name of the section filter DocHitInfo expectedDocHitInfo(email_document_id); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); EXPECT_THAT(results.query_terms, SizeIs(1)); @@ -2520,8 +2760,10 @@ TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) { search_spec.set_query("nonexistent:animal"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Even though the section id is the same, we should be able to tell that it // doesn't match to the name of the section filter @@ -2587,8 +2829,10 @@ TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) { search_spec.set_query("foo:animal"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Even though the section id is the same, we should be able to tell that it // doesn't match to the name of the section filter @@ -2668,15 +2912,17 @@ TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) { search_spec.set_query("cat OR foo:animal"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE)); // Ordered by descending DocumentId, so message comes first since it was // inserted last DocHitInfo expectedDocHitInfo1(message_document_id); - expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo1.UpdateSection(/*section_id=*/0); DocHitInfo expectedDocHitInfo2(email_document_id); - expectedDocHitInfo2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo2.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2)); EXPECT_THAT(results.query_terms, SizeIs(2)); @@ -2735,11 +2981,13 @@ TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { search_spec.set_query("hello"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); DocHitInfo expectedDocHitInfo(document_id); - expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1); + expectedDocHitInfo.UpdateSection(/*section_id=*/0); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), ElementsAre(expectedDocHitInfo)); } @@ -2794,8 +3042,10 @@ TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) { search_spec.set_query("hello"); search_spec.set_term_match_type(term_match_type); - ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results, - query_processor->ParseSearch(search_spec)); + ICING_ASSERT_OK_AND_ASSIGN( + QueryProcessor::QueryResults results, + query_processor->ParseSearch(search_spec, + ScoringSpecProto::RankingStrategy::NONE)); EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty()); } |