diff options
author | Tim Barron <tjbarron@google.com> | 2023-03-31 16:48:52 -0700 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2023-03-31 16:48:52 -0700 |
commit | b59049b030cc330b6c8ae1d03ea4c1a34235ac9b (patch) | |
tree | 09bb4692739de390a5ad70e5dc4f9acb7830fa3e /icing | |
parent | f3155ae11285c16d8d9de56b1ec1a1e0def2cf62 (diff) | |
download | icing-b59049b030cc330b6c8ae1d03ea4c1a34235ac9b.tar.gz |
Update Icing from upstream.
Descriptions:
========================================================================
Make int64_t the CreateUsageReport timestamp type
========================================================================
Create ResultAdjustmentInfo and refactor ResultStateV2
========================================================================
Wrap parent/child adjustment info in std::unique_ptr
========================================================================
Apply join child snippet
========================================================================
Apply join child projection
========================================================================
Add IntegerSectionIndexingHandlerTest
========================================================================
Delete Result Retriever; this class is dead code.
========================================================================
Introduce a placeholder for the custom function hasPropertyDefined(member)
========================================================================
Fix libtextclassifier3::StatusOr
========================================================================
Performance improvements to SnippetRetriever.
========================================================================
Bug: 193244409
Bug: 256022027
Bug: 259744228
Bug: 268680462
Bug: 270102295
Bug: 271015984
Bug: 274627497
Change-Id: I6bad316b28bb289fa8e3f5b0982d6aaa9e0d135f
Diffstat (limited to 'icing')
43 files changed, 3123 insertions, 2915 deletions
diff --git a/icing/absl_ports/status_test.cc b/icing/absl_ports/status_test.cc new file mode 100644 index 0000000..1909302 --- /dev/null +++ b/icing/absl_ports/status_test.cc @@ -0,0 +1,53 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <utility> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "gtest/gtest.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/proto/document.pb.h" + +namespace icing { +namespace lib { + +TEST(StatusTest, StatusOrOfProtoConstructorTest) { + libtextclassifier3::StatusOr<DocumentProto> status_or = + absl_ports::InvalidArgumentError("test"); + libtextclassifier3::StatusOr<DocumentProto> new_status_or = status_or; +} + +TEST(StatusTest, StatusOrOfProtoMoveConstructorTest) { + libtextclassifier3::StatusOr<DocumentProto> status_or = + absl_ports::InvalidArgumentError("test"); + libtextclassifier3::StatusOr<DocumentProto> new_status_or = + std::move(status_or); +} + +TEST(StatusTest, StatusOrOfProtoAssignmentTest) { + libtextclassifier3::StatusOr<DocumentProto> status_or = + absl_ports::InvalidArgumentError("test"); + libtextclassifier3::StatusOr<DocumentProto> new_status_or; + new_status_or = status_or; +} + +TEST(StatusTest, StatusOrOfProtoMoveAssignmentTest) { + libtextclassifier3::StatusOr<DocumentProto> status_or = + absl_ports::InvalidArgumentError("test"); + libtextclassifier3::StatusOr<DocumentProto> new_status_or; + new_status_or = 
std::move(status_or); +} + +} // namespace lib +} // namespace icing diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 7800e7e..b81390f 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -68,6 +68,7 @@ #include "icing/result/page-result.h" #include "icing/result/projection-tree.h" #include "icing/result/projector.h" +#include "icing/result/result-adjustment-info.h" #include "icing/result/result-retriever-v2.h" #include "icing/schema/schema-store.h" #include "icing/schema/schema-util.h" @@ -1782,6 +1783,7 @@ SearchResultProto IcingSearchEngine::Search( const JoinSpecProto& join_spec = search_spec.join_spec(); std::unique_ptr<JoinChildrenFetcher> join_children_fetcher; + std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info; if (!join_spec.parent_property_expression().empty() && !join_spec.child_property_expression().empty()) { // Process child query @@ -1810,6 +1812,13 @@ SearchResultProto IcingSearchEngine::Search( } join_children_fetcher = std::make_unique<JoinChildrenFetcher>( std::move(join_children_fetcher_or).ValueOrDie()); + + // Assign child's ResultAdjustmentInfo. + child_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>( + join_spec.nested_spec().search_spec(), + join_spec.nested_spec().scoring_spec(), + join_spec.nested_spec().result_spec(), + std::move(nested_query_scoring_results.query_terms)); } // Process parent query @@ -1836,6 +1845,11 @@ SearchResultProto IcingSearchEngine::Search( return result_proto; } + // Construct parent's result adjustment info. 
+ auto parent_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>( + search_spec, scoring_spec, result_spec, + std::move(query_scoring_results.query_terms)); + std::unique_ptr<ScoredDocumentHitsRanker> ranker; if (join_children_fetcher != nullptr) { // Join 2 scored document hits @@ -1892,9 +1906,9 @@ SearchResultProto IcingSearchEngine::Search( libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> page_result_info_or = result_state_manager_->CacheAndRetrieveFirstPage( - std::move(ranker), std::move(query_scoring_results.query_terms), - search_spec, scoring_spec, result_spec, *document_store_, - *result_retriever); + std::move(ranker), std::move(parent_result_adjustment_info), + std::move(child_result_adjustment_info), result_spec, + *document_store_, *result_retriever); if (!page_result_info_or.ok()) { TransformStatus(page_result_info_or.status(), result_status); query_stats->set_document_retrieval_latency_ms( diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index 2c2fbeb..9e8686d 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -90,14 +90,6 @@ constexpr int kAvgDocumentSize = 300; // ASSUME: ~75% of the document's size comes from it's content. constexpr float kContentSizePct = 0.7; -// Average length of word in English is 4.7 characters. -constexpr int kAvgTokenLen = 5; -// Made up value. 
This results in a fairly reasonable language - the majority of -// generated words are 3-9 characters, ~3% of words are >=20 chars, and the -// longest ones are 27 chars, (roughly consistent with the longest, -// non-contrived English words -// https://en.wikipedia.org/wiki/Longest_word_in_English) -constexpr int kTokenStdDev = 7; constexpr int kLanguageSize = 1000; // Lite Index size required to fit 128k docs, each doc requires ~64 bytes of @@ -117,22 +109,6 @@ std::vector<std::string> CreateNamespaces(int num_namespaces) { return namespaces; } -// Creates a vector containing num_words randomly-generated words for use by -// documents. -template <typename Rand> -std::vector<std::string> CreateLanguages(int num_words, Rand* r) { - std::vector<std::string> language; - std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev); - while (--num_words >= 0) { - int word_length = 0; - while (word_length < 1) { - word_length = std::round(norm_dist(*r)); - } - language.push_back(RandomString(kAlNumAlphabet, word_length, r)); - } - return language; -} - SearchSpecProto CreateSearchSpec(const std::string& query, const std::vector<std::string>& namespaces, TermMatchType::Code match_type) { diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc index e953d71..f5b747b 100644 --- a/icing/icing-search-engine_search_test.cc +++ b/icing/icing-search-engine_search_test.cc @@ -213,7 +213,7 @@ ScoringSpecProto GetDefaultScoringSpec() { } UsageReport CreateUsageReport(std::string name_space, std::string uri, - int64 timestamp_ms, + int64_t timestamp_ms, UsageReport::UsageType usage_type) { UsageReport usage_report; usage_report.set_document_namespace(name_space); @@ -3866,6 +3866,260 @@ TEST_P(IcingSearchEngineSearchTest, JoinByQualifiedId) { EqualsSearchResultIgnoreStatsAndScores(expected_result3)); } +TEST_P(IcingSearchEngineSearchTest, JoinSnippet) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + 
.SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("personQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + DocumentProto person = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person") + .SetSchema("Person") + .AddStringProperty("firstName", "first") + .AddStringProperty("lastName", "last") + .AddStringProperty("emailAddress", "email@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + + DocumentProto email = + DocumentBuilder() + .SetKey("namespace", "email") + .SetSchema("Email") + .AddStringProperty("subject", "test subject") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(3) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email).status(), ProtoIsOk()); + + // Parent SearchSpec + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + 
search_spec.set_query("firstName:first"); + search_spec.set_search_type(GetParam()); + + // JoinSpec + JoinSpecProto* join_spec = search_spec.mutable_join_spec(); + join_spec->set_max_joined_child_count(100); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("personQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::MAX); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::PREFIX); + nested_search_spec->set_query("subject:test"); + nested_search_spec->set_search_type(GetParam()); + // Child ResultSpec (with snippet) + ResultSpecProto* nested_result_spec = nested_spec->mutable_result_spec(); + nested_result_spec->mutable_snippet_spec()->set_max_window_utf32_length(64); + nested_result_spec->mutable_snippet_spec()->set_num_matches_per_property(1); + nested_result_spec->mutable_snippet_spec()->set_num_to_snippet(1); + *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + + // Parent ScoringSpec + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + + // Parent ResultSpec (without snippet) + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + + SearchResultProto result = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.next_page_token(), Eq(kInvalidNextPageToken)); + + ASSERT_THAT(result.results(), SizeIs(1)); + // Check parent doc (person). + const DocumentProto& result_parent_document = result.results(0).document(); + EXPECT_THAT(result_parent_document, EqualsProto(person)); + EXPECT_THAT(result.results(0).snippet().entries(), IsEmpty()); + + // Check child doc (email). 
+ ASSERT_THAT(result.results(0).joined_results(), SizeIs(1)); + const DocumentProto& result_child_document = + result.results(0).joined_results(0).document(); + const SnippetProto& result_child_snippet = + result.results(0).joined_results(0).snippet(); + EXPECT_THAT(result_child_document, EqualsProto(email)); + ASSERT_THAT(result_child_snippet.entries(), SizeIs(1)); + EXPECT_THAT(result_child_snippet.entries(0).property_name(), Eq("subject")); + std::string_view content = GetString( + &result_child_document, result_child_snippet.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_child_snippet.entries(0)), + ElementsAre("test subject")); + EXPECT_THAT(GetMatches(content, result_child_snippet.entries(0)), + ElementsAre("test")); +} + +TEST_P(IcingSearchEngineSearchTest, JoinProjection) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("firstName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("lastName") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("personQualifiedId") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + DocumentProto person = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person") + .SetSchema("Person") + .AddStringProperty("firstName", "first") + 
.AddStringProperty("lastName", "last") + .AddStringProperty("emailAddress", "email@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + + DocumentProto email = + DocumentBuilder() + .SetKey("namespace", "email") + .SetSchema("Email") + .AddStringProperty("subject", "test subject") + .AddStringProperty("personQualifiedId", "pkg$db/namespace#person") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(3) + .Build(); + + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(person).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(email).status(), ProtoIsOk()); + + // Parent SearchSpec + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.set_query("firstName:first"); + search_spec.set_search_type(GetParam()); + + // JoinSpec + JoinSpecProto* join_spec = search_spec.mutable_join_spec(); + join_spec->set_max_joined_child_count(100); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("personQualifiedId"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::MAX); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::PREFIX); + nested_search_spec->set_query("subject:test"); + nested_search_spec->set_search_type(GetParam()); + // Child ResultSpec (with projection) + ResultSpecProto* nested_result_spec = nested_spec->mutable_result_spec(); + TypePropertyMask* type_property_mask = + nested_result_spec->add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("subject"); + 
*nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + + // Parent ScoringSpec + ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); + + // Parent ResultSpec (with projection) + ResultSpecProto result_spec; + result_spec.set_num_per_page(1); + type_property_mask = result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Person"); + type_property_mask->add_paths("emailAddress"); + + SearchResultProto result = + icing.Search(search_spec, scoring_spec, result_spec); + EXPECT_THAT(result.status(), ProtoIsOk()); + EXPECT_THAT(result.next_page_token(), Eq(kInvalidNextPageToken)); + + ASSERT_THAT(result.results(), SizeIs(1)); + // Check parent doc (person): should contain only the "emailAddress" property. + DocumentProto projected_person_document = + DocumentBuilder() + .SetKey("pkg$db/namespace", "person") + .SetSchema("Person") + .AddStringProperty("emailAddress", "email@gmail.com") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(1) + .Build(); + EXPECT_THAT(result.results().at(0).document(), + EqualsProto(projected_person_document)); + + // Check child doc (email): should contain only the "subject" property. 
+ ASSERT_THAT(result.results(0).joined_results(), SizeIs(1)); + DocumentProto projected_email_document = + DocumentBuilder() + .SetKey("namespace", "email") + .SetSchema("Email") + .AddStringProperty("subject", "test subject") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .SetScore(3) + .Build(); + EXPECT_THAT(result.results(0).joined_results(0).document(), + EqualsProto(projected_email_document)); +} + TEST_F(IcingSearchEngineSearchTest, JoinWithAdvancedScoring) { SchemaProto schema = SchemaBuilder() diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index e0070e0..1340ebb 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -184,7 +184,7 @@ ScoringSpecProto GetDefaultScoringSpec() { } UsageReport CreateUsageReport(std::string name_space, std::string uri, - int64 timestamp_ms, + int64_t timestamp_ms, UsageReport::UsageType usage_type) { UsageReport usage_report; usage_report.set_document_namespace(name_space); diff --git a/icing/index/data-indexing-handler.h b/icing/index/data-indexing-handler.h index 0061b79..16a1796 100644 --- a/icing/index/data-indexing-handler.h +++ b/icing/index/data-indexing-handler.h @@ -50,9 +50,10 @@ class DataIndexingHandler { // nullptr. // /// Returns: - // - OK on success - // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the - // document_id of a previously indexed document in non recovery mode + // - OK on success. + // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less + // than or equal to the document_id of a previously indexed document in + // non recovery mode. // - Any other errors. It depends on each implementation. 
virtual libtextclassifier3::Status Handle( const TokenizedDocument& tokenized_document, DocumentId document_id, diff --git a/icing/index/integer-section-indexing-handler.cc b/icing/index/integer-section-indexing-handler.cc index d201a1a..584f028 100644 --- a/icing/index/integer-section-indexing-handler.cc +++ b/icing/index/integer-section-indexing-handler.cc @@ -43,6 +43,11 @@ libtextclassifier3::Status IntegerSectionIndexingHandler::Handle( bool recovery_mode, PutDocumentStatsProto* put_document_stats) { // TODO(b/259744228): set integer indexing latency and other stats + if (!IsDocumentIdValid(document_id)) { + return absl_ports::InvalidArgumentError( + IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id)); + } + if (integer_index_.last_added_document_id() != kInvalidDocumentId && document_id <= integer_index_.last_added_document_id()) { if (recovery_mode) { diff --git a/icing/index/integer-section-indexing-handler.h b/icing/index/integer-section-indexing-handler.h index 42ce07e..0a501aa 100644 --- a/icing/index/integer-section-indexing-handler.h +++ b/icing/index/integer-section-indexing-handler.h @@ -48,9 +48,10 @@ class IntegerSectionIndexingHandler : public DataIndexingHandler { // all contents in tokenized_document.integer_sections. // // Returns: - // - OK on success - // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the - // document_id of a previously indexed document in non recovery mode. + // - OK on success. + // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less + // than or equal to the document_id of a previously indexed document in + // non recovery mode. // - Any NumericIndex<int64_t>::Editor errors. 
libtextclassifier3::Status Handle( const TokenizedDocument& tokenized_document, DocumentId document_id, diff --git a/icing/index/integer-section-indexing-handler_test.cc b/icing/index/integer-section-indexing-handler_test.cc new file mode 100644 index 0000000..71c6bd5 --- /dev/null +++ b/icing/index/integer-section-indexing-handler_test.cc @@ -0,0 +1,590 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/index/integer-section-indexing-handler.h" + +#include <limits> +#include <memory> +#include <string> +#include <string_view> +#include <utility> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/index/hit/doc-hit-info.h" +#include "icing/index/iterator/doc-hit-info-iterator.h" +#include "icing/index/numeric/integer-index.h" +#include "icing/index/numeric/numeric-index.h" +#include "icing/portable/platform.h" +#include "icing/proto/document.pb.h" +#include "icing/proto/schema.pb.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/store/document-id.h" +#include "icing/store/document-store.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/test-data.h" 
+#include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/tokenization/language-segmenter.h" +#include "icing/util/tokenized-document.h" +#include "unicode/uloc.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::IsTrue; + +// Indexable properties (section) and section id. Section id is determined by +// the lexicographical order of indexable property paths. +// Schema type with indexable properties: FakeType +// Section id = 0: "body" +// Section id = 1: "timestamp" +// Section id = 2: "title" +static constexpr std::string_view kFakeType = "FakeType"; +static constexpr std::string_view kPropertyBody = "body"; +static constexpr std::string_view kPropertyTimestamp = "timestamp"; +static constexpr std::string_view kPropertyTitle = "title"; + +static constexpr SectionId kSectionIdTimestamp = 1; + +// Schema type with nested indexable properties: NestedType +// Section id = 0: "name" +// Section id = 1: "nested.body" +// Section id = 2: "nested.timestamp" +// Section id = 3: "nested.title" +// Section id = 4: "price" +static constexpr std::string_view kNestedType = "NestedType"; +static constexpr std::string_view kPropertyName = "name"; +static constexpr std::string_view kPropertyNestedDoc = "nested"; +static constexpr std::string_view kPropertyPrice = "price"; + +static constexpr SectionId kSectionIdNestedTimestamp = 2; +static constexpr SectionId kSectionIdPrice = 4; + +class IntegerSectionIndexingHandlerTest : public ::testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + ICING_ASSERT_OK( + // File generated via icu_data_file rule in //icing/BUILD. 
+ icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + + base_dir_ = GetTestTempDir() + "/icing_test"; + ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()), + IsTrue()); + + integer_index_working_path_ = base_dir_ + "/integer_index"; + schema_store_dir_ = base_dir_ + "/schema_store"; + document_store_dir_ = base_dir_ + "/document_store"; + + ICING_ASSERT_OK_AND_ASSIGN( + integer_index_, + IntegerIndex::Create(filesystem_, integer_index_working_path_)); + + language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US); + ICING_ASSERT_OK_AND_ASSIGN( + lang_segmenter_, + language_segmenter_factory::Create(std::move(segmenter_options))); + + ASSERT_THAT( + filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()), + IsTrue()); + ICING_ASSERT_OK_AND_ASSIGN( + schema_store_, + SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_)); + SchemaProto schema = + SchemaBuilder() + .AddType( + SchemaTypeConfigBuilder() + .SetType(kFakeType) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyTitle) + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyBody) + .SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyTimestamp) + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType( + SchemaTypeConfigBuilder() + .SetType(kNestedType) + .AddProperty( + PropertyConfigBuilder() + .SetName(kPropertyNestedDoc) + .SetDataTypeDocument( + kFakeType, /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyPrice) + .SetDataTypeInt64(NUMERIC_MATCH_RANGE) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName(kPropertyName) + 
.SetDataTypeString(TERM_MATCH_EXACT, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ICING_ASSERT_OK(schema_store_->SetSchema(schema)); + + ASSERT_TRUE( + filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult doc_store_create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + document_store_ = std::move(doc_store_create_result.document_store); + } + + void TearDown() override { + document_store_.reset(); + schema_store_.reset(); + lang_segmenter_.reset(); + integer_index_.reset(); + + filesystem_.DeleteDirectoryRecursively(base_dir_.c_str()); + } + + Filesystem filesystem_; + FakeClock fake_clock_; + std::string base_dir_; + std::string integer_index_working_path_; + std::string schema_store_dir_; + std::string document_store_dir_; + + std::unique_ptr<NumericIndex<int64_t>> integer_index_; + std::unique_ptr<LanguageSegmenter> lang_segmenter_; + std::unique_ptr<SchemaStore> schema_store_; + std::unique_ptr<DocumentStore> document_store_; +}; + +std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) { + std::vector<DocHitInfo> infos; + while (iterator->Advance().ok()) { + infos.push_back(iterator->doc_hit_info()); + } + return infos; +} + +TEST_F(IntegerSectionIndexingHandlerTest, CreationWithNullPointerShouldFail) { + EXPECT_THAT(IntegerSectionIndexingHandler::Create(/*clock=*/nullptr, + integer_index_.get()), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); + + EXPECT_THAT(IntegerSectionIndexingHandler::Create(&fake_clock_, + /*integer_index=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); +} + +TEST_F(IntegerSectionIndexingHandlerTest, HandleIntegerSection) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyTitle), "title") + 
.AddStringProperty(std::string(kPropertyBody), "body") + .AddInt64Property(std::string(kPropertyTimestamp), 123) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + std::move(document))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store_->Put(tokenized_document.document())); + + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId)); + // Handle document. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerSectionIndexingHandler> handler, + IntegerSectionIndexingHandler::Create(&fake_clock_, + integer_index_.get())); + EXPECT_THAT( + handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false, + /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id)); + + // Query "timestamp". + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + document_id, std::vector<SectionId>{kSectionIdTimestamp}))); +} + +TEST_F(IntegerSectionIndexingHandlerTest, HandleNestedIntegerSection) { + DocumentProto nested_document = + DocumentBuilder() + .SetKey("icing", "nested_type/1") + .SetSchema(std::string(kNestedType)) + .AddDocumentProperty( + std::string(kPropertyNestedDoc), + DocumentBuilder() + .SetKey("icing", "nested_fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyTitle), + "nested title") + .AddStringProperty(std::string(kPropertyBody), "nested body") + .AddInt64Property(std::string(kPropertyTimestamp), 123) + .Build()) + .AddInt64Property(std::string(kPropertyPrice), 456) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument 
tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + std::move(nested_document))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store_->Put(tokenized_document.document())); + + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId)); + // Handle nested_document. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerSectionIndexingHandler> handler, + IntegerSectionIndexingHandler::Create(&fake_clock_, + integer_index_.get())); + EXPECT_THAT( + handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false, + /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id)); + + // Query "nested.timestamp". + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + integer_index_->GetIterator( + "nested.timestamp", /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT( + GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + document_id, std::vector<SectionId>{kSectionIdNestedTimestamp}))); + + // Query "price". + ICING_ASSERT_OK_AND_ASSIGN( + itr, + integer_index_->GetIterator( + kPropertyPrice, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + document_id, std::vector<SectionId>{kSectionIdPrice}))); + + // Query "timestamp". Should get empty result. 
+ ICING_ASSERT_OK_AND_ASSIGN( + itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); +} + +TEST_F(IntegerSectionIndexingHandlerTest, HandleShouldSkipEmptyIntegerSection) { + // Create a FakeType document without "timestamp". + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyTitle), "title") + .AddStringProperty(std::string(kPropertyBody), "body") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + std::move(document))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store_->Put(tokenized_document.document())); + + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId)); + // Handle document. Index data should remain unchanged since there is no + // indexable integer, but last_added_document_id should be updated. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerSectionIndexingHandler> handler, + IntegerSectionIndexingHandler::Create(&fake_clock_, + integer_index_.get())); + EXPECT_THAT( + handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false, + /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id)); + + // Query "timestamp". Should get empty result. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); +} + +TEST_F(IntegerSectionIndexingHandlerTest, + HandleInvalidDocumentIdShouldReturnInvalidArgumentError) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyTitle), "title") + .AddStringProperty(std::string(kPropertyBody), "body") + .AddInt64Property(std::string(kPropertyTimestamp), 123) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + std::move(document))); + ICING_ASSERT_OK(document_store_->Put(tokenized_document.document())); + + static constexpr DocumentId kCurrentDocumentId = 3; + integer_index_->set_last_added_document_id(kCurrentDocumentId); + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerSectionIndexingHandler> handler, + IntegerSectionIndexingHandler::Create(&fake_clock_, + integer_index_.get())); + + // Handling document with kInvalidDocumentId should cause a failure, and both + // index data and last_added_document_id should remain unchanged. + EXPECT_THAT( + handler->Handle(tokenized_document, kInvalidDocumentId, + /*recovery_mode=*/false, /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId)); + + // Query "timestamp". Should get empty result. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); + + // Recovery mode should get the same result. + EXPECT_THAT( + handler->Handle(tokenized_document, kInvalidDocumentId, + /*recovery_mode=*/true, /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(kCurrentDocumentId)); + + // Query "timestamp". Should get empty result. + ICING_ASSERT_OK_AND_ASSIGN( + itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); +} + +TEST_F(IntegerSectionIndexingHandlerTest, + HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyTitle), "title") + .AddStringProperty(std::string(kPropertyBody), "body") + .AddInt64Property(std::string(kPropertyTimestamp), 123) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + std::move(document))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store_->Put(tokenized_document.document())); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerSectionIndexingHandler> handler, + IntegerSectionIndexingHandler::Create(&fake_clock_, + integer_index_.get())); + + // Handling document with document_id == last_added_document_id should cause a + // failure, and both index data and last_added_document_id should 
remain + // unchanged. + integer_index_->set_last_added_document_id(document_id); + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id)); + EXPECT_THAT( + handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false, + /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id)); + + // Query "timestamp". Should get empty result. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); + + // Handling document with document_id < last_added_document_id should cause a + // failure, and both index data and last_added_document_id should remain + // unchanged. + integer_index_->set_last_added_document_id(document_id + 1); + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id + 1)); + EXPECT_THAT( + handler->Handle(tokenized_document, document_id, /*recovery_mode=*/false, + /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id + 1)); + + // Query "timestamp". Should get empty result. 
+ ICING_ASSERT_OK_AND_ASSIGN( + itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), IsEmpty()); +} + +TEST_F(IntegerSectionIndexingHandlerTest, + HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) { + DocumentProto document1 = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyTitle), "title one") + .AddStringProperty(std::string(kPropertyBody), "body one") + .AddInt64Property(std::string(kPropertyTimestamp), 123) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("icing", "fake_type/2") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyTitle), "title two") + .AddStringProperty(std::string(kPropertyBody), "body two") + .AddInt64Property(std::string(kPropertyTimestamp), 456) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document1, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + std::move(document1))); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document2, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + std::move(document2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(tokenized_document1.document())); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(tokenized_document2.document())); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<IntegerSectionIndexingHandler> handler, + IntegerSectionIndexingHandler::Create(&fake_clock_, + integer_index_.get())); + + // Handle document with document_id > last_added_document_id in recovery mode. + // The handler should index this document and update last_added_document_id. 
+ EXPECT_THAT( + handler->Handle(tokenized_document1, document_id1, /*recovery_mode=*/true, + /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id1)); + + // Query "timestamp". + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + document_id1, std::vector<SectionId>{kSectionIdTimestamp}))); + + // Handle document with document_id == last_added_document_id in recovery + // mode. We should not get any error, but the handler should ignore the + // document, so both index data and last_added_document_id should remain + // unchanged. + integer_index_->set_last_added_document_id(document_id2); + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id2)); + EXPECT_THAT( + handler->Handle(tokenized_document2, document_id2, /*recovery_mode=*/true, + /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id2)); + + // Query "timestamp". Should not get hits for document2. + ICING_ASSERT_OK_AND_ASSIGN( + itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + document_id1, std::vector<SectionId>{kSectionIdTimestamp}))); + + // Handle document with document_id < last_added_document_id in recovery mode. + // We should not get any error, but the handler should ignore the document, so + // both index data and last_added_document_id should remain unchanged. 
+ integer_index_->set_last_added_document_id(document_id2 + 1); + ASSERT_THAT(integer_index_->last_added_document_id(), Eq(document_id2 + 1)); + EXPECT_THAT( + handler->Handle(tokenized_document2, document_id2, /*recovery_mode=*/true, + /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(integer_index_->last_added_document_id(), Eq(document_id2 + 1)); + + // Query "timestamp". Should not get hits for document2. + ICING_ASSERT_OK_AND_ASSIGN( + itr, + integer_index_->GetIterator( + kPropertyTimestamp, /*key_lower=*/std::numeric_limits<int64_t>::min(), + /*key_upper=*/std::numeric_limits<int64_t>::max(), *document_store_, + *schema_store_)); + EXPECT_THAT(GetHits(std::move(itr)), + ElementsAre(EqualsDocHitInfo( + document_id1, std::vector<SectionId>{kSectionIdTimestamp}))); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.cc b/icing/join/qualified-id-joinable-property-indexing-handler.cc index 0b28444..150b23b 100644 --- a/icing/join/qualified-id-joinable-property-indexing-handler.cc +++ b/icing/join/qualified-id-joinable-property-indexing-handler.cc @@ -49,6 +49,11 @@ libtextclassifier3::Status QualifiedIdJoinablePropertyIndexingHandler::Handle( // TODO(b/263890397): set qualified id join index processing latency and other // stats. 
+ if (!IsDocumentIdValid(document_id)) { + return absl_ports::InvalidArgumentError( + IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id)); + } + if (qualified_id_join_index_.last_added_document_id() != kInvalidDocumentId && document_id <= qualified_id_join_index_.last_added_document_id()) { if (recovery_mode) { diff --git a/icing/join/qualified-id-joinable-property-indexing-handler.h b/icing/join/qualified-id-joinable-property-indexing-handler.h index 111526e..0265874 100644 --- a/icing/join/qualified-id-joinable-property-indexing-handler.h +++ b/icing/join/qualified-id-joinable-property-indexing-handler.h @@ -47,9 +47,10 @@ class QualifiedIdJoinablePropertyIndexingHandler : public DataIndexingHandler { // qualified id type joinable cache. // /// Returns: - // - OK on success - // - INVALID_ARGUMENT_ERROR if document_id is less than or equal to the - // document_id of a previously indexed document in non recovery mode. + // - OK on success. + // - INVALID_ARGUMENT_ERROR if document_id is invalid OR document_id is less + // than or equal to the document_id of a previously indexed document in + // non recovery mode. // - INTERNAL_ERROR if any other errors occur. // - Any QualifiedIdTypeJoinableIndex errors. libtextclassifier3::Status Handle( diff --git a/icing/join/qualified-id-joinable-property-indexing-handler_test.cc b/icing/join/qualified-id-joinable-property-indexing-handler_test.cc index aa5624c..846520e 100644 --- a/icing/join/qualified-id-joinable-property-indexing-handler_test.cc +++ b/icing/join/qualified-id-joinable-property-indexing-handler_test.cc @@ -188,6 +188,8 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) { TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), document)); + ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kInvalidDocumentId)); // Handle document. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, @@ -196,7 +198,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, HandleJoinableProperty) { EXPECT_THAT( handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), - StatusIs(libtextclassifier3::StatusCode::OK)); + IsOk()); EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(kDefaultDocumentId)); @@ -240,6 +242,8 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), nested_document)); + ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kInvalidDocumentId)); // Handle nested_document. ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, @@ -248,7 +252,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, EXPECT_THAT(handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), - StatusIs(libtextclassifier3::StatusCode::OK)); + IsOk()); EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(kDefaultDocumentId)); @@ -279,7 +283,11 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), document)); - // Handle document. Handle() should ignore invalid format qualified id. + ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kInvalidDocumentId)); + // Handle document. Should ignore invalid format qualified id. + // Index data should remain unchanged since there is no valid qualified id, + // but last_added_document_id should be updated. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, QualifiedIdJoinablePropertyIndexingHandler::Create( @@ -287,8 +295,7 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, EXPECT_THAT( handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), - StatusIs(libtextclassifier3::StatusCode::OK)); - + IsOk()); EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(kDefaultDocumentId)); EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( @@ -309,7 +316,10 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, document)); ASSERT_THAT(tokenized_document.qualified_id_join_properties(), IsEmpty()); - // Handle document. Handle() should ignore invalid format qualified id. + ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kInvalidDocumentId)); + // Handle document. Index data should remain unchanged since there is no + // qualified id, but last_added_document_id should be updated. 
ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, QualifiedIdJoinablePropertyIndexingHandler::Create( @@ -317,8 +327,119 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, EXPECT_THAT( handler->Handle(tokenized_document, kDefaultDocumentId, /*recovery_mode=*/false, /*put_document_stats=*/nullptr), - StatusIs(libtextclassifier3::StatusCode::OK)); + IsOk()); + EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( + kDefaultDocumentId, kQualifiedIdJoinablePropertyId)), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + +TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, + HandleInvalidDocumentIdShouldReturnInvalidArgumentError) { + DocumentProto referenced_document = + DocumentBuilder() + .SetKey("pkg$db/ns", "ref_type/1") + .SetSchema(std::string(kReferencedType)) + .AddStringProperty(std::string(kPropertyName), "one") + .Build(); + + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyQualifiedId), + "pkg$db/ns#ref_type/1") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + document)); + + qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId); + ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, + QualifiedIdJoinablePropertyIndexingHandler::Create( + &fake_clock_, qualified_id_join_index_.get())); + + // Handling document with kInvalidDocumentId should cause a failure, and both + // index data and last_added_document_id should remain unchanged. 
+ EXPECT_THAT( + handler->Handle(tokenized_document, kInvalidDocumentId, + /*recovery_mode=*/false, /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( + kInvalidDocumentId, kQualifiedIdJoinablePropertyId)), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // Recovery mode should get the same result. + EXPECT_THAT( + handler->Handle(tokenized_document, kInvalidDocumentId, + /*recovery_mode=*/true, /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( + kInvalidDocumentId, kQualifiedIdJoinablePropertyId)), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + +TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, + HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) { + DocumentProto referenced_document = + DocumentBuilder() + .SetKey("pkg$db/ns", "ref_type/1") + .SetSchema(std::string(kReferencedType)) + .AddStringProperty(std::string(kPropertyName), "one") + .Build(); + + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyQualifiedId), + "pkg$db/ns#ref_type/1") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + document)); + + qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId); + ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, + QualifiedIdJoinablePropertyIndexingHandler::Create( + 
&fake_clock_, qualified_id_join_index_.get())); + + // Handling document with document_id < last_added_document_id should cause a + // failure, and both index data and last_added_document_id should remain + // unchanged. + ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue()); + EXPECT_THAT( + handler->Handle(tokenized_document, kDefaultDocumentId - 1, + /*recovery_mode=*/false, /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( + kDefaultDocumentId, kQualifiedIdJoinablePropertyId)), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + // Handling document with document_id == last_added_document_id should cause a + // failure, and both index data and last_added_document_id should remain + // unchanged. + EXPECT_THAT( + handler->Handle(tokenized_document, kDefaultDocumentId, + /*recovery_mode=*/false, /*put_document_stats=*/nullptr), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(kDefaultDocumentId)); EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( @@ -326,6 +447,78 @@ TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } +TEST_F(QualifiedIdJoinablePropertyIndexingHandlerTest, + HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) { + DocumentProto referenced_document = + DocumentBuilder() + .SetKey("pkg$db/ns", "ref_type/1") + .SetSchema(std::string(kReferencedType)) + .AddStringProperty(std::string(kPropertyName), "one") + .Build(); + + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPropertyQualifiedId), + "pkg$db/ns#ref_type/1") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, 
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + document)); + + qualified_id_join_index_->set_last_added_document_id(kDefaultDocumentId); + ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<QualifiedIdJoinablePropertyIndexingHandler> handler, + QualifiedIdJoinablePropertyIndexingHandler::Create( + &fake_clock_, qualified_id_join_index_.get())); + + // Handle document with document_id < last_added_document_id in recovery mode. + // We should not get any error, but the handler should ignore the document, so + // both index data and last_added_document_id should remain unchanged. + ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId - 1), IsTrue()); + EXPECT_THAT( + handler->Handle(tokenized_document, kDefaultDocumentId - 1, + /*recovery_mode=*/true, /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( + kDefaultDocumentId, kQualifiedIdJoinablePropertyId)), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + // Handle document with document_id == last_added_document_id in recovery + // mode. We should not get any error, but the handler should ignore the + // document, so both index data and last_added_document_id should remain + // unchanged. + EXPECT_THAT( + handler->Handle(tokenized_document, kDefaultDocumentId, + /*recovery_mode=*/true, /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId)); + EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( + kDefaultDocumentId, kQualifiedIdJoinablePropertyId)), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + // Handle document with document_id > last_added_document_id in recovery mode. + // The handler should index this document and update last_added_document_id. 
+ ASSERT_THAT(IsDocumentIdValid(kDefaultDocumentId + 1), IsTrue()); + EXPECT_THAT( + handler->Handle(tokenized_document, kDefaultDocumentId + 1, + /*recovery_mode=*/true, /*put_document_stats=*/nullptr), + IsOk()); + EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), + Eq(kDefaultDocumentId + 1)); + EXPECT_THAT(qualified_id_join_index_->Get(DocJoinInfo( + kDefaultDocumentId + 1, kQualifiedIdJoinablePropertyId)), + IsOkAndHolds("pkg$db/ns#ref_type/1")); +} + } // namespace } // namespace lib diff --git a/icing/query/advanced_query_parser/query-visitor.cc b/icing/query/advanced_query_parser/query-visitor.cc index 31223a5..c2cee47 100644 --- a/icing/query/advanced_query_parser/query-visitor.cc +++ b/icing/query/advanced_query_parser/query-visitor.cc @@ -28,6 +28,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" +#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h" #include "icing/index/iterator/doc-hit-info-iterator-and.h" #include "icing/index/iterator/doc-hit-info-iterator-none.h" #include "icing/index/iterator/doc-hit-info-iterator-not.h" @@ -204,8 +205,9 @@ void QueryVisitor::RegisterFunctions() { // DocHitInfoIterator search(std::string); // DocHitInfoIterator search(std::string, std::vector<std::string>); - Function::EvalFunction search_eval = - std::bind(&QueryVisitor::SearchFunction, this, std::placeholders::_1); + auto search_eval = [this](std::vector<PendingValue>&& args) { + return this->SearchFunction(std::move(args)); + }; Function search_function = Function::Create(DataType::kDocumentIterator, "search", {Param(DataType::kString), @@ -214,6 +216,18 @@ void QueryVisitor::RegisterFunctions() { .ValueOrDie(); registered_functions_.insert( {search_function.name(), std::move(search_function)}); + + // DocHitInfoIterator propertyDefined(std::string); + auto property_defined = [this](std::vector<PendingValue>&& args) { + return 
this->PropertyDefinedFunction(std::move(args)); + }; + + Function property_defined_function = + Function::Create(DataType::kDocumentIterator, "propertyDefined", + {Param(DataType::kText)}, std::move(property_defined)) + .ValueOrDie(); + registered_functions_.insert( + {property_defined_function.name(), std::move(property_defined_function)}); } libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction( @@ -285,6 +299,24 @@ libtextclassifier3::StatusOr<PendingValue> QueryVisitor::SearchFunction( return PendingValue(std::move(iterator)); } +libtextclassifier3::StatusOr<PendingValue> +QueryVisitor::PropertyDefinedFunction(std::vector<PendingValue>&& args) { + // The first arg is guaranteed to be a TEXT at this point. It should be safe + // to call ValueOrDie. + + // TODO(b/268680462): Consume this and implement the actual iterator. + // const QueryTerm* member = + args.at(0).text_val().ValueOrDie(); + + std::unique_ptr<DocHitInfoIterator> iterator = + std::make_unique<DocHitInfoIteratorAllDocumentId>( + document_store_.last_added_document_id()); + + features_.insert(kPropertyDefinedInSchemaCustomFunctionFeature); + + return PendingValue(std::move(iterator)); +} + libtextclassifier3::StatusOr<int64_t> QueryVisitor::PopPendingIntValue() { if (pending_values_.empty()) { return absl_ports::InvalidArgumentError("Unable to retrieve int value."); diff --git a/icing/query/advanced_query_parser/query-visitor.h b/icing/query/advanced_query_parser/query-visitor.h index 9fcaec0..c5598dd 100644 --- a/icing/query/advanced_query_parser/query-visitor.h +++ b/icing/query/advanced_query_parser/query-visitor.h @@ -242,6 +242,13 @@ class QueryVisitor : public AbstractSyntaxTreeVisitor { libtextclassifier3::StatusOr<PendingValue> SearchFunction( std::vector<PendingValue>&& args); + // Implementation of the propertyDefined(member) custom function. + // Returns: + // - a Pending Value holding a DocHitIterator to be implemented. 
+ // - no errors are returned by the current placeholder implementation + libtextclassifier3::StatusOr<PendingValue> PropertyDefinedFunction( + std::vector<PendingValue>&& args); + // Handles a NaryOperatorNode where the operator is HAS (':') and pushes an // iterator with the proper section filter applied. If the current property // restriction represented by pending_property_restricts and the first child diff --git a/icing/query/advanced_query_parser/query-visitor_test.cc b/icing/query/advanced_query_parser/query-visitor_test.cc index b560d52..7aef40f 100644 --- a/icing/query/advanced_query_parser/query-visitor_test.cc +++ b/icing/query/advanced_query_parser/query-visitor_test.cc @@ -3583,6 +3583,125 @@ TEST_F(QueryVisitorTest, SearchFunctionNestedPropertyRestrictsExpanding) { ElementsAre(docid6, docid0)); } +TEST_F(QueryVisitorTest, + PropertyDefinedFunctionWithNoArgumentReturnsInvalidArgument) { + std::string query = "propertyDefined()"; + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), query, + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + EXPECT_THAT(std::move(query_visitor).ConsumeResults(), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F( + QueryVisitorTest, + PropertyDefinedFunctionWithMoreThanOneTextArgumentReturnsInvalidArgument) { + std::string query = "propertyDefined(foo, bar)"; + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), query, + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + 
EXPECT_THAT(std::move(query_visitor).ConsumeResults(), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(QueryVisitorTest, + PropertyDefinedFunctionWithStringArgumentReturnsInvalidArgument) { + // The argument type is STRING, not TEXT here. + std::string query = "propertyDefined(\"foo\")"; + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), query, + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + EXPECT_THAT(std::move(query_visitor).ConsumeResults(), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_F(QueryVisitorTest, + PropertyDefinedFunctionWithNonTextArgumentReturnsInvalidArgument) { + std::string query = "propertyDefined(1 < 2)"; + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), query, + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + EXPECT_THAT(std::move(query_visitor).ConsumeResults(), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST_P(QueryVisitorTest, PropertyDefinedFunctionCurrentlyReturnsEverything) { + // Set up two schemas, one with a "url" field and one without. 
+ ICING_ASSERT_OK(schema_store_->SetSchema( + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("typeWithUrl") + .AddProperty(PropertyConfigBuilder() + .SetName("url") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("typeWithoutUrl")) + .Build())); + + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri0").SetSchema("typeWithUrl").Build())); + Index::Editor editor = index_->Edit(kDocumentId0, kSectionId1, + TERM_MATCH_PREFIX, /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + ICING_ASSERT_OK(document_store_->Put(DocumentBuilder() + .SetKey("ns", "uri1") + .SetSchema("typeWithoutUrl") + .Build())); + editor = index_->Edit(kDocumentId1, kSectionId1, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + editor.BufferTerm("foo"); + editor.IndexAllBufferedTerms(); + + ICING_ASSERT_OK(document_store_->Put( + DocumentBuilder().SetKey("ns", "uri2").SetSchema("typeWithUrl").Build())); + editor = index_->Edit(kDocumentId2, kSectionId1, TERM_MATCH_PREFIX, + /*namespace_id=*/0); + editor.BufferTerm("bar"); + editor.IndexAllBufferedTerms(); + + std::string query = CreateQuery("foo propertyDefined(url)"); + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Node> root_node, + ParseQueryHelper(query)); + QueryVisitor query_visitor( + index_.get(), numeric_index_.get(), document_store_.get(), + schema_store_.get(), normalizer_.get(), tokenizer_.get(), query, + DocHitInfoIteratorFilter::Options(), TERM_MATCH_PREFIX, + /*needs_term_frequency_info_=*/true); + root_node->Accept(&query_visitor); + ICING_ASSERT_OK_AND_ASSIGN(QueryResults query_results, + std::move(query_visitor).ConsumeResults()); + EXPECT_THAT( + query_results.features_in_use, + UnorderedElementsAre(kPropertyDefinedInSchemaCustomFunctionFeature, + kListFilterQueryLanguageFeature)); + + // TODO(b/268680462): Update once the feature is actually implemented. 
+ EXPECT_THAT(GetDocumentIds(query_results.root_iterator.get()), + UnorderedElementsAre(kDocumentId0, kDocumentId1)); +} + INSTANTIATE_TEST_SUITE_P(QueryVisitorTest, QueryVisitorTest, testing::Values(QueryType::kPlain, QueryType::kSearch)); diff --git a/icing/query/query-features.h b/icing/query/query-features.h index 9fafba5..6e4fb94 100644 --- a/icing/query/query-features.h +++ b/icing/query/query-features.h @@ -47,9 +47,17 @@ constexpr Feature kVerbatimSearchFeature = constexpr Feature kListFilterQueryLanguageFeature = "LIST_FILTER_QUERY_LANGUAGE"; // Features#LIST_FILTER_QUERY_LANGUAGE +// This feature enables the custom function propertyDefined(member). For +// example, a query "propertyDefined(url)" will only return documents whose +// schemas have defined a "url" property. +// TODO(b/268680462): Update Features.java to sync with this Feature. +constexpr Feature kPropertyDefinedInSchemaCustomFunctionFeature = + "PROPERTY_DEFINED_IN_SCHEMA"; // Features#PROPERTY_DEFINED_IN_SCHEMA + inline std::unordered_set<Feature> GetQueryFeaturesSet() { return {kNumericSearchFeature, kVerbatimSearchFeature, - kListFilterQueryLanguageFeature}; + kListFilterQueryLanguageFeature, + kPropertyDefinedInSchemaCustomFunctionFeature}; } } // namespace lib diff --git a/icing/result/result-adjustment-info.cc b/icing/result/result-adjustment-info.cc new file mode 100644 index 0000000..763cd10 --- /dev/null +++ b/icing/result/result-adjustment-info.cc @@ -0,0 +1,62 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/result/result-adjustment-info.h" + +#include <string> +#include <unordered_map> + +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/result/projection-tree.h" +#include "icing/result/snippet-context.h" + +namespace icing { +namespace lib { + +namespace { + +SnippetContext CreateSnippetContext(const SearchSpecProto& search_spec, + const ResultSpecProto& result_spec, + SectionRestrictQueryTermsMap query_terms) { + if (result_spec.snippet_spec().num_to_snippet() > 0 && + result_spec.snippet_spec().num_matches_per_property() > 0) { + // Needs snippeting + return SnippetContext(std::move(query_terms), result_spec.snippet_spec(), + search_spec.term_match_type()); + } + return SnippetContext(/*query_terms_in=*/{}, + ResultSpecProto::SnippetSpecProto::default_instance(), + TermMatchType::UNKNOWN); +} + +} // namespace + +ResultAdjustmentInfo::ResultAdjustmentInfo( + const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, + const ResultSpecProto& result_spec, + SectionRestrictQueryTermsMap query_terms) + : snippet_context(CreateSnippetContext(search_spec, result_spec, + std::move(query_terms))), + remaining_num_to_snippet(snippet_context.snippet_spec.num_to_snippet()) { + for (const TypePropertyMask& type_field_mask : + result_spec.type_property_masks()) { + projection_tree_map.insert( + {type_field_mask.schema_type(), ProjectionTree(type_field_mask)}); + } +} + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-adjustment-info.h b/icing/result/result-adjustment-info.h new file mode 100644 index 0000000..98fa7f5 --- /dev/null +++ b/icing/result/result-adjustment-info.h @@ -0,0 +1,51 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_ +#define ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_ + +#include <string> +#include <unordered_map> + +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/result/projection-tree.h" +#include "icing/result/snippet-context.h" + +namespace icing { +namespace lib { + +// A wrapper struct for information used in result retrieval. +// - Snippet +// - Projection +struct ResultAdjustmentInfo { + // Information needed for snippeting. + SnippetContext snippet_context; + + // Remaining # of docs to snippet. + int remaining_num_to_snippet; + + // Information needed for projection. + std::unordered_map<std::string, ProjectionTree> projection_tree_map; + + explicit ResultAdjustmentInfo(const SearchSpecProto& search_spec, + const ScoringSpecProto& scoring_spec, + const ResultSpecProto& result_spec, + SectionRestrictQueryTermsMap query_terms); +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_RESULT_RESULT_ADJUSTMENT_INFO_H_ diff --git a/icing/result/result-adjustment-info_test.cc b/icing/result/result-adjustment-info_test.cc new file mode 100644 index 0000000..1c5aea1 --- /dev/null +++ b/icing/result/result-adjustment-info_test.cc @@ -0,0 +1,149 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/result/result-adjustment-info.h" + +#include <string> +#include <unordered_set> + +#include "gtest/gtest.h" +#include "icing/portable/equals-proto.h" +#include "icing/proto/scoring.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/proto/term.pb.h" +#include "icing/result/projection-tree.h" +#include "icing/result/snippet-context.h" + +namespace icing { +namespace lib { + +namespace { + +using ::icing::lib::portable_equals_proto::EqualsProto; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { + SearchSpecProto search_spec; + search_spec.set_term_match_type(match_type); + return search_spec; +} + +ScoringSpecProto CreateScoringSpec(bool is_descending_order) { + ScoringSpecProto scoring_spec; + scoring_spec.set_order_by(is_descending_order ? 
ScoringSpecProto::Order::DESC + : ScoringSpecProto::Order::ASC); + return scoring_spec; +} + +ResultSpecProto CreateResultSpec( + int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) { + ResultSpecProto result_spec; + result_spec.set_result_group_type(result_group_type); + result_spec.set_num_per_page(num_per_page); + return result_spec; +} + +TEST(ResultAdjustmentInfoTest, ShouldConstructSnippetContextAccordingToSpecs) { + ResultSpecProto result_spec = + CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); + result_spec.mutable_snippet_spec()->set_num_to_snippet(5); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); + + SectionRestrictQueryTermsMap query_terms_map; + query_terms_map.emplace("term1", std::unordered_set<std::string>()); + + ResultAdjustmentInfo result_adjustment_info( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + query_terms_map); + const SnippetContext snippet_context = result_adjustment_info.snippet_context; + + // Snippet context should be derived from the specs above. + EXPECT_TRUE( + result_adjustment_info.snippet_context.query_terms.find("term1") != + result_adjustment_info.snippet_context.query_terms.end()); + EXPECT_THAT(result_adjustment_info.snippet_context.snippet_spec, + EqualsProto(result_spec.snippet_spec())); + EXPECT_THAT(result_adjustment_info.snippet_context.match_type, + Eq(TermMatchType::EXACT_ONLY)); + EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(5)); +} + +TEST(ResultAdjustmentInfoTest, NoSnippetingShouldReturnNull) { + ResultSpecProto result_spec = + CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); + // Setting num_to_snippet to 0 so that snippeting info won't be + // stored. 
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(0); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); + + SectionRestrictQueryTermsMap query_terms_map; + query_terms_map.emplace("term1", std::unordered_set<std::string>()); + + ResultAdjustmentInfo result_adjustment_info( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + query_terms_map); + + EXPECT_THAT(result_adjustment_info.snippet_context.query_terms, IsEmpty()); + EXPECT_THAT( + result_adjustment_info.snippet_context.snippet_spec, + EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance())); + EXPECT_THAT(result_adjustment_info.snippet_context.match_type, + TermMatchType::UNKNOWN); + EXPECT_THAT(result_adjustment_info.remaining_num_to_snippet, Eq(0)); +} + +TEST(ResultAdjustmentInfoTest, + ShouldConstructProjectionTreeMapAccordingToSpecs) { + // Create a ResultSpec with type property mask. 
+ ResultSpecProto result_spec = + CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); + TypePropertyMask* email_type_property_mask = + result_spec.add_type_property_masks(); + email_type_property_mask->set_schema_type("Email"); + email_type_property_mask->add_paths("sender.name"); + email_type_property_mask->add_paths("sender.emailAddress"); + TypePropertyMask* phone_type_property_mask = + result_spec.add_type_property_masks(); + phone_type_property_mask->set_schema_type("Phone"); + phone_type_property_mask->add_paths("caller"); + TypePropertyMask* wildcard_type_property_mask = + result_spec.add_type_property_masks(); + wildcard_type_property_mask->set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask->add_paths("wild.card"); + + ResultAdjustmentInfo result_adjustment_info( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + /*query_terms=*/{}); + + EXPECT_THAT(result_adjustment_info.projection_tree_map, + UnorderedElementsAre( + Pair("Email", ProjectionTree(*email_type_property_mask)), + Pair("Phone", ProjectionTree(*phone_type_property_mask)), + Pair(std::string(ProjectionTree::kSchemaTypeWildcard), + ProjectionTree(*wildcard_type_property_mask)))); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/result/result-retriever-v2.cc b/icing/result/result-retriever-v2.cc index 53ce10a..a617f45 100644 --- a/icing/result/result-retriever-v2.cc +++ b/icing/result/result-retriever-v2.cc @@ -15,7 +15,6 @@ #include "icing/result/result-retriever-v2.h" #include <memory> -#include <string_view> #include <unordered_map> #include <utility> #include <vector> @@ -26,8 +25,11 @@ #include "icing/result/page-result.h" #include "icing/result/projection-tree.h" #include "icing/result/projector.h" +#include "icing/result/result-adjustment-info.h" +#include "icing/result/result-state-v2.h" #include "icing/result/snippet-context.h" 
#include "icing/result/snippet-retriever.h" +#include "icing/schema/section.h" #include "icing/scoring/scored-document-hit.h" #include "icing/store/document-store.h" #include "icing/store/namespace-id.h" @@ -38,6 +40,55 @@ namespace icing { namespace lib { +namespace { + +void ApplyProjection(const ResultAdjustmentInfo* adjustment_info, + DocumentProto* document) { + if (adjustment_info == nullptr) { + return; + } + + auto itr = adjustment_info->projection_tree_map.find(document->schema()); + if (itr != adjustment_info->projection_tree_map.end()) { + projector::Project(itr->second.root().children, document); + } else { + auto wildcard_projection_tree_itr = + adjustment_info->projection_tree_map.find( + std::string(ProjectionTree::kSchemaTypeWildcard)); + if (wildcard_projection_tree_itr != + adjustment_info->projection_tree_map.end()) { + projector::Project(wildcard_projection_tree_itr->second.root().children, + document); + } + } +} + +bool ApplySnippet(ResultAdjustmentInfo* adjustment_info, + const SnippetRetriever& snippet_retriever, + const DocumentProto& document, SectionIdMask section_id_mask, + SearchResultProto::ResultProto* result) { + if (adjustment_info == nullptr) { + return false; + } + + const SnippetContext& snippet_context = adjustment_info->snippet_context; + int& remaining_num_to_snippet = adjustment_info->remaining_num_to_snippet; + + if (snippet_context.snippet_spec.num_matches_per_property() > 0 && + remaining_num_to_snippet > 0) { + SnippetProto snippet_proto = snippet_retriever.RetrieveSnippet( + snippet_context.query_terms, snippet_context.match_type, + snippet_context.snippet_spec, document, section_id_mask); + *result->mutable_snippet() = std::move(snippet_proto); + --remaining_num_to_snippet; + return true; + } + + return false; +} + +} // namespace + bool GroupResultLimiterV2::ShouldBeRemoved( const ScoredDocumentHit& scored_document_hit, const std::unordered_map<int32_t, int>& entry_id_group_id_map, @@ -103,19 +154,6 @@ 
std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage( result_state.scored_document_hits_ranker->size(); int num_results_with_snippets = 0; - const SnippetContext& snippet_context = result_state.snippet_context(); - const std::unordered_map<std::string, ProjectionTree>& projection_tree_map = - result_state.projection_tree_map(); - auto wildcard_projection_tree_itr = projection_tree_map.find( - std::string(ProjectionTree::kSchemaTypeWildcard)); - - // Calculates how many snippets to return for this page. - int remaining_num_to_snippet = - snippet_context.snippet_spec.num_to_snippet() - result_state.num_returned; - if (remaining_num_to_snippet < 0) { - remaining_num_to_snippet = 0; - } - // Retrieve info std::vector<SearchResultProto::ResultProto> results; int32_t num_total_bytes = 0; @@ -141,25 +179,16 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage( } DocumentProto document = std::move(document_or).ValueOrDie(); - // Apply projection - auto itr = projection_tree_map.find(document.schema()); - if (itr != projection_tree_map.end()) { - projector::Project(itr->second.root().children, &document); - } else if (wildcard_projection_tree_itr != projection_tree_map.end()) { - projector::Project(wildcard_projection_tree_itr->second.root().children, - &document); - } + // Apply parent projection + ApplyProjection(result_state.parent_adjustment_info(), &document); SearchResultProto::ResultProto result; - // Add the snippet if requested. - if (snippet_context.snippet_spec.num_matches_per_property() > 0 && - remaining_num_to_snippet > results.size()) { - SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet( - snippet_context.query_terms, snippet_context.match_type, - snippet_context.snippet_spec, document, - next_best_document_hit.parent_scored_document_hit() - .hit_section_id_mask()); - *result.mutable_snippet() = std::move(snippet_proto); + // Add parent snippet if requested. 
+ if (ApplySnippet(result_state.parent_adjustment_info(), *snippet_retriever_, + document, + next_best_document_hit.parent_scored_document_hit() + .hit_section_id_mask(), + &result)) { ++num_results_with_snippets; } @@ -181,10 +210,16 @@ std::pair<PageResult, bool> ResultRetrieverV2::RetrieveNextPage( } DocumentProto child_document = std::move(child_document_or).ValueOrDie(); - // TODO(b/256022027): apply projection and add snippet for child doc + ApplyProjection(result_state.child_adjustment_info(), &child_document); SearchResultProto::ResultProto* child_result = result.add_joined_results(); + // Add child snippet if requested. + ApplySnippet(result_state.child_adjustment_info(), *snippet_retriever_, + child_document, + child_scored_document_hit.hit_section_id_mask(), + child_result); + *child_result->mutable_document() = std::move(child_document); child_result->set_score(child_scored_document_hit.score()); } diff --git a/icing/result/result-retriever-v2_group-result-limiter_test.cc b/icing/result/result-retriever-v2_group-result-limiter_test.cc index f59864b..7027cc5 100644 --- a/icing/result/result-retriever-v2_group-result-limiter_test.cc +++ b/icing/result/result-retriever-v2_group-result-limiter_test.cc @@ -22,7 +22,6 @@ #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" #include "icing/proto/search.pb.h" -#include "icing/proto/term.pb.h" #include "icing/result/page-result.h" #include "icing/result/result-retriever-v2.h" #include "icing/result/result-state-v2.h" @@ -31,7 +30,6 @@ #include "icing/scoring/priority-queue-scored-document-hits-ranker.h" #include "icing/scoring/scored-document-hit.h" #include "icing/store/document-id.h" -#include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" #include "icing/testing/icu-data-file-helper.h" @@ -107,22 +105,6 @@ class ResultRetrieverV2GroupResultLimiterTest : public testing::Test { FakeClock fake_clock_; }; -// TODO(sungyc): Refactor 
helper functions below (builder classes or common test -// utility). - -SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { - SearchSpecProto search_spec; - search_spec.set_term_match_type(match_type); - return search_spec; -} - -ScoringSpecProto CreateScoringSpec(bool is_descending_order) { - ScoringSpecProto scoring_spec; - scoring_spec.set_order_by(is_descending_order ? ScoringSpecProto::Order::DESC - : ScoringSpecProto::Order::ASC); - return scoring_spec; -} - ResultSpecProto CreateResultSpec( int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) { ResultSpecProto result_spec; @@ -172,9 +154,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -233,9 +214,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -310,9 +290,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - 
CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -398,9 +377,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -460,9 +438,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -522,9 +499,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -629,9 +605,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< 
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -738,9 +713,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -850,9 +824,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -914,9 +887,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, 
*document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -974,9 +946,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -1078,9 +1049,8 @@ TEST_F(ResultRetrieverV2GroupResultLimiterTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - *document_store_); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *document_store_); { absl_ports::shared_lock l(&result_state.mutex); diff --git a/icing/result/result-retriever-v2_projection_test.cc b/icing/result/result-retriever-v2_projection_test.cc index d093d1f..629fb34 100644 --- a/icing/result/result-retriever-v2_projection_test.cc +++ b/icing/result/result-retriever-v2_projection_test.cc @@ -25,6 +25,7 @@ #include "icing/proto/term.pb.h" #include "icing/result/page-result.h" #include "icing/result/projection-tree.h" +#include "icing/result/result-adjustment-info.h" #include "icing/result/result-retriever-v2.h" #include "icing/result/result-state-v2.h" #include "icing/schema-builder.h" @@ -150,9 +151,6 @@ class ResultRetrieverV2ProjectionTest : public testing::Test { FakeClock fake_clock_; }; -// TODO(sungyc): Refactor helper functions below (builder classes or common test -// utility). 
- SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { SectionIdMask mask = 0; for (SectionId section_id : section_ids) { @@ -225,10 +223,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionTopLevelLeadNodeFieldPath) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -321,10 +321,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionNestedLeafNodeFieldPath) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -428,10 +430,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionIntermediateNodeFieldPath) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - 
/*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -539,10 +543,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleNestedFieldPaths) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -633,10 +639,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionEmptyFieldPath) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + 
/*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -710,10 +718,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionInvalidFieldPath) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -788,10 +798,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionValidAndInvalidFieldPath) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -868,10 +880,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesNoWildcards) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - 
/*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -952,10 +966,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, ProjectionMultipleTypesWildcard) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -1040,10 +1056,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, 
*document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -1137,10 +1155,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -1238,10 +1258,12 @@ TEST_F(ResultRetrieverV2ProjectionTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetrieverV2> result_retriever, @@ -1278,6 +1300,147 @@ TEST_F(ResultRetrieverV2ProjectionTest, EqualsProto(projected_document_two)); } +TEST_F(ResultRetrieverV2ProjectionTest, ProjectionJoinDocuments) { + // 1. 
Add one Person document + DocumentProto person_document = + DocumentBuilder() + .SetKey("namespace", "Person/1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id, + document_store_->Put(person_document)); + + // 2. Add two Email documents + DocumentProto email_document1 = + DocumentBuilder() + .SetKey("namespace", "Email/1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id1, + document_store_->Put(email_document1)); + + DocumentProto email_document2 = + DocumentBuilder() + .SetKey("namespace", "Email/2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id2, + document_store_->Put(email_document2)); + + // 3. Setup the joined scored results. 
+ std::vector<SectionId> person_hit_section_ids = { + GetSectionId("Person", "name")}; + std::vector<SectionId> email_hit_section_ids = { + GetSectionId("Email", "name"), GetSectionId("Email", "body")}; + SectionIdMask person_hit_section_id_mask = + CreateSectionIdMask(person_hit_section_ids); + SectionIdMask email_hit_section_id_mask = + CreateSectionIdMask(email_hit_section_ids); + + ScoredDocumentHit person_scored_doc_hit( + person_document_id, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email1_scored_doc_hit( + email_document_id1, email_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email2_scored_doc_hit( + email_document_id2, email_hit_section_id_mask, /*score=*/0); + // Create JoinedScoredDocumentHits mapping Person to Email1 and Email2 + std::vector<JoinedScoredDocumentHit> joined_scored_document_hits = { + JoinedScoredDocumentHit( + /*final_score=*/0, + /*parent_scored_document_hit=*/person_scored_doc_hit, + /*child_scored_document_hits=*/ + {email1_scored_doc_hit, email2_scored_doc_hit})}; + + // 4. Create parent ResultSpec with type property mask. + ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/2); + TypePropertyMask* type_property_mask = + parent_result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Person"); + type_property_mask->add_paths("name"); + + // 5. Create child ResultSpec with type property mask. + ResultSpecProto child_result_spec; + type_property_mask = child_result_spec.add_type_property_masks(); + type_property_mask->set_schema_type("Email"); + type_property_mask->add_paths("body"); + + // 6. Create ResultState with custom ResultSpecs. 
+ ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>( + std::move(joined_scored_document_hits), /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, + SectionRestrictQueryTermsMap()), + parent_result_spec, *document_store_); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 7. Verify that the returned results: + // - Person docs only contain the "name" property. + // - Email docs only contain the "body" property. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(1)); + + // Check parent doc. + DocumentProto projected_person_document = + DocumentBuilder() + .SetKey("namespace", "Person/1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(page_result.results.at(0).document(), + EqualsProto(projected_person_document)); + + // Check child docs. + ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(2)); + // Check Email1 + DocumentProto projected_email_document1 = + DocumentBuilder() + .SetKey("namespace", "Email/1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty( + "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") + .Build(); + EXPECT_THAT(page_result.results.at(0).joined_results(0).document(), + EqualsProto(projected_email_document1)); + // Check Email2 + DocumentProto projected_email_document2 = + DocumentBuilder() + .SetKey("namespace", "Email/2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + EXPECT_THAT(page_result.results.at(0).joined_results(1).document(), + EqualsProto(projected_email_document2)); +} + } // namespace } // namespace lib diff --git a/icing/result/result-retriever-v2_snippet_test.cc b/icing/result/result-retriever-v2_snippet_test.cc index 6123bf4..b32800c 100644 --- a/icing/result/result-retriever-v2_snippet_test.cc +++ b/icing/result/result-retriever-v2_snippet_test.cc @@ -26,6 +26,7 @@ #include "icing/proto/search.pb.h" #include "icing/proto/term.pb.h" #include "icing/result/page-result.h" +#include "icing/result/result-adjustment-info.h" #include "icing/result/result-retriever-v2.h" #include "icing/result/result-state-v2.h" #include "icing/schema-builder.h" @@ -82,37 +83,24 @@ class ResultRetrieverV2SnippetTest : public testing::Test { SchemaProto schema = SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_EXACT, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL))) .AddType( SchemaTypeConfigBuilder() - .SetType("Person") + .SetType("Email") .AddProperty(PropertyConfigBuilder() - .SetName("name") + .SetName("subject") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) 
.SetCardinality(CARDINALITY_OPTIONAL)) .AddProperty(PropertyConfigBuilder() - .SetName("emailAddress") - .SetDataTypeString(TERM_MATCH_PREFIX, + .SetName("body") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) .Build(); ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); @@ -155,9 +143,6 @@ class ResultRetrieverV2SnippetTest : public testing::Test { FakeClock fake_clock_; }; -// TODO(sungyc): Refactor helper functions below (builder classes or common test -// utility). - ResultSpecProto::SnippetSpecProto CreateSnippetSpec() { ResultSpecProto::SnippetSpecProto snippet_spec; snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max()); @@ -166,16 +151,25 @@ ResultSpecProto::SnippetSpecProto CreateSnippetSpec() { return snippet_spec; } -DocumentProto CreateDocument(int id) { +DocumentProto CreateEmailDocument(int id) { return DocumentBuilder() .SetKey("icing", "Email/" + std::to_string(id)) .SetSchema("Email") - .AddStringProperty("name", "subject foo " + std::to_string(id)) + .AddStringProperty("subject", "subject foo " + std::to_string(id)) .AddStringProperty("body", "body bar " + std::to_string(id)) .SetCreationTimestampMs(1574365086666 + id) .Build(); } +DocumentProto CreatePersonDocument(int id) { + return DocumentBuilder() + .SetKey("icing", "Person/" + std::to_string(id)) + .SetSchema("Person") + .AddStringProperty("name", "person " + std::to_string(id)) + .SetCreationTimestampMs(1574365086666 + id) + .Build(); +} + SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { SectionIdMask mask = 0; for (SectionId section_id : section_ids) { @@ -205,14 +199,17 @@ ResultSpecProto CreateResultSpec(int num_per_page) { TEST_F(ResultRetrieverV2SnippetTest, 
DefaultSnippetSpecShouldDisableSnippeting) { - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - document_store_->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - document_store_->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { @@ -224,13 +221,18 @@ TEST_F(ResultRetrieverV2SnippetTest, ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), language_segmenter_.get(), normalizer_.get())); + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3); + ResultStateV2 result_state( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), - CreateResultSpec(/*num_per_page=*/3), *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/true), result_spec, + SectionRestrictQueryTermsMap()), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); PageResult page_result = result_retriever->RetrieveNextPage(result_state).first; 
ASSERT_THAT(page_result.results, SizeIs(3)); @@ -244,14 +246,17 @@ TEST_F(ResultRetrieverV2SnippetTest, } TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - document_store_->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - document_store_->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { @@ -271,10 +276,12 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/{{"", {"foo", "bar"}}}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); PageResult page_result = result_retriever->RetrieveNextPage(result_state).first; @@ -284,7 +291,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { const 
DocumentProto& result_document_one = page_result.results.at(0).document(); const SnippetProto& result_snippet_one = page_result.results.at(0).snippet(); - EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1))); + EXPECT_THAT(result_document_one, EqualsProto(CreateEmailDocument(/*id=*/1))); EXPECT_THAT(result_snippet_one.entries(), SizeIs(2)); EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body")); std::string_view content = GetString( @@ -293,7 +300,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { ElementsAre("body bar 1")); EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)), ElementsAre("bar")); - EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name")); + EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("subject")); content = GetString(&result_document_one, result_snippet_one.entries(1).property_name()); EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)), @@ -304,7 +311,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { const DocumentProto& result_document_two = page_result.results.at(1).document(); const SnippetProto& result_snippet_two = page_result.results.at(1).snippet(); - EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2))); + EXPECT_THAT(result_document_two, EqualsProto(CreateEmailDocument(/*id=*/2))); EXPECT_THAT(result_snippet_two.entries(), SizeIs(2)); EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body")); content = GetString(&result_document_two, @@ -313,7 +320,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { ElementsAre("body bar 2")); EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)), ElementsAre("bar")); - EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name")); + EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("subject")); content = GetString(&result_document_two, result_snippet_two.entries(1).property_name()); EXPECT_THAT(GetWindows(content, 
result_snippet_two.entries(1)), @@ -325,7 +332,8 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { page_result.results.at(2).document(); const SnippetProto& result_snippet_three = page_result.results.at(2).snippet(); - EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3))); + EXPECT_THAT(result_document_three, + EqualsProto(CreateEmailDocument(/*id=*/3))); EXPECT_THAT(result_snippet_three.entries(), SizeIs(2)); EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body")); content = GetString(&result_document_three, @@ -334,7 +342,7 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { ElementsAre("body bar 3")); EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)), ElementsAre("bar")); - EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name")); + EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("subject")); content = GetString(&result_document_three, result_snippet_three.entries(1).property_name()); EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)), @@ -344,14 +352,17 @@ TEST_F(ResultRetrieverV2SnippetTest, SimpleSnippeted) { } TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) { - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - document_store_->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - document_store_->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = 
{GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { @@ -373,10 +384,12 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/{{"", {"foo", "bar"}}}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); PageResult page_result = result_retriever->RetrieveNextPage(result_state).first; @@ -385,7 +398,7 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) { const DocumentProto& result_document = page_result.results.at(0).document(); const SnippetProto& result_snippet = page_result.results.at(0).snippet(); - EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1))); + EXPECT_THAT(result_document, EqualsProto(CreateEmailDocument(/*id=*/1))); EXPECT_THAT(result_snippet.entries(), SizeIs(2)); EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body")); std::string_view content = @@ -394,7 +407,7 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) { ElementsAre("body bar 1")); EXPECT_THAT(GetMatches(content, result_snippet.entries(0)), ElementsAre("bar")); - EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name")); + EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("subject")); content = GetString(&result_document, result_snippet.entries(1).property_name()); EXPECT_THAT(GetWindows(content, result_snippet.entries(1)), @@ 
-403,25 +416,28 @@ TEST_F(ResultRetrieverV2SnippetTest, OnlyOneDocumentSnippeted) { ElementsAre("foo")); EXPECT_THAT(page_result.results.at(1).document(), - EqualsProto(CreateDocument(/*id=*/2))); + EqualsProto(CreateEmailDocument(/*id=*/2))); EXPECT_THAT(page_result.results.at(1).snippet(), EqualsProto(SnippetProto::default_instance())); EXPECT_THAT(page_result.results.at(2).document(), - EqualsProto(CreateDocument(/*id=*/3))); + EqualsProto(CreateEmailDocument(/*id=*/3))); EXPECT_THAT(page_result.results.at(2).snippet(), EqualsProto(SnippetProto::default_instance())); } TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) { - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - document_store_->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - document_store_->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { @@ -443,10 +459,12 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/{{"", {"foo", "bar"}}}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - 
*document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); PageResult page_result = result_retriever->RetrieveNextPage(result_state).first; @@ -461,14 +479,17 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllResults) { } TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) { - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - document_store_->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - document_store_->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { @@ -490,19 +511,19 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/{{"", {"foo", "bar"}}}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + 
std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); { absl_ports::unique_lock l(&result_state.mutex); - // Set (previously) num_returned = 3 docs - result_state.num_returned = 3; + // Set remaining_num_to_snippet = 2 + result_state.parent_adjustment_info()->remaining_num_to_snippet = 2; } - // num_to_snippet = 5, (previously) num_returned = 3, - // We can return 5 - 3 = 2 snippets. PageResult page_result = result_retriever->RetrieveNextPage(result_state).first; ASSERT_THAT(page_result.results, SizeIs(3)); @@ -513,14 +534,17 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeResults) { } TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) { - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - document_store_->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - document_store_->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - document_store_->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { @@ -542,18 +566,19 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) { std::make_unique< 
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/{{"", {"foo", "bar"}}}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/false), result_spec, - *document_store_); + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); { absl_ports::unique_lock l(&result_state.mutex); - // Set (previously) num_returned = 6 docs - result_state.num_returned = 6; + // Set remaining_num_to_snippet = 0 + result_state.parent_adjustment_info()->remaining_num_to_snippet = 0; } - // num_to_snippet = 5, (previously) num_returned = 6, // We can't return any snippets for this page. PageResult page_result = result_retriever->RetrieveNextPage(result_state).first; @@ -564,6 +589,516 @@ TEST_F(ResultRetrieverV2SnippetTest, ShouldNotSnippetAnyResults) { EXPECT_THAT(page_result.num_results_with_snippets, Eq(0)); } +TEST_F(ResultRetrieverV2SnippetTest, + ShouldNotSnippetAnyResultsForNonPositiveNumMatchesPerProperty) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "subject"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, 
hit_section_id_mask, /*score=*/0}}; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create ResultSpec with custom snippet spec. + ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); + snippet_spec.set_num_to_snippet(5); + ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/3); + *result_spec.mutable_snippet_spec() = std::move(snippet_spec); + + ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( + std::move(scored_document_hits), /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + /*child_adjustment_info=*/nullptr, result_spec, *document_store_); + + { + absl_ports::unique_lock l(&result_state.mutex); + + // Set num_matches_per_property = 0 + result_state.parent_adjustment_info() + ->snippet_context.snippet_spec.set_num_matches_per_property(0); + } + + // We can't return any snippets for this page even though num_to_snippet > 0. 
+ PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.results.at(0).snippet().entries(), IsEmpty()); + EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty()); + EXPECT_THAT(page_result.results.at(2).snippet().entries(), IsEmpty()); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(0)); +} + +TEST_F(ResultRetrieverV2SnippetTest, JoinSnippeted) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id1, + document_store_->Put(CreatePersonDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id2, + document_store_->Put(CreatePersonDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id3, + document_store_->Put(CreatePersonDocument(/*id=*/3))); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> person_hit_section_ids = { + GetSectionId("Person", "name")}; + std::vector<SectionId> email_hit_section_ids = { + GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; + SectionIdMask person_hit_section_id_mask = + CreateSectionIdMask(person_hit_section_ids); + SectionIdMask email_hit_section_id_mask = + CreateSectionIdMask(email_hit_section_ids); + + ScoredDocumentHit person1_scored_doc_hit( + person_document_id1, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit person2_scored_doc_hit( + person_document_id2, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit person3_scored_doc_hit( + person_document_id3, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email1_scored_doc_hit( + email_document_id1, 
email_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email2_scored_doc_hit( + email_document_id2, email_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email3_scored_doc_hit( + email_document_id3, email_hit_section_id_mask, /*score=*/0); + + // Create JoinedScoredDocumentHits mapping: + // - Person1 to Email1 and Email2 + // - Person2 to empty + // - Person3 to Email3 + JoinedScoredDocumentHit joined_scored_document_hit1( + /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit, + /*child_scored_document_hits=*/ + {email1_scored_doc_hit, email2_scored_doc_hit}); + JoinedScoredDocumentHit joined_scored_document_hit2( + /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit, + /*child_scored_document_hits=*/{}); + JoinedScoredDocumentHit joined_scored_document_hit3( + /*final_score=*/0, /*parent_scored_document_hit=*/person3_scored_doc_hit, + /*child_scored_document_hits=*/{email3_scored_doc_hit}); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create parent ResultSpec with custom snippet spec. + ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3); + *parent_result_spec.mutable_snippet_spec() = CreateSnippetSpec(); + + // Create child ResultSpec with custom snippet spec. 
+ ResultSpecProto child_result_spec; + *child_result_spec.mutable_snippet_spec() = CreateSnippetSpec(); + + ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>( + std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1, + joined_scored_document_hit2, + joined_scored_document_hit3}, + /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, + SectionRestrictQueryTermsMap({{"", {"person"}}})), + /*child_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + parent_result_spec, *document_store_); + + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(3)); + EXPECT_THAT(page_result.num_results_with_snippets, Eq(3)); + + // Result1: Person1 for parent and [Email1, Email2] for children. + // Check parent doc (Person1). + const DocumentProto& result_parent_document_one = + page_result.results.at(0).document(); + const SnippetProto& result_parent_snippet_one = + page_result.results.at(0).snippet(); + EXPECT_THAT(result_parent_document_one, + EqualsProto(CreatePersonDocument(/*id=*/1))); + ASSERT_THAT(result_parent_snippet_one.entries(), SizeIs(1)); + EXPECT_THAT(result_parent_snippet_one.entries(0).property_name(), Eq("name")); + std::string_view content = + GetString(&result_parent_document_one, + result_parent_snippet_one.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_parent_snippet_one.entries(0)), + ElementsAre("person 1")); + EXPECT_THAT(GetMatches(content, result_parent_snippet_one.entries(0)), + ElementsAre("person")); + + // Check child docs. 
+ ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(2)); + // Check Email1. + const DocumentProto& result_child_document_one = + page_result.results.at(0).joined_results(0).document(); + const SnippetProto& result_child_snippet_one = + page_result.results.at(0).joined_results(0).snippet(); + EXPECT_THAT(result_child_document_one, + EqualsProto(CreateEmailDocument(/*id=*/1))); + ASSERT_THAT(result_child_snippet_one.entries(), SizeIs(2)); + EXPECT_THAT(result_child_snippet_one.entries(0).property_name(), Eq("body")); + content = GetString(&result_child_document_one, + result_child_snippet_one.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_child_snippet_one.entries(0)), + ElementsAre("body bar 1")); + EXPECT_THAT(GetMatches(content, result_child_snippet_one.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_child_snippet_one.entries(1).property_name(), + Eq("subject")); + content = GetString(&result_child_document_one, + result_child_snippet_one.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_child_snippet_one.entries(1)), + ElementsAre("subject foo 1")); + EXPECT_THAT(GetMatches(content, result_child_snippet_one.entries(1)), + ElementsAre("foo")); + // Check Email2. 
+ const DocumentProto& result_child_document_two = + page_result.results.at(0).joined_results(1).document(); + const SnippetProto& result_child_snippet_two = + page_result.results.at(0).joined_results(1).snippet(); + EXPECT_THAT(result_child_document_two, + EqualsProto(CreateEmailDocument(/*id=*/2))); + ASSERT_THAT(result_child_snippet_two.entries(), SizeIs(2)); + EXPECT_THAT(result_child_snippet_two.entries(0).property_name(), Eq("body")); + content = GetString(&result_child_document_two, + result_child_snippet_two.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_child_snippet_two.entries(0)), + ElementsAre("body bar 2")); + EXPECT_THAT(GetMatches(content, result_child_snippet_two.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_child_snippet_two.entries(1).property_name(), + Eq("subject")); + content = GetString(&result_child_document_two, + result_child_snippet_two.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_child_snippet_two.entries(1)), + ElementsAre("subject foo 2")); + EXPECT_THAT(GetMatches(content, result_child_snippet_two.entries(1)), + ElementsAre("foo")); + + // Result2: Person2 for parent and [] for children. + // Check parent doc (Person2). + const DocumentProto& result_parent_document_two = + page_result.results.at(1).document(); + const SnippetProto& result_parent_snippet_two = + page_result.results.at(1).snippet(); + EXPECT_THAT(result_parent_document_two, + EqualsProto(CreatePersonDocument(/*id=*/2))); + ASSERT_THAT(result_parent_snippet_two.entries(), SizeIs(1)); + EXPECT_THAT(result_parent_snippet_two.entries(0).property_name(), Eq("name")); + content = GetString(&result_parent_document_two, + result_parent_snippet_two.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_parent_snippet_two.entries(0)), + ElementsAre("person 2")); + EXPECT_THAT(GetMatches(content, result_parent_snippet_two.entries(0)), + ElementsAre("person")); + // Check child docs. 
+ ASSERT_THAT(page_result.results.at(1).joined_results(), IsEmpty()); + + // Result3: Person3 for parent and [Email3] for children. + // Check parent doc (Person3). + const DocumentProto& result_parent_document_three = + page_result.results.at(2).document(); + const SnippetProto& result_parent_snippet_three = + page_result.results.at(2).snippet(); + EXPECT_THAT(result_parent_document_three, + EqualsProto(CreatePersonDocument(/*id=*/3))); + ASSERT_THAT(result_parent_snippet_three.entries(), SizeIs(1)); + EXPECT_THAT(result_parent_snippet_three.entries(0).property_name(), + Eq("name")); + content = GetString(&result_parent_document_three, + result_parent_snippet_three.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_parent_snippet_three.entries(0)), + ElementsAre("person 3")); + EXPECT_THAT(GetMatches(content, result_parent_snippet_three.entries(0)), + ElementsAre("person")); + + // Check child docs. + ASSERT_THAT(page_result.results.at(2).joined_results(), SizeIs(1)); + // Check Email3. 
+ const DocumentProto& result_child_document_three = + page_result.results.at(2).joined_results(0).document(); + const SnippetProto& result_child_snippet_three = + page_result.results.at(2).joined_results(0).snippet(); + EXPECT_THAT(result_child_document_three, + EqualsProto(CreateEmailDocument(/*id=*/3))); + ASSERT_THAT(result_child_snippet_three.entries(), SizeIs(2)); + EXPECT_THAT(result_child_snippet_three.entries(0).property_name(), + Eq("body")); + content = GetString(&result_child_document_three, + result_child_snippet_three.entries(0).property_name()); + EXPECT_THAT(GetWindows(content, result_child_snippet_three.entries(0)), + ElementsAre("body bar 3")); + EXPECT_THAT(GetMatches(content, result_child_snippet_three.entries(0)), + ElementsAre("bar")); + EXPECT_THAT(result_child_snippet_three.entries(1).property_name(), + Eq("subject")); + content = GetString(&result_child_document_three, + result_child_snippet_three.entries(1).property_name()); + EXPECT_THAT(GetWindows(content, result_child_snippet_three.entries(1)), + ElementsAre("subject foo 3")); + EXPECT_THAT(GetMatches(content, result_child_snippet_three.entries(1)), + ElementsAre("foo")); +} + +TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetAllJoinedResults) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id1, + document_store_->Put(CreatePersonDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id2, + document_store_->Put(CreatePersonDocument(/*id=*/2))); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> person_hit_section_ids = { + GetSectionId("Person", "name")}; + std::vector<SectionId> email_hit_section_ids = { + 
GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; + SectionIdMask person_hit_section_id_mask = + CreateSectionIdMask(person_hit_section_ids); + SectionIdMask email_hit_section_id_mask = + CreateSectionIdMask(email_hit_section_ids); + + ScoredDocumentHit person1_scored_doc_hit( + person_document_id1, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit person2_scored_doc_hit( + person_document_id2, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email1_scored_doc_hit( + email_document_id1, email_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email2_scored_doc_hit( + email_document_id2, email_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email3_scored_doc_hit( + email_document_id3, email_hit_section_id_mask, /*score=*/0); + + // Create JoinedScoredDocumentHits mapping: + // - Person1 to Email1 + // - Person2 to Email2, Email3 + JoinedScoredDocumentHit joined_scored_document_hit1( + /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit, + /*child_scored_document_hits=*/ + {email1_scored_doc_hit}); + JoinedScoredDocumentHit joined_scored_document_hit2( + /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit, + /*child_scored_document_hits=*/ + {email2_scored_doc_hit, email3_scored_doc_hit}); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create parent ResultSpec with custom snippet spec. + ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec(); + parent_snippet_spec.set_num_to_snippet(1); + ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3); + *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec); + + // Create child ResultSpec with custom snippet spec. 
+ ResultSpecProto::SnippetSpecProto child_snippet_spec = CreateSnippetSpec(); + child_snippet_spec.set_num_to_snippet(3); + ResultSpecProto child_result_spec; + *child_result_spec.mutable_snippet_spec() = std::move(child_snippet_spec); + + ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>( + std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1, + joined_scored_document_hit2}, + /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, + SectionRestrictQueryTermsMap({{"", {"person"}}})), + /*child_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + parent_result_spec, *document_store_); + + // Only 1 parent document should be snippeted, but all of the child documents + // should be snippeted. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + // Result1: Person1 for parent and [Email1] for children. + // Check parent doc (Person1). + EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty())); + // Check child docs. + ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(1)); + EXPECT_THAT(page_result.results.at(0).joined_results(0).snippet().entries(), + Not(IsEmpty())); + + // Result2: Person2 for parent and [Email2, Email3] for children. + // Check parent doc (Person2). + EXPECT_THAT(page_result.results.at(1).snippet().entries(), IsEmpty()); + // Check child docs. 
+ ASSERT_THAT(page_result.results.at(1).joined_results(), SizeIs(2)); + EXPECT_THAT(page_result.results.at(1).joined_results(0).snippet().entries(), + Not(IsEmpty())); + EXPECT_THAT(page_result.results.at(1).joined_results(1).snippet().entries(), + Not(IsEmpty())); + + EXPECT_THAT(page_result.num_results_with_snippets, Eq(1)); +} + +TEST_F(ResultRetrieverV2SnippetTest, ShouldSnippetSomeJoinedResults) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id1, + document_store_->Put(CreatePersonDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId person_document_id2, + document_store_->Put(CreatePersonDocument(/*id=*/2))); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id1, + document_store_->Put(CreateEmailDocument(/*id=*/1))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id2, + document_store_->Put(CreateEmailDocument(/*id=*/2))); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId email_document_id3, + document_store_->Put(CreateEmailDocument(/*id=*/3))); + + std::vector<SectionId> person_hit_section_ids = { + GetSectionId("Person", "name")}; + std::vector<SectionId> email_hit_section_ids = { + GetSectionId("Email", "subject"), GetSectionId("Email", "body")}; + SectionIdMask person_hit_section_id_mask = + CreateSectionIdMask(person_hit_section_ids); + SectionIdMask email_hit_section_id_mask = + CreateSectionIdMask(email_hit_section_ids); + + ScoredDocumentHit person1_scored_doc_hit( + person_document_id1, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit person2_scored_doc_hit( + person_document_id2, person_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email1_scored_doc_hit( + email_document_id1, email_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email2_scored_doc_hit( + email_document_id2, email_hit_section_id_mask, /*score=*/0); + ScoredDocumentHit email3_scored_doc_hit( + email_document_id3, email_hit_section_id_mask, /*score=*/0); + + // Create JoinedScoredDocumentHits mapping: + // - Person1 
to Email1 + // - Person2 to Email2, Email3 + JoinedScoredDocumentHit joined_scored_document_hit1( + /*final_score=*/0, /*parent_scored_document_hit=*/person1_scored_doc_hit, + /*child_scored_document_hits=*/ + {email1_scored_doc_hit}); + JoinedScoredDocumentHit joined_scored_document_hit2( + /*final_score=*/0, /*parent_scored_document_hit=*/person2_scored_doc_hit, + /*child_scored_document_hits=*/ + {email2_scored_doc_hit, email3_scored_doc_hit}); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetrieverV2> result_retriever, + ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // Create parent ResultSpec with custom snippet spec. + ResultSpecProto::SnippetSpecProto parent_snippet_spec = CreateSnippetSpec(); + parent_snippet_spec.set_num_to_snippet(3); + ResultSpecProto parent_result_spec = CreateResultSpec(/*num_per_page=*/3); + *parent_result_spec.mutable_snippet_spec() = std::move(parent_snippet_spec); + + // Create child ResultSpec with custom snippet spec. 
+ ResultSpecProto::SnippetSpecProto child_snippet_spec = CreateSnippetSpec(); + child_snippet_spec.set_num_to_snippet(2); + ResultSpecProto child_result_spec; + *child_result_spec.mutable_snippet_spec() = std::move(child_snippet_spec); + + ResultStateV2 result_state( + std::make_unique< + PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>( + std::vector<JoinedScoredDocumentHit>{joined_scored_document_hit1, + joined_scored_document_hit2}, + /*is_descending=*/false), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), parent_result_spec, + SectionRestrictQueryTermsMap({{"", {"person"}}})), + /*child_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>( + CreateSearchSpec(TermMatchType::EXACT_ONLY), + CreateScoringSpec(/*is_descending_order=*/false), child_result_spec, + SectionRestrictQueryTermsMap({{"", {"foo", "bar"}}})), + parent_result_spec, *document_store_); + + // All parent documents should be snippeted. Only 2 child documents should be + // snippeted. + PageResult page_result = + result_retriever->RetrieveNextPage(result_state).first; + ASSERT_THAT(page_result.results, SizeIs(2)); + + // Result1: Person1 for parent and [Email1] for children. + // Check parent doc (Person1). + EXPECT_THAT(page_result.results.at(0).snippet().entries(), Not(IsEmpty())); + // Check child docs. + ASSERT_THAT(page_result.results.at(0).joined_results(), SizeIs(1)); + EXPECT_THAT(page_result.results.at(0).joined_results(0).snippet().entries(), + Not(IsEmpty())); + + // Result2: Person2 for parent and [Email2, Email3] for children. + // Check parent doc (Person2). + EXPECT_THAT(page_result.results.at(1).snippet().entries(), Not(IsEmpty())); + // Check child docs. 
+ ASSERT_THAT(page_result.results.at(1).joined_results(), SizeIs(2)); + EXPECT_THAT(page_result.results.at(1).joined_results(0).snippet().entries(), + Not(IsEmpty())); + EXPECT_THAT(page_result.results.at(1).joined_results(1).snippet().entries(), + IsEmpty()); + + EXPECT_THAT(page_result.num_results_with_snippets, Eq(2)); +} + } // namespace } // namespace lib diff --git a/icing/result/result-retriever-v2_test.cc b/icing/result/result-retriever-v2_test.cc index 874a8f1..6c2aa67 100644 --- a/icing/result/result-retriever-v2_test.cc +++ b/icing/result/result-retriever-v2_test.cc @@ -27,9 +27,7 @@ #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" -#include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" -#include "icing/proto/term.pb.h" #include "icing/result/page-result.h" #include "icing/result/result-state-v2.h" #include "icing/schema-builder.h" @@ -173,9 +171,6 @@ class ResultRetrieverV2Test : public ::testing::Test { FakeClock fake_clock_; }; -// TODO(sungyc): Refactor helper functions below (builder classes or common test -// utility). - DocumentProto CreateDocument(int id) { return DocumentBuilder() .SetKey("icing", "Email/" + std::to_string(id)) @@ -194,19 +189,6 @@ SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { return mask; } -SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { - SearchSpecProto search_spec; - search_spec.set_term_match_type(match_type); - return search_spec; -} - -ScoringSpecProto CreateScoringSpec(bool is_descending_order) { - ScoringSpecProto scoring_spec; - scoring_spec.set_order_by(is_descending_order ? 
ScoringSpecProto::Order::DESC - : ScoringSpecProto::Order::ASC); - return scoring_spec; -} - ResultSpecProto CreateResultSpec( int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) { ResultSpecProto result_spec; @@ -295,8 +277,7 @@ TEST_F(ResultRetrieverV2Test, ShouldRetrieveSimpleResults) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE), *doc_store); @@ -375,8 +356,7 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) { PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE), *doc_store); PageResult page_result1 = @@ -394,8 +374,7 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreNonInternalErrors) { PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/3, ResultSpecProto::NAMESPACE), *doc_store); PageResult page_result2 = @@ -445,8 +424,7 @@ TEST_F(ResultRetrieverV2Test, ShouldIgnoreInternalErrors) { PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - 
CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE), *doc_store); PageResult page_result = @@ -494,8 +472,7 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateResultState) { PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE), *doc_store); @@ -567,9 +544,7 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) { PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), *doc_store); { @@ -595,9 +570,7 @@ TEST_F(ResultRetrieverV2Test, ShouldUpdateNumTotalHits) { PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/SectionRestrictQueryTermsMap{}, - CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE), *doc_store); { @@ -684,8 +657,8 @@ TEST_F(ResultRetrieverV2Test, ShouldLimitNumTotalBytesPerPage) { PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - 
CreateScoringSpec(/*is_descending_order=*/true), result_spec, *doc_store); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *doc_store); // First page. Only result1 should be returned, since its byte size meets // num_total_bytes_per_page_threshold and ResultRetriever should terminate @@ -747,8 +720,8 @@ TEST_F(ResultRetrieverV2Test, PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, *doc_store); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *doc_store); // First page. Should return single result1 even though its byte size exceeds // num_total_bytes_per_page_threshold. @@ -809,8 +782,8 @@ TEST_F(ResultRetrieverV2Test, PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, *doc_store); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, *doc_store); // After retrieving result1, total bytes are still below the threshold and # // of results is still below num_per_page, so ResultRetriever should continue diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc deleted file mode 100644 index 37b212a..0000000 --- a/icing/result/result-retriever.cc +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "icing/result/result-retriever.h" - -#include <string_view> -#include <utility> - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/proto/document.pb.h" -#include "icing/proto/search.pb.h" -#include "icing/result/page-result-state.h" -#include "icing/result/projection-tree.h" -#include "icing/result/projector.h" -#include "icing/result/snippet-context.h" -#include "icing/util/status-macros.h" - -namespace icing { -namespace lib { - -libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>> -ResultRetriever::Create(const DocumentStore* doc_store, - const SchemaStore* schema_store, - const LanguageSegmenter* language_segmenter, - const Normalizer* normalizer, - bool ignore_bad_document_ids) { - ICING_RETURN_ERROR_IF_NULL(doc_store); - ICING_RETURN_ERROR_IF_NULL(schema_store); - ICING_RETURN_ERROR_IF_NULL(language_segmenter); - - ICING_ASSIGN_OR_RETURN( - std::unique_ptr<SnippetRetriever> snippet_retriever, - SnippetRetriever::Create(schema_store, language_segmenter, normalizer)); - - return std::unique_ptr<ResultRetriever>(new ResultRetriever( - doc_store, std::move(snippet_retriever), ignore_bad_document_ids)); -} - -libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>> -ResultRetriever::RetrieveResults( - const PageResultState& page_result_state) const { - std::vector<SearchResultProto::ResultProto> search_results; - search_results.reserve(page_result_state.scored_document_hits.size()); - - const SnippetContext& snippet_context = page_result_state.snippet_context; - // Calculates how 
many snippets to return for this page. - int remaining_num_to_snippet = snippet_context.snippet_spec.num_to_snippet() - - page_result_state.num_previously_returned; - - if (remaining_num_to_snippet < 0) { - remaining_num_to_snippet = 0; - } - - auto wildcard_projection_tree_itr = - page_result_state.projection_tree_map.find( - std::string(ProjectionTree::kSchemaTypeWildcard)); - for (const auto& scored_document_hit : - page_result_state.scored_document_hits) { - libtextclassifier3::StatusOr<DocumentProto> document_or = - doc_store_.Get(scored_document_hit.document_id()); - - if (!document_or.ok()) { - // Internal errors from document store are IO errors, return directly. - if (absl_ports::IsInternal(document_or.status())) { - return document_or.status(); - } - - if (ignore_bad_document_ids_) { - continue; - } else { - return document_or.status(); - } - } - - DocumentProto document = std::move(document_or).ValueOrDie(); - // Apply projection - auto itr = page_result_state.projection_tree_map.find(document.schema()); - if (itr != page_result_state.projection_tree_map.end()) { - projector::Project(itr->second.root().children, &document); - } else if (wildcard_projection_tree_itr != - page_result_state.projection_tree_map.end()) { - projector::Project(wildcard_projection_tree_itr->second.root().children, - &document); - } - - SearchResultProto::ResultProto result; - // Add the snippet if requested. - if (snippet_context.snippet_spec.num_matches_per_property() > 0 && - remaining_num_to_snippet > search_results.size()) { - SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet( - snippet_context.query_terms, snippet_context.match_type, - snippet_context.snippet_spec, document, - scored_document_hit.hit_section_id_mask()); - *result.mutable_snippet() = std::move(snippet_proto); - } - - // Add the document, itself. 
- *result.mutable_document() = std::move(document); - result.set_score(scored_document_hit.score()); - search_results.push_back(std::move(result)); - } - return search_results; -} - -} // namespace lib -} // namespace icing diff --git a/icing/result/result-retriever.h b/icing/result/result-retriever.h deleted file mode 100644 index ade8441..0000000 --- a/icing/result/result-retriever.h +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ICING_RESULT_RETRIEVER_H_ -#define ICING_RESULT_RETRIEVER_H_ - -#include <utility> -#include <vector> - -#include "icing/text_classifier/lib3/utils/base/statusor.h" -#include "icing/proto/search.pb.h" -#include "icing/query/query-terms.h" -#include "icing/result/page-result-state.h" -#include "icing/result/snippet-context.h" -#include "icing/result/snippet-retriever.h" -#include "icing/schema/schema-store.h" -#include "icing/schema/section.h" -#include "icing/scoring/scored-document-hit.h" -#include "icing/store/document-id.h" -#include "icing/store/document-store.h" -#include "icing/tokenization/language-segmenter.h" -#include "icing/transform/normalizer.h" - -namespace icing { -namespace lib { - -class ResultRetriever { - public: - // Factory function to create a ResultRetriever which does not take ownership - // of any input components, and all pointers must refer to valid objects that - // outlive the created ResultRetriever instance. 
- // - // Returns: - // A ResultRetriever on success - // FAILED_PRECONDITION on any null pointer input - static libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>> Create( - const DocumentStore* doc_store, const SchemaStore* schema_store, - const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, - bool ignore_bad_document_ids = true); - - // Retrieves results (pairs of DocumentProtos and SnippetProtos) with the - // given document and snippet information. The expected number of documents to - // return is the number of all scored document hits inside PageResultState. - // The number of snippets to return is based on the total number of snippets - // needed and number of snippets that have already been returned previously - // for the same query. The order of results returned is the same as the order - // of scored document hits inside PageResultState. - // - // "ignore_bad_document_ids" from constructor indicates whether to ignore - // invalid and non-existing document ids. If it's true, errors on some - // document ids will be ignored and valid documents will be returned, - // otherwise any error will be returned immediately. Note that IO errors will - // always be returned. 
- // - // Returns when ignore_bad_document_ids is true: - // A list of ResultProto on success - // INTERNAL_ERROR on IO error - // - // Returns when ignore_bad_document_ids is false: - // A list of ResultProto on success - // INVALID_ARGUMENT if any document_id < 0 - // NOT_FOUND if any doc doesn't exist or has been deleted - // INTERNAL_ERROR on IO error - libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>> - RetrieveResults(const PageResultState& page_result_state) const; - - private: - explicit ResultRetriever(const DocumentStore* doc_store, - std::unique_ptr<SnippetRetriever> snippet_retriever, - bool ignore_bad_document_ids) - : doc_store_(*doc_store), - snippet_retriever_(std::move(snippet_retriever)), - ignore_bad_document_ids_(ignore_bad_document_ids) {} - - const DocumentStore& doc_store_; - std::unique_ptr<SnippetRetriever> snippet_retriever_; - const bool ignore_bad_document_ids_; -}; - -} // namespace lib -} // namespace icing - -#endif // ICING_RESULT_RETRIEVER_H_ diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc deleted file mode 100644 index 044e0f2..0000000 --- a/icing/result/result-retriever_test.cc +++ /dev/null @@ -1,1951 +0,0 @@ -// Copyright (C) 2019 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "icing/result/result-retriever.h" - -#include <limits> -#include <memory> -#include <string_view> -#include <unordered_map> - -#include "gtest/gtest.h" -#include "icing/document-builder.h" -#include "icing/file/mock-filesystem.h" -#include "icing/portable/equals-proto.h" -#include "icing/portable/platform.h" -#include "icing/proto/document.pb.h" -#include "icing/proto/schema.pb.h" -#include "icing/proto/search.pb.h" -#include "icing/proto/term.pb.h" -#include "icing/result/projection-tree.h" -#include "icing/schema-builder.h" -#include "icing/schema/schema-store.h" -#include "icing/schema/section.h" -#include "icing/store/document-id.h" -#include "icing/testing/common-matchers.h" -#include "icing/testing/fake-clock.h" -#include "icing/testing/icu-data-file-helper.h" -#include "icing/testing/test-data.h" -#include "icing/testing/tmp-directory.h" -#include "icing/tokenization/language-segmenter-factory.h" -#include "icing/transform/normalizer-factory.h" -#include "icing/transform/normalizer.h" -#include "icing/util/snippet-helpers.h" -#include "unicode/uloc.h" - -namespace icing { -namespace lib { - -namespace { -using ::icing::lib::portable_equals_proto::EqualsProto; -using ::testing::ElementsAre; -using ::testing::Eq; -using ::testing::IsEmpty; -using ::testing::Return; -using ::testing::SizeIs; - -class ResultRetrieverTest : public testing::Test { - protected: - ResultRetrieverTest() : test_dir_(GetTestTempDir() + "/icing") { - filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); - } - - void SetUp() override { - if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { - ICING_ASSERT_OK( - // File generated via icu_data_file rule in //icing/BUILD. 
- icu_data_file_helper::SetUpICUDataFile( - GetTestFilePath("icing/icu.dat"))); - } - language_segmenter_factory::SegmenterOptions options(ULOC_US); - ICING_ASSERT_OK_AND_ASSIGN( - language_segmenter_, - language_segmenter_factory::Create(std::move(options))); - - ICING_ASSERT_OK_AND_ASSIGN( - schema_store_, - SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); - ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create( - /*max_term_byte_size=*/10000)); - - SchemaProto schema = - SchemaBuilder() - .AddType(SchemaTypeConfigBuilder() - .SetType("Email") - .AddProperty(PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("body") - .SetDataTypeString(TERM_MATCH_EXACT, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty( - PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "Person", /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_OPTIONAL))) - .AddType( - SchemaTypeConfigBuilder() - .SetType("Person") - .AddProperty(PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("emailAddress") - .SetDataTypeString(TERM_MATCH_PREFIX, - TOKENIZER_PLAIN) - .SetCardinality(CARDINALITY_OPTIONAL))) - .Build(); - ASSERT_THAT(schema_store_->SetSchema(schema), IsOk()); - } - - void TearDown() override { - filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); - } - - SectionId GetSectionId(const std::string& type, const std::string& property) { - auto type_id_or = schema_store_->GetSchemaTypeId(type); - if (!type_id_or.ok()) { - return kInvalidSectionId; - } - SchemaTypeId type_id = type_id_or.ValueOrDie(); - for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) { - auto metadata_or = 
schema_store_->GetSectionMetadata(type_id, section_id); - if (!metadata_or.ok()) { - break; - } - const SectionMetadata* metadata = metadata_or.ValueOrDie(); - if (metadata->path == property) { - return metadata->id; - } - } - return kInvalidSectionId; - } - - const Filesystem filesystem_; - const std::string test_dir_; - std::unique_ptr<LanguageSegmenter> language_segmenter_; - std::unique_ptr<SchemaStore> schema_store_; - std::unique_ptr<Normalizer> normalizer_; - FakeClock fake_clock_; -}; - -ResultSpecProto::SnippetSpecProto CreateSnippetSpec() { - ResultSpecProto::SnippetSpecProto snippet_spec; - snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max()); - snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max()); - snippet_spec.set_max_window_utf32_length(1024); - return snippet_spec; -} - -DocumentProto CreateDocument(int id) { - return DocumentBuilder() - .SetKey("icing", "Email/" + std::to_string(id)) - .SetSchema("Email") - .AddStringProperty("name", "subject foo " + std::to_string(id)) - .AddStringProperty("body", "body bar " + std::to_string(id)) - .SetCreationTimestampMs(1574365086666 + id) - .Build(); -} - -SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { - SectionIdMask mask = 0; - for (SectionId section_id : section_ids) { - mask |= (UINT64_C(1) << section_id); - } - return mask; -} - -TEST_F(ResultRetrieverTest, CreationWithNullPointerShouldFail) { - EXPECT_THAT( - ResultRetriever::Create(/*doc_store=*/nullptr, schema_store_.get(), - language_segmenter_.get(), normalizer_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - EXPECT_THAT( - ResultRetriever::Create(doc_store.get(), /*schema_store=*/nullptr, - 
language_segmenter_.get(), normalizer_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT(ResultRetriever::Create(doc_store.get(), schema_store_.get(), - /*language_segmenter=*/nullptr, - normalizer_.get()), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); - EXPECT_THAT(ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), - /*normalizer=*/nullptr), - StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION)); -} - -TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - doc_store->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/19}, - {document_id2, hit_section_id_mask, /*score=*/5}, - {document_id3, hit_section_id_mask, /*score=*/1}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - SearchResultProto::ResultProto result1; - *result1.mutable_document() = CreateDocument(/*id=*/1); - result1.set_score(19); - SearchResultProto::ResultProto result2; - *result2.mutable_document() = CreateDocument(/*id=*/2); - result2.set_score(5); - 
SearchResultProto::ResultProto result3; - *result3.mutable_document() = CreateDocument(/*id=*/3); - result3.set_score(1); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/3); - EXPECT_THAT( - result_retriever->RetrieveResults(page_result_state), - IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2), - EqualsProto(result3)))); -} - -TEST_F(ResultRetrieverTest, IgnoreErrors) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - - DocumentId invalid_document_id = -1; - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/12}, - {document_id2, hit_section_id_mask, /*score=*/4}, - {invalid_document_id, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get(), - /*ignore_bad_document_ids=*/true)); - - SearchResultProto::ResultProto result1; - *result1.mutable_document() = CreateDocument(/*id=*/1); - result1.set_score(12); - 
SearchResultProto::ResultProto result2; - *result2.mutable_document() = CreateDocument(/*id=*/2); - result2.set_score(4); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/3); - EXPECT_THAT( - result_retriever->RetrieveResults(page_result_state), - IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2)))); -} - -TEST_F(ResultRetrieverTest, NotIgnoreErrors) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - - DocumentId invalid_document_id = -1; - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {invalid_document_id, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get(), - /*ignore_bad_document_ids=*/false)); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - 
PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/3); - EXPECT_THAT(result_retriever->RetrieveResults(page_result_state), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - - DocumentId non_existing_document_id = 4; - page_result_state.scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {non_existing_document_id, hit_section_id_mask, /*score=*/0}}; - EXPECT_THAT(result_retriever->RetrieveResults(page_result_state), - StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); -} - -TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) { - MockFilesystem mock_filesystem; - ON_CALL(mock_filesystem, PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>())) - .WillByDefault(Return(false)); - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get(), - 
/*ignore_bad_document_ids=*/true)); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - EXPECT_THAT(result_retriever->RetrieveResults(page_result_state), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); -} - -TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - doc_store->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {document_id3, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), 
/*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/3); - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> results, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(results, SizeIs(3)); - EXPECT_THAT(results.at(0).snippet(), - EqualsProto(SnippetProto::default_instance())); - EXPECT_THAT(results.at(1).snippet(), - EqualsProto(SnippetProto::default_instance())); - EXPECT_THAT(results.at(2).snippet(), - EqualsProto(SnippetProto::default_instance())); -} - -TEST_F(ResultRetrieverTest, SimpleSnippeted) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - doc_store->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {document_id3, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - SnippetContext snippet_context( - /*query_terms_in=*/{{"", {"foo", "bar"}}}, CreateSnippetSpec(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - 
std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/3); - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - EXPECT_THAT(result, SizeIs(3)); - - const DocumentProto& result_document_one = result.at(0).document(); - const SnippetProto& result_snippet_one = result.at(0).snippet(); - EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1))); - EXPECT_THAT(result_snippet_one.entries(), SizeIs(2)); - EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body")); - std::string_view content = GetString( - &result_document_one, result_snippet_one.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)), - ElementsAre("body bar 1")); - EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)), - ElementsAre("bar")); - EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name")); - content = GetString(&result_document_one, - result_snippet_one.entries(1).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)), - ElementsAre("subject foo 1")); - EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)), - ElementsAre("foo")); - - const DocumentProto& result_document_two = result.at(1).document(); - const SnippetProto& result_snippet_two = result.at(1).snippet(); - EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2))); - EXPECT_THAT(result_snippet_two.entries(), SizeIs(2)); - EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body")); - content = GetString(&result_document_two, - result_snippet_two.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)), - ElementsAre("body bar 2")); - EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)), - ElementsAre("bar")); 
- EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name")); - content = GetString(&result_document_two, - result_snippet_two.entries(1).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)), - ElementsAre("subject foo 2")); - EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)), - ElementsAre("foo")); - - const DocumentProto& result_document_three = result.at(2).document(); - const SnippetProto& result_snippet_three = result.at(2).snippet(); - EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3))); - EXPECT_THAT(result_snippet_three.entries(), SizeIs(2)); - EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body")); - content = GetString(&result_document_three, - result_snippet_three.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)), - ElementsAre("body bar 3")); - EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)), - ElementsAre("bar")); - EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name")); - content = GetString(&result_document_three, - result_snippet_three.entries(1).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)), - ElementsAre("subject foo 3")); - EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)), - ElementsAre("foo")); -} - -TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - doc_store->Put(CreateDocument(/*id=*/3))); - - 
ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); - snippet_spec.set_num_to_snippet(1); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {document_id3, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - SnippetContext snippet_context(/*query_terms_in=*/{{"", {"foo", "bar"}}}, - snippet_spec, TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/3); - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - EXPECT_THAT(result, SizeIs(3)); - - const DocumentProto& result_document = result.at(0).document(); - const SnippetProto& result_snippet = result.at(0).snippet(); - EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1))); - EXPECT_THAT(result_snippet.entries(), SizeIs(2)); - EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body")); - std::string_view content = - GetString(&result_document, result_snippet.entries(0).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet.entries(0)), - ElementsAre("body bar 1")); - EXPECT_THAT(GetMatches(content, result_snippet.entries(0)), - ElementsAre("bar")); - EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name")); - content = - GetString(&result_document, 
result_snippet.entries(1).property_name()); - EXPECT_THAT(GetWindows(content, result_snippet.entries(1)), - ElementsAre("subject foo 1")); - EXPECT_THAT(GetMatches(content, result_snippet.entries(1)), - ElementsAre("foo")); - - EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2))); - EXPECT_THAT(result[1].snippet(), - EqualsProto(SnippetProto::default_instance())); - - EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3))); - EXPECT_THAT(result[2].snippet(), - EqualsProto(SnippetProto::default_instance())); -} - -TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - doc_store->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {document_id3, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); - snippet_spec.set_num_to_snippet(5); - SnippetContext snippet_context( - /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec), - 
TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/3); - - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - // num_to_snippet = 5, num_previously_returned_in = 0, - // We can return 5 - 0 = 5 snippets at most. We're able to return all 3 - // snippets here. - ASSERT_THAT(result, SizeIs(3)); - EXPECT_THAT(result[0].snippet().entries(), Not(IsEmpty())); - EXPECT_THAT(result[1].snippet().entries(), Not(IsEmpty())); - EXPECT_THAT(result[2].snippet().entries(), Not(IsEmpty())); -} - -TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - doc_store->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {document_id3, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - 
ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); - snippet_spec.set_num_to_snippet(5); - SnippetContext snippet_context( - /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/3, - /*num_per_page_in=*/3); - - // num_to_snippet = 5, num_previously_returned_in = 3, - // We can return 5 - 3 = 2 snippets. - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(3)); - EXPECT_THAT(result[0].snippet().entries(), Not(IsEmpty())); - EXPECT_THAT(result[1].snippet().entries(), Not(IsEmpty())); - EXPECT_THAT(result[2].snippet().entries(), IsEmpty()); -} - -TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(CreateDocument(/*id=*/1))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(CreateDocument(/*id=*/2))); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, - doc_store->Put(CreateDocument(/*id=*/3))); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}, - {document_id3, hit_section_id_mask, /*score=*/0}}; - ICING_ASSERT_OK_AND_ASSIGN( - 
std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); - snippet_spec.set_num_to_snippet(5); - SnippetContext snippet_context( - /*query_terms_in=*/{{"", {"foo", "bar"}}}, std::move(snippet_spec), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), - std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/6, - /*num_per_page_in=*/3); - - // num_to_snippet = 5, num_previously_returned_in = 6, - // We can't return any snippets for this page. - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(3)); - EXPECT_THAT(result[0].snippet().entries(), IsEmpty()); - EXPECT_THAT(result[1].snippet().entries(), IsEmpty()); - EXPECT_THAT(result[2].snippet().entries(), IsEmpty()); -} - -TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two Email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("name"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned results only contain the 'name' property. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Goodnight Moon!") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two Email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build()) - .AddStringProperty("name", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("sender.name"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. 
Verify that the returned results only contain the 'sender.name' - // property. - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .Build()) - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .Build()) - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two Email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build()) - .AddStringProperty("name", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("sender"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. 
Verify that the returned results only contain the 'sender' - // property and all of the subproperties of 'sender'. - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build()) - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. 
Add two Email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build()) - .AddStringProperty("name", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. 
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("sender.name"); - type_property_mask.add_paths("sender.emailAddress"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned results only contain the 'sender.name' and - // 'sender.address' properties. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetSchema("Person") - .AddStringProperty("name", "Meg Ryan") - .AddStringProperty("emailAddress", "shopgirl@aol.com") - .Build()) - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty( - "sender", DocumentBuilder() - .SetKey("namespace", "uri2") - .SetSchema("Person") - .AddStringProperty("name", "Tom Hanks") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build()) - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two Email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned results contain *no* properties. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two Email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. 
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("nonExistentProperty"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned results contain *no* properties. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two Email documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Goodnight Moon!") - .AddStringProperty("body", - "Count all the sheep and tell them 'Hello'.") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. 
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("name"); - type_property_mask.add_paths("nonExistentProperty"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned results only contain the 'name' property. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Goodnight Moon!") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionMultipleTypesNoWildcards) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. 
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask type_property_mask; - type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("name"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned Email results only contain the 'name' - // property and the returned Person results have all of their properties. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcard) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. 
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask wildcard_type_property_mask; - wildcard_type_property_mask.set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); - wildcard_type_property_mask.add_paths("name"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {std::string(ProjectionTree::kSchemaTypeWildcard), - ProjectionTree(wildcard_type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned Email results only contain the 'name' - // property and the returned Person results only contain the 'name' property. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcardWithOneOverride) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. 
- std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask email_type_property_mask; - email_type_property_mask.set_schema_type("Email"); - email_type_property_mask.add_paths("body"); - TypePropertyMask wildcard_type_property_mask; - wildcard_type_property_mask.set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); - wildcard_type_property_mask.add_paths("name"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(email_type_property_mask)}); - type_projection_tree_map.insert( - {std::string(ProjectionTree::kSchemaTypeWildcard), - ProjectionTree(wildcard_type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned Email results only contain the 'body' - // property and the returned Person results only contain the 'name' property. 
- ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, ProjectionSingleTypesWildcardAndOverride) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Person") - .AddStringProperty("name", "Mr. 
Body") - .AddStringProperty("emailAddress", "mr.body123@gmail.com") - .Build()) - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask email_type_property_mask; - email_type_property_mask.set_schema_type("Email"); - email_type_property_mask.add_paths("sender.name"); - TypePropertyMask wildcard_type_property_mask; - wildcard_type_property_mask.set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); - wildcard_type_property_mask.add_paths("name"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(email_type_property_mask)}); - type_projection_tree_map.insert( - {std::string(ProjectionTree::kSchemaTypeWildcard), - ProjectionTree(wildcard_type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> 
result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned Email results only contain the 'sender.name' - // property and the returned Person results only contain the 'name' property. - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Person") - .AddStringProperty("name", "Mr. Body") - .Build()) - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -TEST_F(ResultRetrieverTest, - ProjectionSingleTypesWildcardAndOverrideNestedProperty) { - ICING_ASSERT_OK_AND_ASSIGN( - DocumentStore::CreateResult create_result, - DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, - schema_store_.get())); - std::unique_ptr<DocumentStore> doc_store = - std::move(create_result.document_store); - - // 1. Add two documents - DocumentProto document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddStringProperty("name", "Hello World!") - .AddStringProperty( - "body", "Oh what a beautiful morning! Oh what a beautiful day!") - .AddDocumentProperty( - "sender", - DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Person") - .AddStringProperty("name", "Mr. 
Body") - .AddStringProperty("emailAddress", "mr.body123@gmail.com") - .Build()) - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, - doc_store->Put(document_one)); - - DocumentProto document_two = - DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .AddStringProperty("name", "Joe Fox") - .AddStringProperty("emailAddress", "ny152@aol.com") - .Build(); - ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, - doc_store->Put(document_two)); - - // 2. Setup the scored results. - std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), - GetSectionId("Email", "body")}; - SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); - std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, hit_section_id_mask, /*score=*/0}, - {document_id2, hit_section_id_mask, /*score=*/0}}; - - TypePropertyMask email_type_property_mask; - email_type_property_mask.set_schema_type("Email"); - email_type_property_mask.add_paths("sender.name"); - TypePropertyMask wildcard_type_property_mask; - wildcard_type_property_mask.set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); - wildcard_type_property_mask.add_paths("sender"); - std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; - type_projection_tree_map.insert( - {"Email", ProjectionTree(email_type_property_mask)}); - type_projection_tree_map.insert( - {std::string(ProjectionTree::kSchemaTypeWildcard), - ProjectionTree(wildcard_type_property_mask)}); - - SnippetContext snippet_context( - /*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::EXACT_ONLY); - PageResultState page_result_state( - std::move(scored_document_hits), /*next_page_token_in=*/1, - std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0, - /*num_per_page_in=*/2); - - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<ResultRetriever> 
result_retriever, - ResultRetriever::Create(doc_store.get(), schema_store_.get(), - language_segmenter_.get(), normalizer_.get())); - - // 3. Verify that the returned Email results only contain the 'sender.name' - // property and the returned Person results contain no properties. - ICING_ASSERT_OK_AND_ASSIGN( - std::vector<SearchResultProto::ResultProto> result, - result_retriever->RetrieveResults(page_result_state)); - ASSERT_THAT(result, SizeIs(2)); - - DocumentProto projected_document_one = - DocumentBuilder() - .SetKey("namespace", "uri1") - .SetCreationTimestampMs(1000) - .SetSchema("Email") - .AddDocumentProperty("sender", - DocumentBuilder() - .SetKey("namespace", "uri") - .SetSchema("Person") - .AddStringProperty("name", "Mr. Body") - .Build()) - .Build(); - EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); - - DocumentProto projected_document_two = DocumentBuilder() - .SetKey("namespace", "uri2") - .SetCreationTimestampMs(1000) - .SetSchema("Person") - .Build(); - EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); -} - -} // namespace - -} // namespace lib -} // namespace icing diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc index 2783fe2..f2fb94f 100644 --- a/icing/result/result-state-manager.cc +++ b/icing/result/result-state-manager.cc @@ -18,9 +18,8 @@ #include <queue> #include <utility> -#include "icing/proto/search.pb.h" -#include "icing/query/query-terms.h" #include "icing/result/page-result.h" +#include "icing/result/result-adjustment-info.h" #include "icing/result/result-retriever-v2.h" #include "icing/result/result-state-v2.h" #include "icing/scoring/scored-document-hits-ranker.h" @@ -43,8 +42,8 @@ ResultStateManager::ResultStateManager(int max_total_hits, libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> ResultStateManager::CacheAndRetrieveFirstPage( std::unique_ptr<ScoredDocumentHitsRanker> ranker, - SectionRestrictQueryTermsMap query_terms, - const 
SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, + std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info, + std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info, const ResultSpecProto& result_spec, const DocumentStore& document_store, const ResultRetrieverV2& result_retriever) { if (ranker == nullptr) { @@ -54,8 +53,8 @@ ResultStateManager::CacheAndRetrieveFirstPage( // Create shared pointer of ResultState. // ResultState should be created by ResultStateManager only. std::shared_ptr<ResultStateV2> result_state = std::make_shared<ResultStateV2>( - std::move(ranker), std::move(query_terms), search_spec, scoring_spec, - result_spec, document_store); + std::move(ranker), std::move(parent_adjustment_info), + std::move(child_adjustment_info), result_spec, document_store); // Retrieve docs outside of ResultStateManager critical section. // Will enter ResultState critical section inside ResultRetriever. diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h index e2bc797..400187f 100644 --- a/icing/result/result-state-manager.h +++ b/icing/result/result-state-manager.h @@ -24,10 +24,9 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/mutex.h" -#include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" -#include "icing/query/query-terms.h" #include "icing/result/page-result.h" +#include "icing/result/result-adjustment-info.h" #include "icing/result/result-retriever-v2.h" #include "icing/result/result-state-v2.h" #include "icing/scoring/scored-document-hits-ranker.h" @@ -61,6 +60,10 @@ class ResultStateManager { // result states if exceeding the cache size limit. next_page_token will be // set to a default value kInvalidNextPageToken if there're no more pages. // + // NOTE: parent_adjustment_info and child_adjustment_info can be nullptr if + // there is no requirement to apply adjustment (snippet, projection) to + // them. 
+ // // NOTE: it is possible to have empty result for the first page even if the // ranker was not empty before the retrieval, since GroupResultLimiter // may filter out all docs. In this case, the first page is also the @@ -70,14 +73,12 @@ class ResultStateManager { // A token and PageResult wrapped by std::pair on success // INVALID_ARGUMENT if the input ranker is null or contains no results libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>> - CacheAndRetrieveFirstPage(std::unique_ptr<ScoredDocumentHitsRanker> ranker, - SectionRestrictQueryTermsMap query_terms, - const SearchSpecProto& search_spec, - const ScoringSpecProto& scoring_spec, - const ResultSpecProto& result_spec, - const DocumentStore& document_store, - const ResultRetrieverV2& result_retriever) - ICING_LOCKS_EXCLUDED(mutex_); + CacheAndRetrieveFirstPage( + std::unique_ptr<ScoredDocumentHitsRanker> ranker, + std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info, + std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info, + const ResultSpecProto& result_spec, const DocumentStore& document_store, + const ResultRetrieverV2& result_retriever) ICING_LOCKS_EXCLUDED(mutex_); // Retrieves and returns PageResult for the next page. // The returned results won't exist in ResultStateManager anymore. 
If the diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc index c8af5fe..c0ea49a 100644 --- a/icing/result/result-state-manager_test.cc +++ b/icing/result/result-state-manager_test.cc @@ -20,6 +20,7 @@ #include "icing/file/filesystem.h" #include "icing/portable/equals-proto.h" #include "icing/result/page-result.h" +#include "icing/result/result-adjustment-info.h" #include "icing/result/result-retriever-v2.h" #include "icing/schema/schema-store.h" #include "icing/scoring/priority-queue-scored-document-hits-ranker.h" @@ -47,9 +48,6 @@ using ::testing::Not; using ::testing::SizeIs; using PageResultInfo = std::pair<uint64_t, PageResult>; -// TODO(sungyc): Refactor helper functions below (builder classes or common test -// utility). - ScoringSpecProto CreateScoringSpec() { ScoringSpecProto scoring_spec; scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); @@ -196,9 +194,8 @@ TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageOnePage) { ICING_ASSERT_OK_AND_ASSIGN( PageResultInfo page_result_info, result_state_manager.CacheAndRetrieveFirstPage( - std::move(ranker), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + std::move(ranker), /*parent_adjustment_info=*/nullptr, + /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/10, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -243,9 +240,8 @@ TEST_F(ResultStateManagerTest, ShouldCacheAndRetrieveFirstPageMultiplePages) { ICING_ASSERT_OK_AND_ASSIGN( PageResultInfo page_result_info1, result_state_manager.CacheAndRetrieveFirstPage( - std::move(ranker), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + std::move(ranker), /*parent_adjustment_info=*/nullptr, + /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); EXPECT_THAT(page_result_info1.first, 
Not(Eq(kInvalidNextPageToken))); @@ -290,9 +286,8 @@ TEST_F(ResultStateManagerTest, NullRankerShouldReturnError) { EXPECT_THAT( result_state_manager.CacheAndRetrieveFirstPage( - /*ranker=*/nullptr, - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*ranker=*/nullptr, /*parent_adjustment_info=*/nullptr, + /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever()), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); @@ -308,8 +303,7 @@ TEST_F(ResultStateManagerTest, EmptyRankerShouldReturnEmptyFirstPage) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::vector<ScoredDocumentHit>(), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -347,9 +341,8 @@ TEST_F(ResultStateManagerTest, ShouldAllowEmptyFirstPage) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), result_spec, document_store(), - result_retriever())); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, document_store(), result_retriever())); // If the first page has no result, then it should be the last page. 
EXPECT_THAT(page_result_info.first, Eq(kInvalidNextPageToken)); EXPECT_THAT(page_result_info.second.results, IsEmpty()); @@ -391,9 +384,8 @@ TEST_F(ResultStateManagerTest, ShouldAllowEmptyLastPage) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), result_spec, document_store(), - result_retriever())); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, document_store(), result_retriever())); EXPECT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken))); ASSERT_THAT(page_result_info1.second.results, SizeIs(2)); EXPECT_THAT(page_result_info1.second.results.at(0).document(), @@ -437,7 +429,10 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - query_terms, search_spec, scoring_spec, result_spec, document_store(), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec, + result_spec, query_terms), + /*child_adjustment_info=*/nullptr, result_spec, document_store(), result_retriever())); ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken))); @@ -449,7 +444,10 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - query_terms, search_spec, scoring_spec, result_spec, document_store(), + /*parent_adjustment_info=*/ + std::make_unique<ResultAdjustmentInfo>(search_spec, scoring_spec, + result_spec, query_terms), + /*child_adjustment_info=*/nullptr, result_spec, document_store(), result_retriever())); // Calling CacheAndRetrieveFirstPage() on state 2 should invalidate the @@ -484,8 +482,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< 
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); ASSERT_THAT(page_result_info1.first, Not(Eq(kInvalidNextPageToken))); @@ -498,8 +495,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); ASSERT_THAT(page_result_info2.first, Not(Eq(kInvalidNextPageToken))); @@ -542,8 +538,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); ASSERT_THAT(page_result_info.first, Not(Eq(kInvalidNextPageToken))); @@ -589,8 +584,7 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -600,8 +594,7 @@ 
TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -639,8 +632,7 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -650,8 +642,7 @@ TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -685,8 +676,7 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -696,8 +686,7 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) { std::make_unique< 
PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -708,8 +697,7 @@ TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits3), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -755,8 +743,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -766,8 +753,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -777,8 +763,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits3), /*is_descending=*/true), - /*query_terms=*/{}, 
SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -797,8 +782,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits4), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -851,8 +835,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -862,8 +845,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -873,8 +855,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits3), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), 
document_store(), result_retriever())); @@ -897,8 +878,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits4), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -908,8 +888,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits5), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -919,8 +898,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits6), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -982,8 +960,7 @@ TEST_F( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -993,8 +970,7 @@ TEST_F( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, 
SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1004,8 +980,7 @@ TEST_F( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits3), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1024,8 +999,7 @@ TEST_F( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits4), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1040,8 +1014,7 @@ TEST_F( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits5), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1097,8 +1070,7 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), 
result_retriever())); @@ -1108,8 +1080,7 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1119,8 +1090,7 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits3), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1144,8 +1114,7 @@ TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits4), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1198,8 +1167,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1209,8 +1177,7 @@ 
TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1220,8 +1187,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits3), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1245,8 +1211,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits4), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1261,8 +1226,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits5), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1316,8 +1280,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, 
SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1327,8 +1290,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1345,8 +1307,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits3), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); EXPECT_THAT(page_result_info3.first, Not(Eq(kInvalidNextPageToken))); @@ -1420,8 +1381,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1435,8 +1395,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, 
/*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/1, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); @@ -1474,8 +1433,7 @@ TEST_F(ResultStateManagerTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE), document_store(), result_retriever())); diff --git a/icing/result/result-state-manager_thread-safety_test.cc b/icing/result/result-state-manager_thread-safety_test.cc index 0da37d8..55eda85 100644 --- a/icing/result/result-state-manager_thread-safety_test.cc +++ b/icing/result/result-state-manager_thread-safety_test.cc @@ -26,7 +26,6 @@ #include "icing/result/result-state-manager.h" #include "icing/schema/schema-store.h" #include "icing/scoring/priority-queue-scored-document-hits-ranker.h" -#include "icing/scoring/scored-document-hits-ranker.h" #include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" @@ -49,12 +48,6 @@ using ::testing::Not; using ::testing::SizeIs; using PageResultInfo = std::pair<uint64_t, PageResult>; -ScoringSpecProto CreateScoringSpec() { - ScoringSpecProto scoring_spec; - scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - return scoring_spec; -} - ResultSpecProto CreateResultSpec(int num_per_page) { ResultSpecProto result_spec; result_spec.set_num_per_page(num_per_page); @@ -163,9 +156,8 @@ TEST_F(ResultStateManagerThreadSafetyTest, std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), CreateResultSpec(kNumPerPage), *document_store_, - 
*result_retriever_)); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + CreateResultSpec(kNumPerPage), *document_store_, *result_retriever_)); ASSERT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage)); for (int i = 0; i < kNumPerPage; ++i) { ASSERT_THAT(page_result_info1.second.results[i].score(), Eq(i)); @@ -264,9 +256,8 @@ TEST_F(ResultStateManagerThreadSafetyTest, InvalidateResultStateWhileUsing) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/false), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), CreateResultSpec(kNumPerPage), *document_store_, - *result_retriever_)); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + CreateResultSpec(kNumPerPage), *document_store_, *result_retriever_)); ASSERT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage)); for (int i = 0; i < kNumPerPage; ++i) { ASSERT_THAT(page_result_info1.second.results[i].score(), Eq(i)); @@ -394,8 +385,8 @@ TEST_F(ResultStateManagerThreadSafetyTest, MultipleResultStates) { std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits_copy), /*is_descending=*/false), - /*query_terms=*/{}, SearchSpecProto::default_instance(), - CreateScoringSpec(), CreateResultSpec(kNumPerPage), + /*parent_adjustment_info=*/nullptr, + /*child_adjustment_info=*/nullptr, CreateResultSpec(kNumPerPage), *document_store_, *result_retriever)); EXPECT_THAT(page_result_info1.second.results, SizeIs(kNumPerPage)); for (int i = 0; i < kNumPerPage; ++i) { diff --git a/icing/result/result-state-v2.cc b/icing/result/result-state-v2.cc index e42620e..9459910 100644 --- a/icing/result/result-state-v2.cc +++ b/icing/result/result-state-v2.cc @@ -15,54 +15,33 @@ #include "icing/result/result-state-v2.h" #include <atomic> +#include <cstdint> #include <memory> +#include <string> +#include <vector> -#include 
"icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" -#include "icing/proto/term.pb.h" -#include "icing/result/projection-tree.h" -#include "icing/result/snippet-context.h" +#include "icing/result/result-adjustment-info.h" #include "icing/scoring/scored-document-hits-ranker.h" +#include "icing/store/document-store.h" namespace icing { namespace lib { -namespace { -SnippetContext CreateSnippetContext(SectionRestrictQueryTermsMap query_terms, - const SearchSpecProto& search_spec, - const ResultSpecProto& result_spec) { - if (result_spec.snippet_spec().num_to_snippet() > 0 && - result_spec.snippet_spec().num_matches_per_property() > 0) { - // Needs snippeting - return SnippetContext(std::move(query_terms), result_spec.snippet_spec(), - search_spec.term_match_type()); - } - return SnippetContext(/*query_terms_in=*/{}, - ResultSpecProto::SnippetSpecProto::default_instance(), - TermMatchType::UNKNOWN); -} -} // namespace - ResultStateV2::ResultStateV2( std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in, - SectionRestrictQueryTermsMap query_terms, - const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, + std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info, + std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info, const ResultSpecProto& result_spec, const DocumentStore& document_store) : scored_document_hits_ranker(std::move(scored_document_hits_ranker_in)), num_returned(0), - snippet_context_(CreateSnippetContext(std::move(query_terms), search_spec, - result_spec)), + parent_adjustment_info_(std::move(parent_adjustment_info)), + child_adjustment_info_(std::move(child_adjustment_info)), num_per_page_(result_spec.num_per_page()), num_total_bytes_per_page_threshold_( result_spec.num_total_bytes_per_page_threshold()), num_total_hits_(nullptr), result_group_type_(result_spec.result_group_type()) { - for (const TypePropertyMask& type_field_mask : - result_spec.type_property_masks()) { - 
projection_tree_map_.insert( - {type_field_mask.schema_type(), ProjectionTree(type_field_mask)}); - } - for (const ResultSpecProto::ResultGrouping& result_grouping : result_spec.result_groupings()) { int group_id = group_result_limits.size(); diff --git a/icing/result/result-state-v2.h b/icing/result/result-state-v2.h index df2f070..b01bee9 100644 --- a/icing/result/result-state-v2.h +++ b/icing/result/result-state-v2.h @@ -22,13 +22,10 @@ #include <vector> #include "icing/absl_ports/mutex.h" -#include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" -#include "icing/result/projection-tree.h" -#include "icing/result/snippet-context.h" +#include "icing/result/result-adjustment-info.h" #include "icing/scoring/scored-document-hits-ranker.h" #include "icing/store/document-store.h" -#include "icing/store/namespace-id.h" namespace icing { namespace lib { @@ -39,8 +36,8 @@ class ResultStateV2 { public: explicit ResultStateV2( std::unique_ptr<ScoredDocumentHitsRanker> scored_document_hits_ranker_in, - SectionRestrictQueryTermsMap query_terms, - const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, + std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info, + std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info, const ResultSpecProto& result_spec, const DocumentStore& document_store); ~ResultStateV2(); @@ -60,14 +57,28 @@ class ResultStateV2 { void IncrementNumTotalHits(int increment_by) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex); - const SnippetContext& snippet_context() const + // Returns a nullable pointer to parent adjustment info. + ResultAdjustmentInfo* parent_adjustment_info() + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex) { + return parent_adjustment_info_.get(); + } + + // Returns a nullable pointer to parent adjustment info. 
+ const ResultAdjustmentInfo* parent_adjustment_info() const ICING_SHARED_LOCKS_REQUIRED(mutex) { - return snippet_context_; + return parent_adjustment_info_.get(); + } + + // Returns a nullable pointer to child adjustment info. + ResultAdjustmentInfo* child_adjustment_info() + ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex) { + return child_adjustment_info_.get(); } - const std::unordered_map<std::string, ProjectionTree>& projection_tree_map() - const ICING_SHARED_LOCKS_REQUIRED(mutex) { - return projection_tree_map_; + // Returns a nullable pointer to child adjustment info. + const ResultAdjustmentInfo* child_adjustment_info() const + ICING_SHARED_LOCKS_REQUIRED(mutex) { + return child_adjustment_info_.get(); } const std::unordered_map<int32_t, int>& entry_id_group_id_map() const @@ -110,11 +121,16 @@ class ResultStateV2 { int num_returned ICING_GUARDED_BY(mutex); private: - // Information needed for snippeting. - SnippetContext snippet_context_ ICING_GUARDED_BY(mutex); + // Adjustment information for parent documents, including snippet and + // projection. Can be nullptr if there is no adjustment info for parent + // documents. + std::unique_ptr<ResultAdjustmentInfo> parent_adjustment_info_ + ICING_GUARDED_BY(mutex); - // Information needed for projection. - std::unordered_map<std::string, ProjectionTree> projection_tree_map_ + // Adjustment information for child documents, including snippet and + // projection. This is only used for join query. Can be nullptr if there is no + // adjustment info for child documents. 
+ std::unique_ptr<ResultAdjustmentInfo> child_adjustment_info_ ICING_GUARDED_BY(mutex); // A map between result grouping entry id and the id of the group that it diff --git a/icing/result/result-state-v2_test.cc b/icing/result/result-state-v2_test.cc index 4f16e7f..8706b6d 100644 --- a/icing/result/result-state-v2_test.cc +++ b/icing/result/result-state-v2_test.cc @@ -18,27 +18,18 @@ #include <cstdint> #include <memory> #include <string> -#include <unordered_map> -#include <unordered_set> #include <vector> #include "gtest/gtest.h" #include "icing/absl_ports/mutex.h" #include "icing/file/filesystem.h" -#include "icing/portable/equals-proto.h" #include "icing/proto/document.pb.h" #include "icing/proto/schema.pb.h" -#include "icing/proto/scoring.pb.h" #include "icing/proto/search.pb.h" -#include "icing/proto/term.pb.h" -#include "icing/result/projection-tree.h" -#include "icing/result/snippet-context.h" #include "icing/schema/schema-store.h" #include "icing/scoring/priority-queue-scored-document-hits-ranker.h" #include "icing/scoring/scored-document-hit.h" -#include "icing/scoring/scored-document-hits-ranker.h" #include "icing/store/document-store.h" -#include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" #include "icing/util/clock.h" @@ -47,26 +38,11 @@ namespace icing { namespace lib { namespace { -using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::ElementsAre; using ::testing::Eq; -using ::testing::IsEmpty; using ::testing::Pair; using ::testing::UnorderedElementsAre; -SearchSpecProto CreateSearchSpec(TermMatchType::Code match_type) { - SearchSpecProto search_spec; - search_spec.set_term_match_type(match_type); - return search_spec; -} - -ScoringSpecProto CreateScoringSpec(bool is_descending_order) { - ScoringSpecProto scoring_spec; - scoring_spec.set_order_by(is_descending_order ? 
ScoringSpecProto::Order::DESC - : ScoringSpecProto::Order::ASC); - return scoring_spec; -} - ResultSpecProto CreateResultSpec( int num_per_page, ResultSpecProto::ResultGroupingType result_group_type) { ResultSpecProto result_spec; @@ -133,14 +109,13 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToSpecs) { CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); result_spec.set_num_total_bytes_per_page_threshold(4096); + // Adjustment info is not important in this test. ResultStateV2 result_state( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( - std::vector<ScoredDocumentHit>(), - /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - document_store()); + std::vector<ScoredDocumentHit>(), /*is_descending=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, document_store()); absl_ports::shared_lock l(&result_state.mutex); @@ -156,14 +131,14 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) { ASSERT_THAT(default_result_spec.num_total_bytes_per_page_threshold(), Eq(std::numeric_limits<int32_t>::max())); + // Adjustment info is not important in this test. 
ResultStateV2 result_state( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::vector<ScoredDocumentHit>(), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), default_result_spec, - document_store()); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + default_result_spec, document_store()); absl_ports::shared_lock l(&result_state.mutex); @@ -174,116 +149,6 @@ TEST_F(ResultStateV2Test, ShouldInitializeValuesAccordingToDefaultSpecs) { Eq(default_result_spec.num_total_bytes_per_page_threshold())); } -TEST_F(ResultStateV2Test, ShouldReturnSnippetContextAccordingToSpecs) { - ResultSpecProto result_spec = - CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); - result_spec.mutable_snippet_spec()->set_num_to_snippet(5); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); - - SectionRestrictQueryTermsMap query_terms_map; - query_terms_map.emplace("term1", std::unordered_set<std::string>()); - - ResultStateV2 result_state( - std::make_unique< - PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( - std::vector<ScoredDocumentHit>(), - /*is_descending=*/true), - query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - document_store()); - - absl_ports::shared_lock l(&result_state.mutex); - - const SnippetContext snippet_context = result_state.snippet_context(); - - // Snippet context should be derived from the specs above. - EXPECT_TRUE(snippet_context.query_terms.find("term1") != - snippet_context.query_terms.end()); - EXPECT_THAT(snippet_context.snippet_spec, - EqualsProto(result_spec.snippet_spec())); - EXPECT_THAT(snippet_context.match_type, Eq(TermMatchType::EXACT_ONLY)); - - // The same copy can be fetched multiple times. 
- const SnippetContext snippet_context2 = result_state.snippet_context(); - EXPECT_TRUE(snippet_context2.query_terms.find("term1") != - snippet_context2.query_terms.end()); - EXPECT_THAT(snippet_context2.snippet_spec, - EqualsProto(result_spec.snippet_spec())); - EXPECT_THAT(snippet_context2.match_type, Eq(TermMatchType::EXACT_ONLY)); -} - -TEST_F(ResultStateV2Test, NoSnippetingShouldReturnNull) { - ResultSpecProto result_spec = - CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); - // Setting num_to_snippet to 0 so that snippeting info won't be - // stored. - result_spec.mutable_snippet_spec()->set_num_to_snippet(0); - result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); - - SectionRestrictQueryTermsMap query_terms_map; - query_terms_map.emplace("term1", std::unordered_set<std::string>()); - - ResultStateV2 result_state( - std::make_unique< - PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( - std::vector<ScoredDocumentHit>(), - /*is_descending=*/true), - query_terms_map, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - document_store()); - - absl_ports::shared_lock l(&result_state.mutex); - - const SnippetContext snippet_context = result_state.snippet_context(); - EXPECT_THAT(snippet_context.query_terms, IsEmpty()); - EXPECT_THAT( - snippet_context.snippet_spec, - EqualsProto(ResultSpecProto::SnippetSpecProto::default_instance())); - EXPECT_THAT(snippet_context.match_type, TermMatchType::UNKNOWN); -} - -TEST_F(ResultStateV2Test, ShouldConstructProjectionTreeMapAccordingToSpecs) { - // Create a ResultSpec with type property mask. 
- ResultSpecProto result_spec = - CreateResultSpec(/*num_per_page=*/2, ResultSpecProto::NAMESPACE); - TypePropertyMask* email_type_property_mask = - result_spec.add_type_property_masks(); - email_type_property_mask->set_schema_type("Email"); - email_type_property_mask->add_paths("sender.name"); - email_type_property_mask->add_paths("sender.emailAddress"); - TypePropertyMask* phone_type_property_mask = - result_spec.add_type_property_masks(); - phone_type_property_mask->set_schema_type("Phone"); - phone_type_property_mask->add_paths("caller"); - TypePropertyMask* wildcard_type_property_mask = - result_spec.add_type_property_masks(); - wildcard_type_property_mask->set_schema_type( - std::string(ProjectionTree::kSchemaTypeWildcard)); - wildcard_type_property_mask->add_paths("wild.card"); - - ResultStateV2 result_state( - std::make_unique< - PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( - std::vector<ScoredDocumentHit>(), - /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - document_store()); - - absl_ports::shared_lock l(&result_state.mutex); - - const std::unordered_map<std::string, ProjectionTree>& projection_tree_map = - result_state.projection_tree_map(); - EXPECT_THAT(projection_tree_map, - UnorderedElementsAre( - Pair("Email", ProjectionTree(*email_type_property_mask)), - Pair("Phone", ProjectionTree(*phone_type_property_mask)), - Pair(std::string(ProjectionTree::kSchemaTypeWildcard), - ProjectionTree(*wildcard_type_property_mask)))); -} - TEST_F(ResultStateV2Test, ShouldConstructNamespaceGroupIdMapAndGroupResultLimitsAccordingToSpecs) { // Create 3 docs under namespace1, namespace2, namespace3. @@ -342,14 +207,14 @@ TEST_F(ResultStateV2Test, int32_t entry_id3, document_store().GetResultGroupingEntryId( result_grouping_type, "namespace3", "Document")); + // Adjustment info is not important in this test. 
ResultStateV2 result_state( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::vector<ScoredDocumentHit>(), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), result_spec, - document_store()); + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, + result_spec, document_store()); absl_ports::shared_lock l(&result_state.mutex); @@ -374,14 +239,14 @@ TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHits) { AddScoredDocument(/*document_id=*/4), AddScoredDocument(/*document_id=*/3)}; + // Adjustment info is not important in this test. // Creates a ResultState with 5 ScoredDocumentHits. ResultStateV2 result_state( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE), document_store()); @@ -408,14 +273,14 @@ TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHitsWhenDestructed) { num_total_hits() = 2; { + // Adjustment info is not important in this test. // Creates a ResultState with 5 ScoredDocumentHits. 
ResultStateV2 result_state1( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits1), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE), document_store()); @@ -425,14 +290,14 @@ TEST_F(ResultStateV2Test, ShouldUpdateNumTotalHitsWhenDestructed) { ASSERT_THAT(num_total_hits(), Eq(7)); { + // Adjustment info is not important in this test. // Creates another ResultState with 2 ScoredDocumentHits. ResultStateV2 result_state2( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits2), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE), document_store()); @@ -457,13 +322,13 @@ TEST_F(ResultStateV2Test, ShouldNotUpdateNumTotalHitsWhenNotRegistered) { // Creates a ResultState with 5 ScoredDocumentHits. { + // Adjustment info is not important in this test. ResultStateV2 result_state( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE), document_store()); @@ -488,14 +353,14 @@ TEST_F(ResultStateV2Test, ShouldDecrementOriginalNumTotalHitsWhenReregister) { AddScoredDocument(/*document_id=*/4), AddScoredDocument(/*document_id=*/3)}; + // Adjustment info is not important in this test. 
// Creates a ResultState with 5 ScoredDocumentHits. ResultStateV2 result_state( std::make_unique< PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>( std::move(scored_document_hits), /*is_descending=*/true), - /*query_terms=*/{}, CreateSearchSpec(TermMatchType::EXACT_ONLY), - CreateScoringSpec(/*is_descending_order=*/true), + /*parent_adjustment_info=*/nullptr, /*child_adjustment_info=*/nullptr, CreateResultSpec(/*num_per_page=*/5, ResultSpecProto::NAMESPACE), document_store()); @@ -520,4 +385,4 @@ TEST_F(ResultStateV2Test, ShouldDecrementOriginalNumTotalHitsWhenReregister) { } // namespace } // namespace lib -} // namespace icing
\ No newline at end of file +} // namespace icing diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc index a94775d..d654195 100644 --- a/icing/result/snippet-retriever.cc +++ b/icing/result/snippet-retriever.cc @@ -502,91 +502,86 @@ void GetEntriesFromProperty(const PropertyProto* current_property, CharacterIterator start_itr(value); CharacterIterator end_itr(value); CharacterIterator reset_itr(value); + bool encountered_error = false; while (iterator->Advance()) { std::vector<Token> batch_tokens = iterator->GetTokens(); if (batch_tokens.empty()) { continue; } - // As snippet matching may move iterator around, we save a reset iterator - // so that we can reset to the initial iterator state, and continue - // Advancing in order in the next round. + bool needs_reset = false; reset_itr.MoveToUtf8(batch_tokens.at(0).text.begin() - value.begin()); - - for (const Token& token : batch_tokens) { + start_itr = reset_itr; + end_itr = start_itr; + for (int i = 0; i < batch_tokens.size(); ++i) { + const Token& token = batch_tokens.at(i); CharacterIterator submatch_end = matcher->Matches(token); // If the token matched a query term, then submatch_end will point to an // actual position within token.text. - if (submatch_end.utf8_index() != -1) { - if (!start_itr.MoveToUtf8(token.text.begin() - value.begin())) { - // We can't get the char_iterator to a valid position, so there's no - // way for us to provide valid utf-16 indices. There's nothing more - // we can do here, so just return whatever we've built up so far. 
- if (!snippet_entry.snippet_matches().empty()) { - *snippet_proto->add_entries() = std::move(snippet_entry); - } - return; - } - if (!end_itr.MoveToUtf8(token.text.end() - value.begin())) { - // Same as above - if (!snippet_entry.snippet_matches().empty()) { - *snippet_proto->add_entries() = std::move(snippet_entry); - } - return; - } - SectionData data = {property_path, value}; - auto match_or = RetrieveMatch(match_options->snippet_spec, data, - iterator.get(), start_itr, end_itr); - if (!match_or.ok()) { - if (absl_ports::IsAborted(match_or.status())) { - // Only an aborted. We can't get this match, but we might be able - // to retrieve others. Just continue. - continue; - } else { - // Probably an internal error. The tokenizer iterator is probably - // in an invalid state. There's nothing more we can do here, so - // just return whatever we've built up so far. - if (!snippet_entry.snippet_matches().empty()) { - *snippet_proto->add_entries() = std::move(snippet_entry); - } - return; - } - } - SnippetMatchProto match = std::move(match_or).ValueOrDie(); - // submatch_end refers to a position *within* token.text. - // This, conveniently enough, means that index that submatch_end - // points to is the length of the submatch (because the submatch - // starts at 0 in token.text). - match.set_submatch_byte_length(submatch_end.utf8_index()); - match.set_submatch_utf16_length(submatch_end.utf16_index()); - // Add the values for the submatch. - snippet_entry.mutable_snippet_matches()->Add(std::move(match)); - - if (--match_options->max_matches_remaining <= 0) { - *snippet_proto->add_entries() = std::move(snippet_entry); - return; + if (submatch_end.utf8_index() == -1) { + continue; + } + // As snippet matching may move iterator around, we save a reset + // iterator so that we can reset to the initial iterator state, and + // continue Advancing in order in the next round. 
+ if (!start_itr.MoveToUtf8(token.text.begin() - value.begin())) { + encountered_error = true; + break; + } + if (!end_itr.MoveToUtf8(token.text.end() - value.begin())) { + encountered_error = true; + break; + } + SectionData data = {property_path, value}; + auto match_or = RetrieveMatch(match_options->snippet_spec, data, + iterator.get(), start_itr, end_itr); + if (!match_or.ok()) { + if (absl_ports::IsAborted(match_or.status())) { + // Only an aborted. We can't get this match, but we might be able + // to retrieve others. Just continue. + continue; + } else { + encountered_error = true; + break; } } + SnippetMatchProto match = std::move(match_or).ValueOrDie(); + if (match.window_byte_length() > 0) { + needs_reset = true; + } + // submatch_end refers to a position *within* token.text. + // This, conveniently enough, means that index that submatch_end + // points to is the length of the submatch (because the submatch + // starts at 0 in token.text). + match.set_submatch_byte_length(submatch_end.utf8_index()); + match.set_submatch_utf16_length(submatch_end.utf16_index()); + // Add the values for the submatch. + snippet_entry.mutable_snippet_matches()->Add(std::move(match)); + + if (--match_options->max_matches_remaining <= 0) { + *snippet_proto->add_entries() = std::move(snippet_entry); + return; + } } - // RetrieveMatch calls DetermineWindowStart/End, which may change the - // position of the iterator. So, reset the iterator back to the original - // position. The first token of the token batch will be the token to reset - // to. 
- - bool success = false; - if (reset_itr.utf8_index() > 0) { - success = - iterator->ResetToTokenStartingAfter(reset_itr.utf32_index() - 1); - } else { - success = iterator->ResetToStart(); + if (encountered_error) { + break; } - if (!success) { - if (!snippet_entry.snippet_matches().empty()) { - *snippet_proto->add_entries() = std::move(snippet_entry); + // RetrieveMatch may call DetermineWindowStart/End if windowing is + // requested, which may change the position of the iterator. So, reset the + // iterator back to the original position. The first token of the token + // batch will be the token to reset to. + if (needs_reset) { + if (reset_itr.utf8_index() > 0) { + encountered_error = + !iterator->ResetToTokenStartingAfter(reset_itr.utf32_index() - 1); + } else { + encountered_error = !iterator->ResetToStart(); } - return; + } + if (encountered_error) { + break; } } if (!snippet_entry.snippet_matches().empty()) { diff --git a/icing/result/snippet-retriever_benchmark.cc b/icing/result/snippet-retriever_benchmark.cc new file mode 100644 index 0000000..9af8efa --- /dev/null +++ b/icing/result/snippet-retriever_benchmark.cc @@ -0,0 +1,329 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "testing/base/public/benchmark.h" +#include "gmock/gmock.h" +#include "third_party/absl/flags/flag.h" +#include "icing/document-builder.h" +#include "icing/file/filesystem.h" +#include "icing/proto/schema.pb.h" +#include "icing/proto/search.pb.h" +#include "icing/result/snippet-retriever.h" +#include "icing/schema-builder.h" +#include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/random-string.h" +#include "icing/testing/test-data.h" +#include "icing/testing/tmp-directory.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/transform/normalizer-factory.h" +#include "icing/util/clock.h" +#include "icing/util/logging.h" +#include "unicode/uloc.h" + +// Run on a Linux workstation: +// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt +// //icing/result:snippet-retriever_benchmark +// +// $ blaze-bin/icing/result/snippet-retriever_benchmark +// --benchmark_filter=all +// +// Run on an Android device: +// Make target //icing/tokenization:language-segmenter depend on +// //third_party/icu +// +// Make target //icing/transform:normalizer depend on +// //third_party/icu +// +// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" +// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt +// //icing/result:snippet-retriever_benchmark +// +// $ adb push blaze-bin/icing/result/snippet-retriever_benchmark +// /data/local/tmp/ +// +// $ adb shell /data/local/tmp/snippet-retriever_benchmark +// --benchmark_filter=all --adb + +// Flag to tell the benchmark that it'll be run on an Android device via adb, +// the benchmark will set up data files accordingly. 
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device"); + +namespace icing { +namespace lib { + +namespace { + +using ::testing::SizeIs; + +void BM_SnippetOneProperty(benchmark::State& state) { + bool run_via_adb = absl::GetFlag(FLAGS_adb); + if (!run_via_adb) { + ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + + const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark"; + const std::string schema_dir = base_dir + "/schema"; + Filesystem filesystem; + filesystem.DeleteDirectoryRecursively(base_dir.c_str()); + if (!filesystem.CreateDirectoryRecursively(schema_dir.c_str())) { + ICING_LOG(ERROR) << "Failed to create test directories"; + } + + language_segmenter_factory::SegmenterOptions options(ULOC_US); + std::unique_ptr<LanguageSegmenter> language_segmenter = + language_segmenter_factory::Create(std::move(options)).ValueOrDie(); + std::unique_ptr<Normalizer> normalizer = + normalizer_factory::Create( + /*max_term_byte_size=*/std::numeric_limits<int>::max()) + .ValueOrDie(); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("type1").AddProperty( + PropertyConfigBuilder() + .SetName("prop1") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + Clock clock; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem, schema_dir, &clock)); + ICING_ASSERT_OK(schema_store->SetSchema(schema)); + + auto snippet_retriever = + SnippetRetriever::Create(schema_store.get(), language_segmenter.get(), + normalizer.get()) + .ValueOrDie(); + + int num_matches = state.range(0); + int total_terms = state.range(1); + + std::default_random_engine random; + std::vector<std::string> language = + CreateLanguages(/*language_size=*/1000, &random); + std::uniform_int_distribution<size_t> uniform(0u, language.size() - 1); + 
std::uniform_real_distribution<double> uniform_double(0.0, 1.0); + + std::string text; + int num_actual_matches = 0; + double match_chance; + while (total_terms-- > 0) { + std::string term; + match_chance = static_cast<double>(num_matches) / total_terms; + if (uniform_double(random) <= match_chance) { + --num_matches; + ++num_actual_matches; + term = "foo"; + } else { + term = language.at(uniform(random)); + } + absl_ports::StrAppend(&text, " ", term); + } + DocumentProto document = DocumentBuilder() + .SetKey("icing", "uri1") + .SetSchema("type1") + .AddStringProperty("prop1", text) + .Build(); + SectionRestrictQueryTermsMap query_terms = {{"", {"foo"}}}; + ResultSpecProto::SnippetSpecProto snippet_spec; + snippet_spec.set_num_to_snippet(100000); + snippet_spec.set_num_matches_per_property(100000); + snippet_spec.set_max_window_utf32_length(64); + + SectionIdMask section_id_mask = 0x01; + SnippetProto snippet_proto; + for (auto _ : state) { + snippet_proto = snippet_retriever->RetrieveSnippet( + query_terms, TERM_MATCH_PREFIX, snippet_spec, document, + section_id_mask); + ASSERT_THAT(snippet_proto.entries(), SizeIs(1)); + ASSERT_THAT(snippet_proto.entries(0).snippet_matches(), + SizeIs(num_actual_matches)); + } + + // Destroy the schema store before the whole directory is removed because they + // persist data in destructor. 
+ schema_store.reset(); + filesystem.DeleteDirectoryRecursively(base_dir.c_str()); +} +BENCHMARK(BM_SnippetOneProperty) + // Arguments: num_matches, total_terms + ->ArgPair(1, 1) + ->ArgPair(1, 16) // single match + ->ArgPair(2, 16) // ~10% matches + ->ArgPair(3, 16) // ~20% matches + ->ArgPair(8, 16) // 50% matches + ->ArgPair(16, 16) // 100% matches + ->ArgPair(1, 128) // single match + ->ArgPair(13, 128) // ~10% matches + ->ArgPair(26, 128) // ~20% matches + ->ArgPair(64, 128) // 50% matches + ->ArgPair(128, 128) // 100% matches + ->ArgPair(1, 512) // single match + ->ArgPair(51, 512) // ~10% matches + ->ArgPair(102, 512) // ~20% matches + ->ArgPair(256, 512) // 50% matches + ->ArgPair(512, 512) // 100% matches + ->ArgPair(1, 1024) // single match + ->ArgPair(102, 1024) // ~10% matches + ->ArgPair(205, 1024) // ~20% matches + ->ArgPair(512, 1024) // 50% matches + ->ArgPair(1024, 1024) // 100% matches + ->ArgPair(1, 4096) // single match + ->ArgPair(410, 4096) // ~10% matches + ->ArgPair(819, 4096) // ~20% matches + ->ArgPair(2048, 4096) // 50% matches + ->ArgPair(4096, 4096) // 100% matches + ->ArgPair(1, 16384) // single match + ->ArgPair(1638, 16384) // ~10% matches + ->ArgPair(3277, 16384) // ~20% matches + ->ArgPair(8192, 16384) // 50% matches + ->ArgPair(16384, 16384); // 100% matches + +void BM_SnippetRfcOneProperty(benchmark::State& state) { + bool run_via_adb = absl::GetFlag(FLAGS_adb); + if (!run_via_adb) { + ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + + const std::string base_dir = GetTestTempDir() + "/query_processor_benchmark"; + const std::string schema_dir = base_dir + "/schema"; + Filesystem filesystem; + filesystem.DeleteDirectoryRecursively(base_dir.c_str()); + if (!filesystem.CreateDirectoryRecursively(schema_dir.c_str())) { + ICING_LOG(ERROR) << "Failed to create test directories"; + } + + language_segmenter_factory::SegmenterOptions options(ULOC_US); + std::unique_ptr<LanguageSegmenter> 
language_segmenter = + language_segmenter_factory::Create(std::move(options)).ValueOrDie(); + std::unique_ptr<Normalizer> normalizer = + normalizer_factory::Create( + /*max_term_byte_size=*/std::numeric_limits<int>::max()) + .ValueOrDie(); + + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("type1").AddProperty( + PropertyConfigBuilder() + .SetName("prop1") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + Clock clock; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem, schema_dir, &clock)); + ICING_ASSERT_OK(schema_store->SetSchema(schema)); + + auto snippet_retriever = + SnippetRetriever::Create(schema_store.get(), language_segmenter.get(), + normalizer.get()) + .ValueOrDie(); + + int num_matches = state.range(0); + int total_terms = state.range(1); + + std::default_random_engine random; + std::vector<std::string> language = + CreateLanguages(/*language_size=*/1000, &random); + std::uniform_int_distribution<size_t> uniform(0u, language.size() - 1); + std::uniform_real_distribution<double> uniform_double(0.0, 1.0); + + std::string text; + int num_actual_matches = 0; + double match_chance; + while (total_terms-- > 0) { + std::string term; + match_chance = static_cast<double>(num_matches) / total_terms; + if (uniform_double(random) <= match_chance) { + --num_matches; + ++num_actual_matches; + term = "foo@google.com"; + } else { + term = absl_ports::StrCat(language.at(uniform(random)), "@google.com"); + } + absl_ports::StrAppend(&text, ",", term); + } + DocumentProto document = DocumentBuilder() + .SetKey("icing", "uri1") + .SetSchema("type1") + .AddStringProperty("prop1", text) + .Build(); + SectionRestrictQueryTermsMap query_terms = {{"", {"foo"}}}; + ResultSpecProto::SnippetSpecProto snippet_spec; + snippet_spec.set_num_to_snippet(100000); + snippet_spec.set_num_matches_per_property(100000); + 
snippet_spec.set_max_window_utf32_length(64); + + SectionIdMask section_id_mask = 0x01; + SnippetProto snippet_proto; + for (auto _ : state) { + snippet_proto = snippet_retriever->RetrieveSnippet( + query_terms, TERM_MATCH_PREFIX, snippet_spec, document, + section_id_mask); + ASSERT_THAT(snippet_proto.entries(), SizeIs(1)); + ASSERT_THAT(snippet_proto.entries(0).snippet_matches(), + SizeIs(num_actual_matches)); + } + + // Destroy the schema store before the whole directory is removed because they + // persist data in destructor. + schema_store.reset(); + filesystem.DeleteDirectoryRecursively(base_dir.c_str()); +} +BENCHMARK(BM_SnippetRfcOneProperty) + // Arguments: num_matches, total_terms + ->ArgPair(1, 1) + ->ArgPair(1, 16) // single match + ->ArgPair(2, 16) // ~10% matches + ->ArgPair(3, 16) // ~20% matches + ->ArgPair(8, 16) // 50% matches + ->ArgPair(16, 16) // 100% matches + ->ArgPair(1, 128) // single match + ->ArgPair(13, 128) // ~10% matches + ->ArgPair(26, 128) // ~20% matches + ->ArgPair(64, 128) // 50% matches + ->ArgPair(128, 128) // 100% matches + ->ArgPair(1, 512) // single match + ->ArgPair(51, 512) // ~10% matches + ->ArgPair(102, 512) // ~20% matches + ->ArgPair(256, 512) // 50% matches + ->ArgPair(512, 512) // 100% matches + ->ArgPair(1, 1024) // single match + ->ArgPair(102, 1024) // ~10% matches + ->ArgPair(205, 1024) // ~20% matches + ->ArgPair(512, 1024) // 50% matches + ->ArgPair(1024, 1024) // 100% matches + ->ArgPair(1, 4096) // single match + ->ArgPair(410, 4096) // ~10% matches + ->ArgPair(819, 4096) // ~20% matches + ->ArgPair(2048, 4096) // 50% matches + ->ArgPair(4096, 4096) // 100% matches + ->ArgPair(1, 16384) // single match + ->ArgPair(1638, 16384) // ~10% matches + ->ArgPair(3277, 16384) // ~20% matches + ->ArgPair(8192, 16384) // 50% matches + ->ArgPair(16384, 16384); // 100% matches + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/scoring/advanced_scoring/advanced-scorer_test.cc 
b/icing/scoring/advanced_scoring/advanced-scorer_test.cc index ebefc4e..b3a47ba 100644 --- a/icing/scoring/advanced_scoring/advanced-scorer_test.cc +++ b/icing/scoring/advanced_scoring/advanced-scorer_test.cc @@ -116,7 +116,7 @@ DocumentProto CreateDocument( } UsageReport CreateUsageReport(std::string name_space, std::string uri, - int64 timestamp_ms, + int64_t timestamp_ms, UsageReport::UsageType usage_type) { UsageReport usage_report; usage_report.set_document_namespace(name_space); diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc index b13d54a..08705a1 100644 --- a/icing/scoring/scorer_test.cc +++ b/icing/scoring/scorer_test.cc @@ -111,7 +111,7 @@ class ScorerTest : public ::testing::TestWithParam<ScorerTestingMode> { }; UsageReport CreateUsageReport(std::string name_space, std::string uri, - int64 timestamp_ms, + int64_t timestamp_ms, UsageReport::UsageType usage_type) { UsageReport usage_report; usage_report.set_document_namespace(name_space); diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc index 7e4ca1d..1e579be 100644 --- a/icing/scoring/scoring-processor_test.cc +++ b/icing/scoring/scoring-processor_test.cc @@ -144,7 +144,7 @@ CreateAndInsertsDocumentsWithScores(DocumentStore* document_store, } UsageReport CreateUsageReport(std::string name_space, std::string uri, - int64 timestamp_ms, + int64_t timestamp_ms, UsageReport::UsageType usage_type) { UsageReport usage_report; usage_report.set_document_namespace(name_space); diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 1273c67..1d9a2a0 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -95,7 +95,7 @@ const NamespaceStorageInfoProto& GetNamespaceStorageInfo( } UsageReport CreateUsageReport(std::string name_space, std::string uri, - int64 timestamp_ms, + int64_t timestamp_ms, UsageReport::UsageType usage_type) { UsageReport usage_report; 
usage_report.set_document_namespace(name_space); diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc index b2dbe4b..2b17f13 100644 --- a/icing/store/usage-store_test.cc +++ b/icing/store/usage-store_test.cc @@ -44,7 +44,7 @@ class UsageStoreTest : public testing::Test { }; UsageReport CreateUsageReport(std::string name_space, std::string uri, - int64 timestamp_ms, + int64_t timestamp_ms, UsageReport::UsageType usage_type) { UsageReport usage_report; usage_report.set_document_namespace(name_space); @@ -450,7 +450,7 @@ TEST_F(UsageStoreTest, Reset) { TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) { // Create a report with the max value of timestamps. UsageReport usage_report = CreateUsageReport( - "namespace", "uri", /*timestamp_ms=*/std::numeric_limits<int64>::max(), + "namespace", "uri", /*timestamp_ms=*/std::numeric_limits<int64_t>::max(), UsageReport::USAGE_TYPE1); ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store, diff --git a/icing/testing/random-string.h b/icing/testing/random-string.h index fd8d87b..a313c1c 100644 --- a/icing/testing/random-string.h +++ b/icing/testing/random-string.h @@ -25,6 +25,15 @@ namespace lib { inline constexpr std::string_view kAlNumAlphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; +// Average length of word in English is 4.7 characters. +inline constexpr int kAvgTokenLen = 5; +// Made up value. 
This results in a fairly reasonable language - the majority of +// generated words are 3-9 characters, ~3% of words are >=20 chars, and the +// longest ones are 27 chars, (roughly consistent with the longest, +// non-contrived English words +// https://en.wikipedia.org/wiki/Longest_word_in_English) +inline constexpr int kTokenStdDev = 7; + template <typename Gen> std::string RandomString(const std::string_view alphabet, size_t len, Gen* gen) { @@ -37,6 +46,22 @@ std::string RandomString(const std::string_view alphabet, size_t len, return result; } +// Creates a vector containing num_words randomly-generated words for use by +// documents. +// +// Each word's length is drawn from a normal distribution with mean +// kAvgTokenLen and standard deviation kTokenStdDev, rounded to the nearest +// integer, and redrawn until it is at least 1. The word itself is produced by +// RandomString over kAlNumAlphabet using the same generator r. Returns an +// empty vector when num_words <= 0. +template <typename Rand> +std::vector<std::string> CreateLanguages(int num_words, Rand* r) { + std::vector<std::string> language; + std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev); + while (--num_words >= 0) { + int word_length = 0; + while (word_length < 1) { + word_length = std::round(norm_dist(*r)); + } + language.push_back(RandomString(kAlNumAlphabet, word_length, r)); + } + return language; +} + // Returns a vector containing num_terms unique terms. Terms are created in // non-random order starting with "a" to "z" to "aa" to "zz", etc. 
std::vector<std::string> GenerateUniqueTerms(int num_terms); diff --git a/icing/text_classifier/lib3/utils/base/statusor.h b/icing/text_classifier/lib3/utils/base/statusor.h index 9ec3d91..aa1e598 100644 --- a/icing/text_classifier/lib3/utils/base/statusor.h +++ b/icing/text_classifier/lib3/utils/base/statusor.h @@ -201,12 +201,19 @@ template <typename T> inline StatusOr<T>::StatusOr(T&& value) : value_(std::move(value)) {} template <typename T> -inline StatusOr<T>::StatusOr(const StatusOr& other) - : status_(other.status_), value_(other.value_) {} +inline StatusOr<T>::StatusOr(const StatusOr& other) : status_(other.status_) { + if (other.ok()) { + MakeValue(other.value_); + } +} template <typename T> inline StatusOr<T>::StatusOr(StatusOr&& other) - : status_(other.status_), value_(std::move(other.value_)) {} + : status_(std::move(other.status_)) { + if (other.ok()) { + MakeValue(std::move(other.value_)); + } +} template <typename T> template < @@ -216,7 +223,11 @@ template < std::is_convertible<const U&, T>>::value, int>> inline StatusOr<T>::StatusOr(const StatusOr<U>& other) - : status_(other.status_), value_(other.value_) {} + : status_(other.status_) { + if (other.ok()) { + MakeValue(other.value_); + } +} template <typename T> template <typename U, @@ -225,7 +236,11 @@ template <typename U, std::is_convertible<U&&, T>>::value, int>> inline StatusOr<T>::StatusOr(StatusOr<U>&& other) - : status_(other.status_), value_(std::move(other.value_)) {} + : status_(std::move(other.status_)) { + if (other.ok()) { + MakeValue(std::move(other.value_)); + } +} template <typename T> template < |