diff options
author | Yogesh Singh <yosing@google.com> | 2023-08-05 21:06:22 +0000 |
---|---|---|
committer | Yogesh Singh <yosing@google.com> | 2023-08-07 22:59:47 +0000 |
commit | 7edd8a42d2ad4b80e95557a7e4c615f97450b968 (patch) | |
tree | 7e8ce2a244cf50de9ead16fea57297e0b3d3316f | |
parent | f8a8e6c2e2b55900e082d2657f3691ee6d9b2d27 (diff) | |
download | icing-7edd8a42d2ad4b80e95557a7e4c615f97450b968.tar.gz |
Update Icing from upstream.
Descriptions:
========================================================================
Add join test for SetSchemaNewIndexedDocumentPropertyTriggersIndexRestorationAndReturnsOk
========================================================================
Support different sets of properties to search for different schema types.
========================================================================
Do not cache the section mask for a schema type if no property filters apply while processing the type property filters.
========================================================================
Bug: 288310393
Bug: 291019114
Change-Id: Icb867b40a603c0d63e2d96a0e2ec641e424aad7e
-rw-r--r-- | icing/icing-search-engine_schema_test.cc | 134 | ||||
-rw-r--r-- | icing/icing-search-engine_search_test.cc | 541 | ||||
-rw-r--r-- | icing/index/iterator/doc-hit-info-iterator-section-restrict.cc | 179 | ||||
-rw-r--r-- | icing/index/iterator/doc-hit-info-iterator-section-restrict.h | 52 | ||||
-rw-r--r-- | icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc | 4 | ||||
-rw-r--r-- | icing/query/query-processor.cc | 7 | ||||
-rw-r--r-- | icing/query/query-processor_test.cc | 255 | ||||
-rw-r--r-- | icing/schema/schema-store.cc | 11 | ||||
-rw-r--r-- | icing/schema/schema-store.h | 9 | ||||
-rw-r--r-- | proto/icing/proto/search.proto | 12 | ||||
-rw-r--r-- | synced_AOSP_CL_number.txt | 2 |
11 files changed, 1138 insertions, 68 deletions
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc index 5076d50..0e88c5a 100644 --- a/icing/icing-search-engine_schema_test.cc +++ b/icing/icing-search-engine_schema_test.cc @@ -897,8 +897,17 @@ TEST_F( // - "age": integer type, indexed. Section id = 0 // - "name": string type, indexed. Section id = 1. // - "worksFor.name": string type, (nested) indexed. Section id = 2. + // + // Joinable property id assignment for 'Person': + // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable + // property id = 0. SchemaProto schema_one = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty( + PropertyConfigBuilder() + .SetName("title") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Person") .AddProperty(PropertyConfigBuilder() @@ -922,38 +931,47 @@ TEST_F( .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("listRef") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); - - SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one); - // Ignore latency numbers. They're covered elsewhere. - set_schema_result.clear_latency_ms(); - SetSchemaResultProto expected_set_schema_result; - expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); - expected_set_schema_result.mutable_new_schema_types()->Add("Organization"); - expected_set_schema_result.mutable_new_schema_types()->Add("Person"); - EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); - - DocumentProto personDocument = + ASSERT_THAT(icing.SetSchema(schema_one).status(), ProtoIsOk()); + + DocumentProto list_document = DocumentBuilder() + .SetKey("namespace", "list/1") + .SetSchema("List") + .SetCreationTimestampMs(1000) + .AddStringProperty("title", "title") + .Build(); + DocumentProto person_document = DocumentBuilder() .SetKey("namespace", "person/2") .SetSchema("Person") .SetCreationTimestampMs(1000) .AddStringProperty("name", "John") .AddInt64Property("age", 20) - .AddDocumentProperty("worksFor", - DocumentBuilder() - .SetKey("namespace", "org/1") - .SetSchema("Organization") - .AddStringProperty("name", "Google") - .Build()) + .AddDocumentProperty( + "worksFor", DocumentBuilder() + .SetKey("namespace", "org/1") + .SetSchema("Organization") + .AddStringProperty("name", "Google") + .AddStringProperty("listRef", "namespace#list/1") + .Build()) .Build(); - EXPECT_THAT(icing.Put(personDocument).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(list_document).status(), ProtoIsOk()); + EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk()); + + ResultSpecProto result_spec = ResultSpecProto::default_instance(); + result_spec.set_max_joined_children_per_parent_to_return( + std::numeric_limits<int32_t>::max()); SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = - personDocument; + person_document; SearchResultProto empty_result; empty_result.mutable_status()->set_code(StatusProto::OK); @@ -964,8 +982,7 @@ TEST_F( search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); SearchResultProto actual_results = - icing.Search(search_spec1, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); @@ -976,11 +993,44 @@ TEST_F( SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); - actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = + icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); + // Verify join search: join a query for `title:title` (which will get + // list_document) with a child query for `name:John` (which will get + // person_document) based on the child's `worksFor.listRef` field. + SearchSpecProto search_spec_with_join; + search_spec_with_join.set_query("title:title"); + search_spec_with_join.set_term_match_type(TermMatchType::EXACT_ONLY); + JoinSpecProto* join_spec = search_spec_with_join.mutable_join_spec(); + join_spec->set_parent_property_expression( + std::string(JoinProcessor::kQualifiedIdExpr)); + join_spec->set_child_property_expression("worksFor.listRef"); + join_spec->set_aggregation_scoring_strategy( + JoinSpecProto::AggregationScoringStrategy::COUNT); + JoinSpecProto::NestedSpecProto* nested_spec = + join_spec->mutable_nested_spec(); + SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); + nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); + nested_search_spec->set_query("name:John"); + *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); + *nested_spec->mutable_result_spec() = result_spec; + + SearchResultProto expected_join_search_result_proto; + expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); + SearchResultProto::ResultProto* result_proto = + expected_join_search_result_proto.mutable_results()->Add(); + *result_proto->mutable_document() = list_document; + *result_proto->mutable_joined_results()->Add()->mutable_document() = + person_document; + + actual_results = + icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_join_search_result_proto)); + // Change the schema to add another nested document property to 'Person' // // New section id assignment for 'Person': @@ -988,8 +1038,19 @@ TEST_F( // - "almaMater.name", string type, indexed. Section id = 1 // - "name": string type, indexed. Section id = 2 // - "worksFor.name": string type, (nested) indexed. Section id = 3 + // + // New joinable property id assignment for 'Person': + // - "almaMater.listRef": string type, Qualified Id type joinable. Joinable + // property id = 0. + // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable + // property id = 1. SchemaProto schema_two = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty( + PropertyConfigBuilder() + .SetName("title") + .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) .AddType(SchemaTypeConfigBuilder() .SetType("Person") .AddProperty(PropertyConfigBuilder() @@ -1019,6 +1080,11 @@ TEST_F( .SetName("name") .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED)) + .AddProperty(PropertyConfigBuilder() + .SetName("listRef") + .SetDataTypeJoinableString( + JOINABLE_VALUE_TYPE_QUALIFIED_ID) .SetCardinality(CARDINALITY_REQUIRED))) .Build(); @@ -1028,10 +1094,10 @@ TEST_F( // Index restoration should be triggered here because new schema requires more // properties to be indexed. Also new section ids will be reassigned and index // restoration should use new section ids to rebuild. - set_schema_result = icing.SetSchema(schema_two); + SetSchemaResultProto set_schema_result = icing.SetSchema(schema_two); // Ignore latency numbers. They're covered elsewhere. set_schema_result.clear_latency_ms(); - expected_set_schema_result = SetSchemaResultProto(); + SetSchemaResultProto expected_set_schema_result = SetSchemaResultProto(); expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); expected_set_schema_result.mutable_index_incompatible_changed_schema_types() ->Add("Person"); @@ -1041,8 +1107,8 @@ TEST_F( // Verify term search: // Searching for "worksFor.name:Google" should still match document - actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = + icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); @@ -1051,16 +1117,22 @@ TEST_F( // rebuild was not triggered and Icing is still searching the old index, where // 'worksFor.name' was indexed at section id 2. search_spec1.set_query("name:Google"); - actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = + icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(empty_result)); // Verify numeric (integer) search: should still match document - actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()); + actual_results = + icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec); EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( expected_search_result_proto)); + + // Verify join search: should still able to join by `worksFor.listRef` + actual_results = + icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores( + expected_join_search_result_proto)); } TEST_F(IcingSearchEngineSchemaTest, diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc index fd2c939..451c9ce 100644 --- a/icing/icing-search-engine_search_test.cc +++ b/icing/icing-search-engine_search_test.cc @@ -3565,6 +3565,547 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionMultipleFieldPaths) { EqualsProto(projected_document_one)); } +TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFilters) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "hellogirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // 2. Issue a query with property filters of sender.name and subject for the + // Email schema type. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* email_property_filters = + search_spec->add_type_property_filters(); + email_property_filters->set_schema_type("Email"); + email_property_filters->add_paths("sender.name"); + email_property_filters->add_paths("subject"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + + // 3. Verify that only the first document is returned. Although 'hello' is + // present in document_two, it shouldn't be in the result since 'hello' is not + // in the specified property filter. + EXPECT_THAT(results.results(0).document(), + EqualsProto(document_one)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersOnMultipleSchema) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + // Add Person and Organization schema with a property 'name' in both. + SchemaProto schema = SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("emailAddress") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder() + .SetType("Organization") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("address") + .SetDataTypeString(TERM_MATCH_PREFIX, + TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + ASSERT_THAT(icing.SetSchema(schema).status(), + ProtoIsOk()); + + // 1. Add person document + DocumentProto person_document = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "hellogirl@aol.com") + .Build(); + ASSERT_THAT(icing.Put(person_document).status(), ProtoIsOk()); + + // 1. Add organization document + DocumentProto organization_document = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Organization") + .AddStringProperty("name", "Meg Corp") + .AddStringProperty("address", "Universal street") + .Build(); + ASSERT_THAT(icing.Put(organization_document).status(), ProtoIsOk()); + + // 2. Issue a query with property filters. Person schema has name in it's + // property filter but Organization schema doesn't. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("Meg"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* person_property_filters = + search_spec->add_type_property_filters(); + person_property_filters->set_schema_type("Person"); + person_property_filters->add_paths("name"); + TypePropertyMask* organization_property_filters = + search_spec->add_type_property_filters(); + organization_property_filters->set_schema_type("Organization"); + organization_property_filters->add_paths("address"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + + // 3. Verify that only the person document is returned. Although 'Meg' is + // present in organization document, it shouldn't be in the result since + // the name field is not specified in the Organization property filter. + EXPECT_THAT(results.results(0).document(), + EqualsProto(person_document)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithWildcardPropertyFilters) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "hellogirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // 2. Issue a query with property filters of sender.name and subject for the + // wildcard(*) schema type. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* wildcard_property_filters = + search_spec->add_type_property_filters(); + wildcard_property_filters->set_schema_type("*"); + wildcard_property_filters->add_paths("sender.name"); + wildcard_property_filters->add_paths("subject"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + + // 3. Verify that only the first document is returned since the second + // document doesn't contain the word 'hello' in either of fields specified in + // the property filter. This confirms that the property filters for the + // wildcard entry have been applied to the Email schema as well. + EXPECT_THAT(results.results(0).document(), + EqualsProto(document_one)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithMixedPropertyFilters) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "hellogirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // 2. Issue a query with property filters of sender.name and subject for the + // wildcard(*) schema type plus property filters of sender.name and body for + // the Email schema type. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* wildcard_property_filters = + search_spec->add_type_property_filters(); + wildcard_property_filters->set_schema_type("*"); + wildcard_property_filters->add_paths("sender.name"); + wildcard_property_filters->add_paths("subject"); + TypePropertyMask* email_property_filters = + search_spec->add_type_property_filters(); + email_property_filters->set_schema_type("Email"); + email_property_filters->add_paths("sender.name"); + email_property_filters->add_paths("body"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + + // 3. Verify that only the second document is returned since the first + // document doesn't contain the word 'hello' in either of fields sender.name + // or body. This confirms that the property filters specified for Email schema + // have been applied and the ones specified for wildcard entry have been + // ignored. + EXPECT_THAT(results.results(0).document(), + EqualsProto(document_two)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithNonApplicablePropertyFilters) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "hellogirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // 2. Issue a query with property filters of sender.name and subject for an + // unknown schema type. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* email_property_filters = + search_spec->add_type_property_filters(); + email_property_filters->set_schema_type("unknown"); + email_property_filters->add_paths("sender.name"); + email_property_filters->add_paths("subject"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(2)); + + // 3. Verify that both the documents are returned since each of them have the + // word 'hello' in at least 1 property. The second document being returned + // confirms that the body field was searched and the specified property + // filters were not applied to the Email schema type. + EXPECT_THAT(results.results(0).document(), + EqualsProto(document_two)); + EXPECT_THAT(results.results(1).document(), + EqualsProto(document_one)); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithEmptyPropertyFilter) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Message") + .AddStringProperty("body", "Hello World!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + // 2. Issue a query with empty property filter for Message schema. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* message_property_filters = + search_spec->add_type_property_filters(); + message_property_filters->set_schema_type("Message"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + + // 3. Verify that no documents are returned. Although 'hello' is present in + // the indexed document, it shouldn't be returned since the Message property + // filter doesn't allow any properties to be searched. + ASSERT_THAT(results.results(), IsEmpty()); +} + +TEST_P(IcingSearchEngineSearchTest, + SearchWithPropertyFilterHavingInvalidProperty) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Message") + .AddStringProperty("body", "Hello World!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + // 2. Issue a query with property filter having invalid/unknown property for + // Message schema. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* message_property_filters = + search_spec->add_type_property_filters(); + message_property_filters->set_schema_type("Message"); + message_property_filters->add_paths("unknown"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + + // 3. Verify that no documents are returned. Although 'hello' is present in + // the indexed document, it shouldn't be returned since the Message property + // filter doesn't allow any valid properties to be searched. Any + // invalid/unknown properties specified in the property filters will be + // ignored while searching. + ASSERT_THAT(results.results(), IsEmpty()); +} + +TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersWithNesting) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // 1. Add two email documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Meg Ryan") + .AddStringProperty("emailAddress", "hellogirl@aol.com") + .Build()) + .AddStringProperty("subject", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk()); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty( + "sender", DocumentBuilder() + .SetKey("namespace", "uri2") + .SetSchema("Person") + .AddStringProperty("name", "Tom Hanks") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build()) + .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("body", + "Count all the sheep and tell them 'Hello'.") + .Build(); + ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk()); + + // 2. Issue a query with property filter of sender.emailAddress for the Email + // schema type. + auto search_spec = std::make_unique<SearchSpecProto>(); + search_spec->set_term_match_type(TermMatchType::PREFIX); + search_spec->set_query("hello"); + search_spec->set_search_type(GetParam()); + TypePropertyMask* email_property_filters = + search_spec->add_type_property_filters(); + email_property_filters->set_schema_type("Email"); + email_property_filters->add_paths("sender.emailAddress"); + + auto result_spec = std::make_unique<ResultSpecProto>(); + + auto scoring_spec = std::make_unique<ScoringSpecProto>(); + *scoring_spec = GetDefaultScoringSpec(); + SearchResultProto results = + icing.Search(*search_spec, *scoring_spec, *result_spec); + EXPECT_THAT(results.status(), ProtoIsOk()); + EXPECT_THAT(results.results(), SizeIs(1)); + + // 3. Verify that only the first document is returned since the second + // document doesn't contain the word 'hello' in sender.emailAddress. The first + // document being returned confirms that the nested property + // sender.emailAddress was actually searched. + EXPECT_THAT(results.results(0).document(), + EqualsProto(document_one)); +} + TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) { auto fake_clock = std::make_unique<FakeClock>(); fake_clock->SetTimerElapsedMilliseconds(5); diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc index 227a185..b850a9b 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc @@ -42,8 +42,99 @@ DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( : delegate_(std::move(delegate)), document_store_(*document_store), schema_store_(*schema_store), - target_sections_(std::move(target_sections)), - current_time_ms_(current_time_ms) {} + current_time_ms_(current_time_ms) { + type_property_filters_[std::string(SchemaStore::kSchemaTypeWildcard)] = + std::move(target_sections); +} + +DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( + std::unique_ptr<DocHitInfoIterator> delegate, + const DocumentStore* document_store, const SchemaStore* schema_store, + const SearchSpecProto& search_spec, + int64_t current_time_ms) + : delegate_(std::move(delegate)), + document_store_(*document_store), + schema_store_(*schema_store), + current_time_ms_(current_time_ms) { + // TODO(b/294274922): Add support for polymorphism in type property filters. + for (const TypePropertyMask& type_property_mask : + search_spec.type_property_filters()) { + type_property_filters_[type_property_mask.schema_type()] = + std::set<std::string>(type_property_mask.paths().begin(), + type_property_mask.paths().end()); + } +} + +DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict( + std::unique_ptr<DocHitInfoIterator> delegate, + const DocumentStore* document_store, const SchemaStore* schema_store, + std::unordered_map<std::string, std::set<std::string>> + type_property_filters, + std::unordered_map<std::string, SectionIdMask> type_property_masks, + int64_t current_time_ms) + : delegate_(std::move(delegate)), + document_store_(*document_store), + schema_store_(*schema_store), + current_time_ms_(current_time_ms), + type_property_filters_(std::move(type_property_filters)), + type_property_masks_(std::move(type_property_masks)) {} + +SectionIdMask DocHitInfoIteratorSectionRestrict::GenerateSectionMask( + const std::string& schema_type, + const std::set<std::string>& target_sections) const { + SectionIdMask section_mask = kSectionIdMaskNone; + auto section_metadata_list_or = + schema_store_.GetSectionMetadata(schema_type); + if (!section_metadata_list_or.ok()) { + // The current schema doesn't have section metadata. + return kSectionIdMaskNone; + } + const std::vector<SectionMetadata>* section_metadata_list = + section_metadata_list_or.ValueOrDie(); + for (const SectionMetadata& section_metadata : *section_metadata_list) { + if (target_sections.find(section_metadata.path) != + target_sections.end()) { + section_mask |= UINT64_C(1) << section_metadata.id; + } + } + return section_mask; +} + +SectionIdMask DocHitInfoIteratorSectionRestrict:: + ComputeAndCacheSchemaTypeAllowedSectionsMask( + const std::string& schema_type) { + if (const auto type_property_mask_itr = + type_property_masks_.find(schema_type); + type_property_mask_itr != type_property_masks_.end()) { + return type_property_mask_itr->second; + } + + // Section id mask of schema_type is never calculated before, so + // calculate it here and put it into type_property_masks_. + // - If type property filters of schema_type or wildcard (*) are + // specified, then create a mask according to the filters. + // - Otherwise, create a mask to match all properties. + SectionIdMask new_section_id_mask = kSectionIdMaskAll; + if (const auto itr = type_property_filters_.find(schema_type); + itr != type_property_filters_.end()) { + // Property filters defined for given schema type + new_section_id_mask = GenerateSectionMask( + schema_type, itr->second); + } else if (const auto wildcard_itr = type_property_filters_.find( + std::string(SchemaStore::kSchemaTypeWildcard)); + wildcard_itr != type_property_filters_.end()) { + // Property filters defined for wildcard entry + new_section_id_mask = GenerateSectionMask( + schema_type, wildcard_itr->second); + } else { + // Do not cache the section mask if no property filters apply to this schema + // type to avoid taking up unnecessary space. + return kSectionIdMaskAll; + } + + type_property_masks_[schema_type] = new_section_id_mask; + return new_section_id_mask; +} libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { doc_hit_info_ = DocHitInfo(kInvalidDocumentId); @@ -63,32 +154,32 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() { // Guaranteed that the DocumentFilterData exists at this point SchemaTypeId schema_type_id = data_optional.value().schema_type_id(); - - // A hit can be in multiple sections at once, need to check which of the - // section ids match the target sections - while (section_id_mask != 0) { - // There was a hit in this section id - SectionId section_id = __builtin_ctzll(section_id_mask); - - auto section_metadata_or = - schema_store_.GetSectionMetadata(schema_type_id, section_id); - - if (section_metadata_or.ok()) { - const SectionMetadata* section_metadata = - section_metadata_or.ValueOrDie(); - - if (target_sections_.find(section_metadata->path) != - target_sections_.end()) { - // The hit was in the target section name, return OK/found - hit_intersect_section_ids_mask_ |= UINT64_C(1) << section_id; - } - } - - // Mark this section as checked - section_id_mask &= ~(UINT64_C(1) << section_id); + auto schema_type_or = schema_store_.GetSchemaType(schema_type_id); + if (!schema_type_or.ok()) { + // Ran into error retrieving schema type, skip + continue; } + const std::string* schema_type = std::move(schema_type_or).ValueOrDie(); + SectionIdMask allowed_sections_mask = + ComputeAndCacheSchemaTypeAllowedSectionsMask(*schema_type); - if (hit_intersect_section_ids_mask_ != kSectionIdMaskNone) { + // A hit can be in multiple sections at once, need to check which of the + // section ids match the sections allowed by type_property_masks_. This can + // be done by doing a bitwise and of the section_id_mask in the doc hit and + // the allowed_sections_mask. + hit_intersect_section_ids_mask_ = section_id_mask & allowed_sections_mask; + + // Return this document if: + // - the sectionIdMask is not empty after applying property filters, or + // - no property filters apply for its schema type (allowed_sections_mask + // == kSectionIdMaskAll). This is needed to ensure that in case of empty + // query (which uses doc-hit-info-iterator-all-document-id), where + // section_id_mask (and hence hit_intersect_section_ids_mask_) is + // kSectionIdMaskNone, doc hits with no property restrictions don't get + // filtered out. Doc hits for schema types for whom property filters are + // specified will still get filtered out. + if (allowed_sections_mask == kSectionIdMaskAll + || hit_intersect_section_ids_mask_ != kSectionIdMaskNone) { doc_hit_info_ = delegate_->doc_hit_info(); doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_); return libtextclassifier3::Status::OK; @@ -104,16 +195,36 @@ libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode> DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && { ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate, std::move(*delegate_).TrimRightMostNode()); + // TrimRightMostNode is only used by suggestion processor to process query + // expression, so an entry for wildcard should always be present in + // type_property_filters_ when code flow reaches here. If the InternalError + // below is returned, that means TrimRightMostNode hasn't been called in the + // right context. + const auto it = type_property_filters_.find("*"); + if (it == type_property_filters_.end()) { + return absl_ports::InternalError( + "A wildcard entry should always be present in type property filters " + "whenever TrimRightMostNode() is called for " + "DocHitInfoIteratorSectionRestrict"); + } + std::set<std::string>& target_sections = it->second; + if (target_sections.empty()) { + return absl_ports::InternalError( + "Target sections should not be empty whenever TrimRightMostNode() is " + "called for DocHitInfoIteratorSectionRestrict"); + } if (trimmed_delegate.iterator_ == nullptr) { // TODO(b/228240987): Update TrimmedNode and downstream code to handle // multiple section restricts. - trimmed_delegate.target_section_ = std::move(*target_sections_.begin()); + trimmed_delegate.target_section_ = std::move(*target_sections.begin()); return trimmed_delegate; } trimmed_delegate.iterator_ = - std::make_unique<DocHitInfoIteratorSectionRestrict>( + std::unique_ptr<DocHitInfoIteratorSectionRestrict>( + new DocHitInfoIteratorSectionRestrict( std::move(trimmed_delegate.iterator_), &document_store_, - &schema_store_, std::move(target_sections_), current_time_ms_); + &schema_store_, std::move(type_property_filters_), + std::move(type_property_masks_), current_time_ms_)); return std::move(trimmed_delegate); } @@ -126,8 +237,14 @@ int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const { } std::string DocHitInfoIteratorSectionRestrict::ToString() const { - return absl_ports::StrCat("(", absl_ports::StrJoin(target_sections_, ","), - "): ", delegate_->ToString()); + std::string output = ""; + for (auto it = type_property_filters_.cbegin(); + it != type_property_filters_.cend(); it++) { + std::string paths = absl_ports::StrJoin(it->second, ","); + output += (it->first) + ":" + (paths) + "; "; + } + std::string result = "{" + output.substr(0, output.size() - 2) + "}: "; + return absl_ports::StrCat(result, delegate_->ToString()); } } // namespace lib diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h index 58dd120..5d44ed7 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h @@ -19,10 +19,13 @@ #include <memory> #include <string> #include <string_view> +#include <unordered_map> #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/schema/schema-store.h" +#include "icing/schema/section.h" +#include "icing/store/document-filter-data.h" #include "icing/store/document-store.h" namespace icing { @@ -44,6 +47,12 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator { const DocumentStore* document_store, const SchemaStore* schema_store, std::set<std::string> target_sections, int64_t current_time_ms); + explicit DocHitInfoIteratorSectionRestrict( + std::unique_ptr<DocHitInfoIterator> delegate, + const DocumentStore* document_store, const SchemaStore* schema_store, + const SearchSpecProto& search_spec, + int64_t current_time_ms); + libtextclassifier3::Status Advance() override; libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override; @@ -72,12 +81,51 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator { } private: + explicit DocHitInfoIteratorSectionRestrict( + std::unique_ptr<DocHitInfoIterator> delegate, + const DocumentStore* document_store, const SchemaStore* schema_store, + std::unordered_map<std::string, std::set<std::string>> + type_property_filters, + std::unordered_map<std::string, SectionIdMask> type_property_masks, + int64_t current_time_ms); + // Calculates the section mask of allowed sections(determined by the property + // filters map) for the given schema type and caches the same for any future + // calls. + // + // Returns: + // - If type_property_filters_ has an entry for the given schema type or + // wildcard(*), return a bitwise or of section IDs in the schema type that + // that are also present in the relevant filter list. + // - Otherwise, return kSectionIdMaskAll. + SectionIdMask ComputeAndCacheSchemaTypeAllowedSectionsMask( + const std::string& schema_type); + // Generates a section mask for the given schema type and the target sections. + // + // Returns: + // - A bitwise or of section IDs in the schema_type that that are also + // present in the target_sections list. + // - If none of the sections in the schema_type are present in the + // target_sections list, return kSectionIdMaskNone. + // This is done by doing a bitwise or of the target section ids for the given + // schema type. + SectionIdMask GenerateSectionMask(const std::string& schema_type, + const std::set<std::string>& + target_sections) const; + std::unique_ptr<DocHitInfoIterator> delegate_; const DocumentStore& document_store_; const SchemaStore& schema_store_; - - std::set<std::string> target_sections_; int64_t current_time_ms_; + + // Map of property filters per schema type. Supports wildcard(*) for schema + // type that will apply to all schema types that are not specifically + // specified in the mapping otherwise. + std::unordered_map<std::string, std::set<std::string>> + type_property_filters_; + // Mapping of schema type to the section mask of allowed sections for that + // schema type. This section mask is lazily calculated based on the specified + // property filters and cached for any future use. + std::unordered_map<std::string, SectionIdMask> type_property_masks_; }; } // namespace lib diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 47f55aa..1500571 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -186,7 +186,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) { DocHitInfoIteratorSectionRestrict filtered_iterator( std::move(original_iterator_empty), document_store_.get(), - schema_store_.get(), /*target_sections=*/{}, + schema_store_.get(), /*target_sections=*/std::set<std::string>(), fake_clock_.GetSystemTimeMilliseconds()); EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty()); @@ -391,7 +391,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, // Create a hit that exists in a different section, so it shouldn't match any // section filters std::vector<DocHitInfo> doc_hit_infos = { - DocHitInfo(document_id, kSectionIdMaskNone << not_matching_section_id)}; + DocHitInfo(document_id, UINT64_C(1) << not_matching_section_id)}; std::unique_ptr<DocHitInfoIterator> original_iterator = std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos); diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc index 5e0b696..3e43ad9 100644 --- a/icing/query/query-processor.cc +++ b/icing/query/query-processor.cc @@ -176,6 +176,13 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch( results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>( std::move(results.root_iterator), &document_store_, &schema_store_, options, current_time_ms); + // TODO(b/294114230): Move this SectionRestrict filter from root level to + // lower levels if that would improve performance. + if (!search_spec.type_property_filters().empty()) { + results.root_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>( + std::move(results.root_iterator), &document_store_, &schema_store_, + search_spec, current_time_ms); + } return results; } diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index 5bbf9ba..fa70de1 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -2646,6 +2646,261 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) { EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal")); } +TEST_P(QueryProcessorTest, TypePropertyFilter) { + // Create the schema and document store + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("bar") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("baz") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("message") + .AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("bar") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("baz") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + // SectionIds are assigned in ascending order per schema type, + // alphabetically. + int email_bar_section_id = 0; + int email_baz_section_id = 1; + int email_foo_section_id = 2; + int message_bar_section_id = 0; + int message_baz_section_id = 1; + int message_foo_section_id = 2; + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); + + // These documents don't actually match to the tokens in the index. We're + // inserting the documents to get the appropriate number of documents and + // schema types populated. + ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema("email") + .Build())); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "2") + .SetSchema("message") + .Build())); + + // Poplate the index + TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; + + // Email document has content "animal" in all sections + ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id, + term_match_type, "animal"), + IsOk()); + + // Message document has content "animal" in all sections + ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id, + term_match_type, "animal"), + IsOk()); + + SearchSpecProto search_spec; + search_spec.set_query("animal"); + search_spec.set_term_match_type(term_match_type); + search_spec.set_search_type(GetParam()); + + // email has property filters for foo and baz properties + TypePropertyMask *email_mask = search_spec.add_type_property_filters(); + email_mask->set_schema_type("email"); + email_mask->add_paths("foo"); + email_mask->add_paths("baz"); + + // message has property filters for bar and baz properties + TypePropertyMask *message_mask = search_spec.add_type_property_filters(); + message_mask->set_schema_type("message"); + message_mask->add_paths("bar"); + message_mask->add_paths("baz"); + + ICING_ASSERT_OK_AND_ASSIGN( + QueryResults results, + query_processor_->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, + fake_clock_.GetSystemTimeMilliseconds())); + + // Ordered by descending DocumentId, so message comes first since it was + // inserted last + DocHitInfo expected_doc_hit_info1(message_document_id); + expected_doc_hit_info1.UpdateSection(message_bar_section_id); + expected_doc_hit_info1.UpdateSection(message_baz_section_id); + DocHitInfo expected_doc_hit_info2(email_document_id); + expected_doc_hit_info2.UpdateSection(email_foo_section_id); + expected_doc_hit_info2.UpdateSection(email_baz_section_id); + EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), + ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2)); + EXPECT_THAT(results.query_term_iterators, SizeIs(1)); + + EXPECT_THAT(results.query_terms, SizeIs(1)); + EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal")); +} + +TEST_P(QueryProcessorTest, TypePropertyFilterWithSectionRestrict) { + // Create the schema and document store + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("bar") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("baz") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("message") + .AddProperty( + PropertyConfigBuilder() + .SetName("foo") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("bar") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("baz") + .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + // SectionIds are assigned in ascending order per schema type, + // alphabetically. + int email_bar_section_id = 0; + int email_baz_section_id = 1; + int email_foo_section_id = 2; + int message_bar_section_id = 0; + int message_baz_section_id = 1; + int message_foo_section_id = 2; + ASSERT_THAT(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/false, + /*allow_circular_schema_definitions=*/false), + IsOk()); + + // These documents don't actually match to the tokens in the index. We're + // inserting the documents to get the appropriate number of documents and + // schema types populated. + ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "1") + .SetSchema("email") + .Build())); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id, + document_store_->Put(DocumentBuilder() + .SetKey("namespace", "2") + .SetSchema("message") + .Build())); + + // Poplate the index + TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY; + + // Email document has content "animal" in all sections + ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id, + term_match_type, "animal"), + IsOk()); + + // Message document has content "animal" in all sections + ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id, + term_match_type, "animal"), + IsOk()); + ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id, + term_match_type, "animal"), + IsOk()); + + SearchSpecProto search_spec; + // Create a section filter '<section name>:<query term>' + search_spec.set_query("foo:animal"); + search_spec.set_term_match_type(term_match_type); + search_spec.set_search_type(GetParam()); + + // email has property filters for foo and baz properties + TypePropertyMask *email_mask = search_spec.add_type_property_filters(); + email_mask->set_schema_type("email"); + email_mask->add_paths("foo"); + email_mask->add_paths("baz"); + + // message has property filters for bar and baz properties + TypePropertyMask *message_mask = search_spec.add_type_property_filters(); + message_mask->set_schema_type("message"); + message_mask->add_paths("bar"); + message_mask->add_paths("baz"); + + ICING_ASSERT_OK_AND_ASSIGN( + QueryResults results, + query_processor_->ParseSearch( + search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE, + fake_clock_.GetSystemTimeMilliseconds())); + + // Only hits in sections allowed by both the property filters and section + // restricts should be returned. Message document should not be returned since + // section foo specified in the section restrict is not allowed by the + // property filters. + DocHitInfo expected_doc_hit_info(email_document_id); + expected_doc_hit_info.UpdateSection(email_foo_section_id); + EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), + ElementsAre(expected_doc_hit_info)); + EXPECT_THAT(results.query_term_iterators, SizeIs(1)); + + EXPECT_THAT(results.query_terms, SizeIs(1)); + EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal")); +} + TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) { // Create the schema and document store SchemaProto schema = SchemaBuilder() diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index bcacdce..a389d13 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -780,6 +780,17 @@ libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId( return schema_type_mapper_->Get(schema_type); } +libtextclassifier3::StatusOr<const std::string*> SchemaStore::GetSchemaType( + SchemaTypeId schema_type_id) const { + ICING_RETURN_IF_ERROR(CheckSchemaSet()); + if (const auto it = reverse_schema_type_mapper_.find(schema_type_id); + it == reverse_schema_type_mapper_.end()) { + return absl_ports::InvalidArgumentError("Invalid schema type id"); + } else { + return &it->second; + } +} + libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*> SchemaStore::GetSchemaTypeIdsWithChildren(std::string_view schema_type) const { ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id, diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h index 6075f5b..88968b1 100644 --- a/icing/schema/schema-store.h +++ b/icing/schema/schema-store.h @@ -276,6 +276,15 @@ class SchemaStore { libtextclassifier3::StatusOr<const SchemaTypeConfigProto*> GetSchemaTypeConfig(std::string_view schema_type) const; + // Returns the schema type of the passed in SchemaTypeId + // + // Returns: + // schema type on success + // FAILED_PRECONDITION if schema hasn't been set yet + // INVALID_ARGUMENT if schema type id is invalid + libtextclassifier3::StatusOr<const std::string*> GetSchemaType( + SchemaTypeId schema_type_id) const; + // Returns the SchemaTypeId of the passed in schema type // // Returns: diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto index 411ad3e..7f4fb3e 100644 --- a/proto/icing/proto/search.proto +++ b/proto/icing/proto/search.proto @@ -27,7 +27,7 @@ option java_multiple_files = true; option objc_class_prefix = "ICNG"; // Client-supplied specifications on what documents to retrieve. -// Next tag: 10 +// Next tag: 11 message SearchSpecProto { // REQUIRED: The "raw" query string that users may type. For example, "cat" // will search for documents with the term cat in it. @@ -102,6 +102,16 @@ message SearchSpecProto { // Finer-grained locks are implemented around code paths that write changes to // Icing during Search. optional bool use_read_only_search = 9 [default = true]; + + // TODO(b/294266822): Handle multiple property filter lists for same schema + // type. + // How to specify a subset of properties to be searched. If no type property + // filter has been specified for a schema type (no TypePropertyMask for the + // given schema type), then *all* properties of that schema type will be + // searched. If an empty property filter is specified for a given schema type + // (TypePropertyMask for the given schema type has empty paths field), no + // properties of that schema type will be searched. + repeated TypePropertyMask type_property_filters = 10; } // Client-supplied specifications on what to include/how to format the search diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 58918d3..3ec2c2d 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=548180296) +set(synced_AOSP_CL_number=553570527) |