aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYogesh Singh <yosing@google.com>2023-08-07 23:05:11 +0000
committerYogesh Singh <yosing@google.com>2023-08-07 23:30:09 +0000
commit734ee44256733dfb8c6e9426c7c5580e24076761 (patch)
treea52f95e939f183fff409c600208558f6fecb9d58
parentb83a3188c44e60dc20032b92fc08e1caa3427731 (diff)
parent7edd8a42d2ad4b80e95557a7e4c615f97450b968 (diff)
downloadicing-734ee44256733dfb8c6e9426c7c5580e24076761.tar.gz
Merge remote-tracking branch 'aosp/upstream-master' into androidx-main
* aosp/upstream-master: Update Icing from upstream. Descriptions: ======================================================================== Add join test for SetSchemaNewIndexedDocumentPropertyTriggersIndexRestorationAndReturnsOk ======================================================================== Support different sets of properties to search for different schema types. ======================================================================== Do not cache the section mask for a schema type if no property filters apply while processing the type property filters. ======================================================================== Bug: 288310393 Bug: 291019114 Change-Id: If510ff1ef81f95c2dc10027c1faef0bee6b49aa6
-rw-r--r--icing/icing-search-engine_schema_test.cc134
-rw-r--r--icing/icing-search-engine_search_test.cc541
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.cc179
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict.h52
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc4
-rw-r--r--icing/query/query-processor.cc7
-rw-r--r--icing/query/query-processor_test.cc255
-rw-r--r--icing/schema/schema-store.cc11
-rw-r--r--icing/schema/schema-store.h9
-rw-r--r--proto/icing/proto/search.proto12
-rw-r--r--synced_AOSP_CL_number.txt2
11 files changed, 1138 insertions, 68 deletions
diff --git a/icing/icing-search-engine_schema_test.cc b/icing/icing-search-engine_schema_test.cc
index 5076d50..0e88c5a 100644
--- a/icing/icing-search-engine_schema_test.cc
+++ b/icing/icing-search-engine_schema_test.cc
@@ -897,8 +897,17 @@ TEST_F(
// - "age": integer type, indexed. Section id = 0
// - "name": string type, indexed. Section id = 1.
// - "worksFor.name": string type, (nested) indexed. Section id = 2.
+ //
+ // Joinable property id assignment for 'Person':
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
SchemaProto schema_one =
SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
.AddType(SchemaTypeConfigBuilder()
.SetType("Person")
.AddProperty(PropertyConfigBuilder()
@@ -922,38 +931,47 @@ TEST_F(
.SetName("name")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
-
- SetSchemaResultProto set_schema_result = icing.SetSchema(schema_one);
- // Ignore latency numbers. They're covered elsewhere.
- set_schema_result.clear_latency_ms();
- SetSchemaResultProto expected_set_schema_result;
- expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
- expected_set_schema_result.mutable_new_schema_types()->Add("Organization");
- expected_set_schema_result.mutable_new_schema_types()->Add("Person");
- EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result));
-
- DocumentProto personDocument =
+ ASSERT_THAT(icing.SetSchema(schema_one).status(), ProtoIsOk());
+
+ DocumentProto list_document = DocumentBuilder()
+ .SetKey("namespace", "list/1")
+ .SetSchema("List")
+ .SetCreationTimestampMs(1000)
+ .AddStringProperty("title", "title")
+ .Build();
+ DocumentProto person_document =
DocumentBuilder()
.SetKey("namespace", "person/2")
.SetSchema("Person")
.SetCreationTimestampMs(1000)
.AddStringProperty("name", "John")
.AddInt64Property("age", 20)
- .AddDocumentProperty("worksFor",
- DocumentBuilder()
- .SetKey("namespace", "org/1")
- .SetSchema("Organization")
- .AddStringProperty("name", "Google")
- .Build())
+ .AddDocumentProperty(
+ "worksFor", DocumentBuilder()
+ .SetKey("namespace", "org/1")
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Google")
+ .AddStringProperty("listRef", "namespace#list/1")
+ .Build())
.Build();
- EXPECT_THAT(icing.Put(personDocument).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(list_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ ResultSpecProto result_spec = ResultSpecProto::default_instance();
+ result_spec.set_max_joined_children_per_parent_to_return(
+ std::numeric_limits<int32_t>::max());
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- personDocument;
+ person_document;
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
@@ -964,8 +982,7 @@ TEST_F(
search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
SearchResultProto actual_results =
- icing.Search(search_spec1, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -976,11 +993,44 @@ TEST_F(
SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
- actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
+ // Verify join search: join a query for `title:title` (which will get
+ // list_document) with a child query for `name:John` (which will get
+ // person_document) based on the child's `worksFor.listRef` field.
+ SearchSpecProto search_spec_with_join;
+ search_spec_with_join.set_query("title:title");
+ search_spec_with_join.set_term_match_type(TermMatchType::EXACT_ONLY);
+ JoinSpecProto* join_spec = search_spec_with_join.mutable_join_spec();
+ join_spec->set_parent_property_expression(
+ std::string(JoinProcessor::kQualifiedIdExpr));
+ join_spec->set_child_property_expression("worksFor.listRef");
+ join_spec->set_aggregation_scoring_strategy(
+ JoinSpecProto::AggregationScoringStrategy::COUNT);
+ JoinSpecProto::NestedSpecProto* nested_spec =
+ join_spec->mutable_nested_spec();
+ SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
+ nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
+ nested_search_spec->set_query("name:John");
+ *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
+ *nested_spec->mutable_result_spec() = result_spec;
+
+ SearchResultProto expected_join_search_result_proto;
+ expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ SearchResultProto::ResultProto* result_proto =
+ expected_join_search_result_proto.mutable_results()->Add();
+ *result_proto->mutable_document() = list_document;
+ *result_proto->mutable_joined_results()->Add()->mutable_document() =
+ person_document;
+
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
+
// Change the schema to add another nested document property to 'Person'
//
// New section id assignment for 'Person':
@@ -988,8 +1038,19 @@ TEST_F(
// - "almaMater.name", string type, indexed. Section id = 1
// - "name": string type, indexed. Section id = 2
// - "worksFor.name": string type, (nested) indexed. Section id = 3
+ //
+ // New joinable property id assignment for 'Person':
+ // - "almaMater.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 0.
+ // - "worksFor.listRef": string type, Qualified Id type joinable. Joinable
+ // property id = 1.
SchemaProto schema_two =
SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("List").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("title")
+ .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
.AddType(SchemaTypeConfigBuilder()
.SetType("Person")
.AddProperty(PropertyConfigBuilder()
@@ -1019,6 +1080,11 @@ TEST_F(
.SetName("name")
.SetDataTypeString(TERM_MATCH_PREFIX,
TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("listRef")
+ .SetDataTypeJoinableString(
+ JOINABLE_VALUE_TYPE_QUALIFIED_ID)
.SetCardinality(CARDINALITY_REQUIRED)))
.Build();
@@ -1028,10 +1094,10 @@ TEST_F(
// Index restoration should be triggered here because new schema requires more
// properties to be indexed. Also new section ids will be reassigned and index
// restoration should use new section ids to rebuild.
- set_schema_result = icing.SetSchema(schema_two);
+ SetSchemaResultProto set_schema_result = icing.SetSchema(schema_two);
// Ignore latency numbers. They're covered elsewhere.
set_schema_result.clear_latency_ms();
- expected_set_schema_result = SetSchemaResultProto();
+ SetSchemaResultProto expected_set_schema_result = SetSchemaResultProto();
expected_set_schema_result.mutable_status()->set_code(StatusProto::OK);
expected_set_schema_result.mutable_index_incompatible_changed_schema_types()
->Add("Person");
@@ -1041,8 +1107,8 @@ TEST_F(
// Verify term search:
// Searching for "worksFor.name:Google" should still match document
- actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
@@ -1051,16 +1117,22 @@ TEST_F(
// rebuild was not triggered and Icing is still searching the old index, where
// 'worksFor.name' was indexed at section id 2.
search_spec1.set_query("name:Google");
- actual_results = icing.Search(search_spec1, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec1, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results,
EqualsSearchResultIgnoreStatsAndScores(empty_result));
// Verify numeric (integer) search: should still match document
- actual_results = icing.Search(search_spec2, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
+ actual_results =
+ icing.Search(search_spec2, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
expected_search_result_proto));
+
+ // Verify join search: should still able to join by `worksFor.listRef`
+ actual_results =
+ icing.Search(search_spec_with_join, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_join_search_result_proto));
}
TEST_F(IcingSearchEngineSchemaTest,
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index fd2c939..451c9ce 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -3565,6 +3565,547 @@ TEST_P(IcingSearchEngineSearchTest, SearchWithProjectionMultipleFieldPaths) {
EqualsProto(projected_document_one));
}
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned. Although 'hello' is
+ // present in document_two, it shouldn't be in the result since 'hello' is not
+ // in the specified property filter.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersOnMultipleSchema) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ // Add Person and Organization schema with a property 'name' in both.
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Organization")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("address")
+ .SetDataTypeString(TERM_MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(),
+ ProtoIsOk());
+
+ // 1. Add person document
+ DocumentProto person_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build();
+ ASSERT_THAT(icing.Put(person_document).status(), ProtoIsOk());
+
+ // 1. Add organization document
+ DocumentProto organization_document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Organization")
+ .AddStringProperty("name", "Meg Corp")
+ .AddStringProperty("address", "Universal street")
+ .Build();
+ ASSERT_THAT(icing.Put(organization_document).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters. Person schema has name in it's
+ // property filter but Organization schema doesn't.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("Meg");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* person_property_filters =
+ search_spec->add_type_property_filters();
+ person_property_filters->set_schema_type("Person");
+ person_property_filters->add_paths("name");
+ TypePropertyMask* organization_property_filters =
+ search_spec->add_type_property_filters();
+ organization_property_filters->set_schema_type("Organization");
+ organization_property_filters->add_paths("address");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the person document is returned. Although 'Meg' is
+ // present in organization document, it shouldn't be in the result since
+ // the name field is not specified in the Organization property filter.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(person_document));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithWildcardPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in either of fields specified in
+ // the property filter. This confirms that the property filters for the
+ // wildcard entry have been applied to the Email schema as well.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithMixedPropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for the
+ // wildcard(*) schema type plus property filters of sender.name and body for
+ // the Email schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* wildcard_property_filters =
+ search_spec->add_type_property_filters();
+ wildcard_property_filters->set_schema_type("*");
+ wildcard_property_filters->add_paths("sender.name");
+ wildcard_property_filters->add_paths("subject");
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("body");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the second document is returned since the first
+ // document doesn't contain the word 'hello' in either of fields sender.name
+ // or body. This confirms that the property filters specified for Email schema
+ // have been applied and the ones specified for wildcard entry have been
+ // ignored.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_two));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithNonApplicablePropertyFilters) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filters of sender.name and subject for an
+ // unknown schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("unknown");
+ email_property_filters->add_paths("sender.name");
+ email_property_filters->add_paths("subject");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ // 3. Verify that both the documents are returned since each of them have the
+ // word 'hello' in at least 1 property. The second document being returned
+ // confirms that the body field was searched and the specified property
+ // filters were not applied to the Email schema type.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(),
+ EqualsProto(document_one));
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithEmptyPropertyFilter) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with empty property filter for Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any properties to be searched.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest,
+ SearchWithPropertyFilterHavingInvalidProperty) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Message")
+ .AddStringProperty("body", "Hello World!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filter having invalid/unknown property for
+ // Message schema.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* message_property_filters =
+ search_spec->add_type_property_filters();
+ message_property_filters->set_schema_type("Message");
+ message_property_filters->add_paths("unknown");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+
+ // 3. Verify that no documents are returned. Although 'hello' is present in
+ // the indexed document, it shouldn't be returned since the Message property
+ // filter doesn't allow any valid properties to be searched. Any
+ // invalid/unknown properties specified in the property filters will be
+ // ignored while searching.
+ ASSERT_THAT(results.results(), IsEmpty());
+}
+
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersWithNesting) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "hellogirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query with property filter of sender.emailAddress for the Email
+ // schema type.
+ auto search_spec = std::make_unique<SearchSpecProto>();
+ search_spec->set_term_match_type(TermMatchType::PREFIX);
+ search_spec->set_query("hello");
+ search_spec->set_search_type(GetParam());
+ TypePropertyMask* email_property_filters =
+ search_spec->add_type_property_filters();
+ email_property_filters->set_schema_type("Email");
+ email_property_filters->add_paths("sender.emailAddress");
+
+ auto result_spec = std::make_unique<ResultSpecProto>();
+
+ auto scoring_spec = std::make_unique<ScoringSpecProto>();
+ *scoring_spec = GetDefaultScoringSpec();
+ SearchResultProto results =
+ icing.Search(*search_spec, *scoring_spec, *result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that only the first document is returned since the second
+ // document doesn't contain the word 'hello' in sender.emailAddress. The first
+ // document being returned confirms that the nested property
+ // sender.emailAddress was actually searched.
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(document_one));
+}
+
TEST_P(IcingSearchEngineSearchTest, QueryStatsProtoTest) {
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(5);
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 227a185..b850a9b 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -42,8 +42,99 @@ DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
- target_sections_(std::move(target_sections)),
- current_time_ms_(current_time_ms) {}
+ current_time_ms_(current_time_ms) {
+ type_property_filters_[std::string(SchemaStore::kSchemaTypeWildcard)] =
+ std::move(target_sections);
+}
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const SearchSpecProto& search_spec,
+ int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms) {
+ // TODO(b/294274922): Add support for polymorphism in type property filters.
+ for (const TypePropertyMask& type_property_mask :
+ search_spec.type_property_filters()) {
+ type_property_filters_[type_property_mask.schema_type()] =
+ std::set<std::string>(type_property_mask.paths().begin(),
+ type_property_mask.paths().end());
+ }
+}
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters,
+ std::unordered_map<std::string, SectionIdMask> type_property_masks,
+ int64_t current_time_ms)
+ : delegate_(std::move(delegate)),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ current_time_ms_(current_time_ms),
+ type_property_filters_(std::move(type_property_filters)),
+ type_property_masks_(std::move(type_property_masks)) {}
+
+SectionIdMask DocHitInfoIteratorSectionRestrict::GenerateSectionMask(
+ const std::string& schema_type,
+ const std::set<std::string>& target_sections) const {
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ auto section_metadata_list_or =
+ schema_store_.GetSectionMetadata(schema_type);
+ if (!section_metadata_list_or.ok()) {
+ // The current schema doesn't have section metadata.
+ return kSectionIdMaskNone;
+ }
+ const std::vector<SectionMetadata>* section_metadata_list =
+ section_metadata_list_or.ValueOrDie();
+ for (const SectionMetadata& section_metadata : *section_metadata_list) {
+ if (target_sections.find(section_metadata.path) !=
+ target_sections.end()) {
+ section_mask |= UINT64_C(1) << section_metadata.id;
+ }
+ }
+ return section_mask;
+}
+
+SectionIdMask DocHitInfoIteratorSectionRestrict::
+ ComputeAndCacheSchemaTypeAllowedSectionsMask(
+ const std::string& schema_type) {
+ if (const auto type_property_mask_itr =
+ type_property_masks_.find(schema_type);
+ type_property_mask_itr != type_property_masks_.end()) {
+ return type_property_mask_itr->second;
+ }
+
+ // Section id mask of schema_type is never calculated before, so
+ // calculate it here and put it into type_property_masks_.
+ // - If type property filters of schema_type or wildcard (*) are
+ // specified, then create a mask according to the filters.
+ // - Otherwise, create a mask to match all properties.
+ SectionIdMask new_section_id_mask = kSectionIdMaskAll;
+ if (const auto itr = type_property_filters_.find(schema_type);
+ itr != type_property_filters_.end()) {
+ // Property filters defined for given schema type
+ new_section_id_mask = GenerateSectionMask(
+ schema_type, itr->second);
+ } else if (const auto wildcard_itr = type_property_filters_.find(
+ std::string(SchemaStore::kSchemaTypeWildcard));
+ wildcard_itr != type_property_filters_.end()) {
+ // Property filters defined for wildcard entry
+ new_section_id_mask = GenerateSectionMask(
+ schema_type, wildcard_itr->second);
+ } else {
+ // Do not cache the section mask if no property filters apply to this schema
+ // type to avoid taking up unnecessary space.
+ return kSectionIdMaskAll;
+ }
+
+ type_property_masks_[schema_type] = new_section_id_mask;
+ return new_section_id_mask;
+}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
@@ -63,32 +154,32 @@ libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
// Guaranteed that the DocumentFilterData exists at this point
SchemaTypeId schema_type_id = data_optional.value().schema_type_id();
-
- // A hit can be in multiple sections at once, need to check which of the
- // section ids match the target sections
- while (section_id_mask != 0) {
- // There was a hit in this section id
- SectionId section_id = __builtin_ctzll(section_id_mask);
-
- auto section_metadata_or =
- schema_store_.GetSectionMetadata(schema_type_id, section_id);
-
- if (section_metadata_or.ok()) {
- const SectionMetadata* section_metadata =
- section_metadata_or.ValueOrDie();
-
- if (target_sections_.find(section_metadata->path) !=
- target_sections_.end()) {
- // The hit was in the target section name, return OK/found
- hit_intersect_section_ids_mask_ |= UINT64_C(1) << section_id;
- }
- }
-
- // Mark this section as checked
- section_id_mask &= ~(UINT64_C(1) << section_id);
+ auto schema_type_or = schema_store_.GetSchemaType(schema_type_id);
+ if (!schema_type_or.ok()) {
+ // Ran into error retrieving schema type, skip
+ continue;
}
+ const std::string* schema_type = std::move(schema_type_or).ValueOrDie();
+ SectionIdMask allowed_sections_mask =
+ ComputeAndCacheSchemaTypeAllowedSectionsMask(*schema_type);
- if (hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
+ // A hit can be in multiple sections at once, need to check which of the
+ // section ids match the sections allowed by type_property_masks_. This can
+ // be done by doing a bitwise and of the section_id_mask in the doc hit and
+ // the allowed_sections_mask.
+ hit_intersect_section_ids_mask_ = section_id_mask & allowed_sections_mask;
+
+ // Return this document if:
+ // - the sectionIdMask is not empty after applying property filters, or
+ // - no property filters apply for its schema type (allowed_sections_mask
+ // == kSectionIdMaskAll). This is needed to ensure that in case of empty
+ // query (which uses doc-hit-info-iterator-all-document-id), where
+ // section_id_mask (and hence hit_intersect_section_ids_mask_) is
+ // kSectionIdMaskNone, doc hits with no property restrictions don't get
+ // filtered out. Doc hits for schema types for whom property filters are
+ // specified will still get filtered out.
+ if (allowed_sections_mask == kSectionIdMaskAll
+ || hit_intersect_section_ids_mask_ != kSectionIdMaskNone) {
doc_hit_info_ = delegate_->doc_hit_info();
doc_hit_info_.set_hit_section_ids_mask(hit_intersect_section_ids_mask_);
return libtextclassifier3::Status::OK;
@@ -104,16 +195,36 @@ libtextclassifier3::StatusOr<DocHitInfoIterator::TrimmedNode>
DocHitInfoIteratorSectionRestrict::TrimRightMostNode() && {
ICING_ASSIGN_OR_RETURN(TrimmedNode trimmed_delegate,
std::move(*delegate_).TrimRightMostNode());
+ // TrimRightMostNode is only used by suggestion processor to process query
+ // expression, so an entry for wildcard should always be present in
+ // type_property_filters_ when code flow reaches here. If the InternalError
+ // below is returned, that means TrimRightMostNode hasn't been called in the
+ // right context.
+ const auto it = type_property_filters_.find("*");
+ if (it == type_property_filters_.end()) {
+ return absl_ports::InternalError(
+ "A wildcard entry should always be present in type property filters "
+ "whenever TrimRightMostNode() is called for "
+ "DocHitInfoIteratorSectionRestrict");
+ }
+ std::set<std::string>& target_sections = it->second;
+ if (target_sections.empty()) {
+ return absl_ports::InternalError(
+ "Target sections should not be empty whenever TrimRightMostNode() is "
+ "called for DocHitInfoIteratorSectionRestrict");
+ }
if (trimmed_delegate.iterator_ == nullptr) {
// TODO(b/228240987): Update TrimmedNode and downstream code to handle
// multiple section restricts.
- trimmed_delegate.target_section_ = std::move(*target_sections_.begin());
+ trimmed_delegate.target_section_ = std::move(*target_sections.begin());
return trimmed_delegate;
}
trimmed_delegate.iterator_ =
- std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::unique_ptr<DocHitInfoIteratorSectionRestrict>(
+ new DocHitInfoIteratorSectionRestrict(
std::move(trimmed_delegate.iterator_), &document_store_,
- &schema_store_, std::move(target_sections_), current_time_ms_);
+ &schema_store_, std::move(type_property_filters_),
+ std::move(type_property_masks_), current_time_ms_));
return std::move(trimmed_delegate);
}
@@ -126,8 +237,14 @@ int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
}
std::string DocHitInfoIteratorSectionRestrict::ToString() const {
- return absl_ports::StrCat("(", absl_ports::StrJoin(target_sections_, ","),
- "): ", delegate_->ToString());
+ std::string output = "";
+ for (auto it = type_property_filters_.cbegin();
+ it != type_property_filters_.cend(); it++) {
+ std::string paths = absl_ports::StrJoin(it->second, ",");
+ output += (it->first) + ":" + (paths) + "; ";
+ }
+ std::string result = "{" + output.substr(0, output.size() - 2) + "}: ";
+ return absl_ports::StrCat(result, delegate_->ToString());
}
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index 58dd120..5d44ed7 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -19,10 +19,13 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
#include "icing/store/document-store.h"
namespace icing {
@@ -44,6 +47,12 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
const DocumentStore* document_store, const SchemaStore* schema_store,
std::set<std::string> target_sections, int64_t current_time_ms);
+ explicit DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ const SearchSpecProto& search_spec,
+ int64_t current_time_ms);
+
libtextclassifier3::Status Advance() override;
libtextclassifier3::StatusOr<TrimmedNode> TrimRightMostNode() && override;
@@ -72,12 +81,51 @@ class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
}
private:
+ explicit DocHitInfoIteratorSectionRestrict(
+ std::unique_ptr<DocHitInfoIterator> delegate,
+ const DocumentStore* document_store, const SchemaStore* schema_store,
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters,
+ std::unordered_map<std::string, SectionIdMask> type_property_masks,
+ int64_t current_time_ms);
+ // Calculates the section mask of allowed sections(determined by the property
+ // filters map) for the given schema type and caches the same for any future
+ // calls.
+ //
+ // Returns:
+ // - If type_property_filters_ has an entry for the given schema type or
+ // wildcard(*), return a bitwise or of section IDs in the schema type that
+ // that are also present in the relevant filter list.
+ // - Otherwise, return kSectionIdMaskAll.
+ SectionIdMask ComputeAndCacheSchemaTypeAllowedSectionsMask(
+ const std::string& schema_type);
+ // Generates a section mask for the given schema type and the target sections.
+ //
+ // Returns:
+ // - A bitwise or of section IDs in the schema_type that that are also
+ // present in the target_sections list.
+ // - If none of the sections in the schema_type are present in the
+ // target_sections list, return kSectionIdMaskNone.
+ // This is done by doing a bitwise or of the target section ids for the given
+ // schema type.
+ SectionIdMask GenerateSectionMask(const std::string& schema_type,
+ const std::set<std::string>&
+ target_sections) const;
+
std::unique_ptr<DocHitInfoIterator> delegate_;
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
-
- std::set<std::string> target_sections_;
int64_t current_time_ms_;
+
+ // Map of property filters per schema type. Supports wildcard(*) for schema
+ // type that will apply to all schema types that are not specifically
+ // specified in the mapping otherwise.
+ std::unordered_map<std::string, std::set<std::string>>
+ type_property_filters_;
+ // Mapping of schema type to the section mask of allowed sections for that
+ // schema type. This section mask is lazily calculated based on the specified
+ // property filters and cached for any future use.
+ std::unordered_map<std::string, SectionIdMask> type_property_masks_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 47f55aa..1500571 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -186,7 +186,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
DocHitInfoIteratorSectionRestrict filtered_iterator(
std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), /*target_sections=*/{},
+ schema_store_.get(), /*target_sections=*/std::set<std::string>(),
fake_clock_.GetSystemTimeMilliseconds());
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
@@ -391,7 +391,7 @@ TEST_F(DocHitInfoIteratorSectionRestrictTest,
// Create a hit that exists in a different section, so it shouldn't match any
// section filters
std::vector<DocHitInfo> doc_hit_infos = {
- DocHitInfo(document_id, kSectionIdMaskNone << not_matching_section_id)};
+ DocHitInfo(document_id, UINT64_C(1) << not_matching_section_id)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 5e0b696..3e43ad9 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -176,6 +176,13 @@ libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
std::move(results.root_iterator), &document_store_, &schema_store_,
options, current_time_ms);
+ // TODO(b/294114230): Move this SectionRestrict filter from root level to
+ // lower levels if that would improve performance.
+ if (!search_spec.type_property_filters().empty()) {
+ results.root_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(results.root_iterator), &document_store_, &schema_store_,
+ search_spec, current_time_ms);
+ }
return results;
}
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 5bbf9ba..fa70de1 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -2646,6 +2646,261 @@ TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
+TEST_P(QueryProcessorTest, TypePropertyFilter) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Poplate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Ordered by descending DocumentId, so message comes first since it was
+ // inserted last
+ DocHitInfo expected_doc_hit_info1(message_document_id);
+ expected_doc_hit_info1.UpdateSection(message_bar_section_id);
+ expected_doc_hit_info1.UpdateSection(message_baz_section_id);
+ DocHitInfo expected_doc_hit_info2(email_document_id);
+ expected_doc_hit_info2.UpdateSection(email_foo_section_id);
+ expected_doc_hit_info2.UpdateSection(email_baz_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+}
+
+TEST_P(QueryProcessorTest, TypePropertyFilterWithSectionRestrict) {
+ // Create the schema and document store
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("baz")
+ .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
+ int email_bar_section_id = 0;
+ int email_baz_section_id = 1;
+ int email_foo_section_id = 2;
+ int message_bar_section_id = 0;
+ int message_baz_section_id = 1;
+ int message_foo_section_id = 2;
+ ASSERT_THAT(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/false,
+ /*allow_circular_schema_definitions=*/false),
+ IsOk());
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Poplate the index
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // Email document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal" in all sections
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
+ term_match_type, "animal"),
+ IsOk());
+
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>'
+ search_spec.set_query("foo:animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.set_search_type(GetParam());
+
+ // email has property filters for foo and baz properties
+ TypePropertyMask *email_mask = search_spec.add_type_property_filters();
+ email_mask->set_schema_type("email");
+ email_mask->add_paths("foo");
+ email_mask->add_paths("baz");
+
+ // message has property filters for bar and baz properties
+ TypePropertyMask *message_mask = search_spec.add_type_property_filters();
+ message_mask->set_schema_type("message");
+ message_mask->add_paths("bar");
+ message_mask->add_paths("baz");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ QueryResults results,
+ query_processor_->ParseSearch(
+ search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+ fake_clock_.GetSystemTimeMilliseconds()));
+
+ // Only hits in sections allowed by both the property filters and section
+ // restricts should be returned. Message document should not be returned since
+ // section foo specified in the section restrict is not allowed by the
+ // property filters.
+ DocHitInfo expected_doc_hit_info(email_document_id);
+ expected_doc_hit_info.UpdateSection(email_foo_section_id);
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(expected_doc_hit_info));
+ EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+}
+
TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
// Create the schema and document store
SchemaProto schema = SchemaBuilder()
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index bcacdce..a389d13 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -780,6 +780,17 @@ libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
return schema_type_mapper_->Get(schema_type);
}
+libtextclassifier3::StatusOr<const std::string*> SchemaStore::GetSchemaType(
+ SchemaTypeId schema_type_id) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
+ if (const auto it = reverse_schema_type_mapper_.find(schema_type_id);
+ it == reverse_schema_type_mapper_.end()) {
+ return absl_ports::InvalidArgumentError("Invalid schema type id");
+ } else {
+ return &it->second;
+ }
+}
+
libtextclassifier3::StatusOr<const std::unordered_set<SchemaTypeId>*>
SchemaStore::GetSchemaTypeIdsWithChildren(std::string_view schema_type) const {
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 6075f5b..88968b1 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -276,6 +276,15 @@ class SchemaStore {
libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
GetSchemaTypeConfig(std::string_view schema_type) const;
+ // Returns the schema type of the passed in SchemaTypeId
+ //
+ // Returns:
+ // schema type on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
+ // INVALID_ARGUMENT if schema type id is invalid
+ libtextclassifier3::StatusOr<const std::string*> GetSchemaType(
+ SchemaTypeId schema_type_id) const;
+
// Returns the SchemaTypeId of the passed in schema type
//
// Returns:
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index 411ad3e..7f4fb3e 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -27,7 +27,7 @@ option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Client-supplied specifications on what documents to retrieve.
-// Next tag: 10
+// Next tag: 11
message SearchSpecProto {
// REQUIRED: The "raw" query string that users may type. For example, "cat"
// will search for documents with the term cat in it.
@@ -102,6 +102,16 @@ message SearchSpecProto {
// Finer-grained locks are implemented around code paths that write changes to
// Icing during Search.
optional bool use_read_only_search = 9 [default = true];
+
+ // TODO(b/294266822): Handle multiple property filter lists for same schema
+ // type.
+ // How to specify a subset of properties to be searched. If no type property
+ // filter has been specified for a schema type (no TypePropertyMask for the
+ // given schema type), then *all* properties of that schema type will be
+ // searched. If an empty property filter is specified for a given schema type
+ // (TypePropertyMask for the given schema type has empty paths field), no
+ // properties of that schema type will be searched.
+ repeated TypePropertyMask type_property_filters = 10;
}
// Client-supplied specifications on what to include/how to format the search
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 58918d3..3ec2c2d 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=548180296)
+set(synced_AOSP_CL_number=553570527)