diff options
author | Tim Barron <tjbarron@google.com> | 2021-07-02 19:21:27 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2021-07-02 19:21:27 +0000 |
commit | 914dcccaf0ea341cbd6e7fcc0969f086acb5ca10 (patch) | |
tree | c7c6251292221fbdc8ce32a3caba71e3ed1ee18c | |
parent | 5131721fabf167f05e41373e85e5d008eecfe9c3 (diff) | |
parent | 8ed9ccef5cd49d935a322118c8697da411f4ae51 (diff) | |
download | icing-914dcccaf0ea341cbd6e7fcc0969f086acb5ca10.tar.gz |
Merge remote-tracking branch 'goog/androidx-platform-dev' am: 8ed9ccef5c
Original change: https://googleplex-android-review.googlesource.com/c/platform/external/icing/+/15162658
Change-Id: Ic95a07cedf0aa2119225792420a06db00b88588c
-rw-r--r-- | icing/icing-search-engine.cc | 8 | ||||
-rw-r--r-- | icing/icing-search-engine_test.cc | 401 | ||||
-rw-r--r-- | icing/schema/schema-store.cc | 6 | ||||
-rw-r--r-- | icing/schema/schema-store_test.cc | 69 | ||||
-rw-r--r-- | icing/schema/schema-util.cc | 322 | ||||
-rw-r--r-- | icing/schema/schema-util.h | 17 | ||||
-rw-r--r-- | icing/schema/schema-util_test.cc | 535 |
7 files changed, 1218 insertions, 140 deletions
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index e9865e4..cf998bf 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -508,12 +508,6 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( return result_proto; } - libtextclassifier3::Status status = SchemaUtil::Validate(new_schema); - if (!status.ok()) { - TransformStatus(status, result_status); - return result_proto; - } - auto lost_previous_schema_or = LostPreviousSchema(); if (!lost_previous_schema_or.ok()) { TransformStatus(lost_previous_schema_or.status(), result_status); @@ -549,6 +543,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema( result_proto.add_incompatible_schema_types(incompatible_type); } + libtextclassifier3::Status status; if (set_schema_result.success) { if (lost_previous_schema) { // No previous schema to calculate a diff against. We have to go through @@ -911,6 +906,7 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats(); delete_stats->set_delete_type(DeleteStatsProto::DeleteType::QUERY); + std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); libtextclassifier3::Status status = ValidateSearchSpec(search_spec, performance_configuration_); diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 752e0e2..5760614 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -1053,6 +1053,407 @@ TEST_F(IcingSearchEngineTest, expected_search_result_proto)); } +TEST_F(IcingSearchEngineTest, + SetSchemaChangeNestedPropertiesTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaTypeConfigProto person_proto = + SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty(PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto nested_schema = + SchemaBuilder() + .AddType(person_proto) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Person") + .AddStringProperty("name", "Bill Lundbergh") + .Build()) + .Build(); + + // "sender.name" should get assigned property id 0 and subject should get + // property id 1. + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // document should match a query for 'Bill' in 'sender.name', but not in + // 'subject' + SearchSpecProto search_spec; + search_spec.set_query("sender.name:Bill"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + SearchResultProto empty_result; + empty_result.mutable_status()->set_code(StatusProto::OK); + search_spec.set_query("subject:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + + // Now update the schema with index_nested_properties=false. This should + // reassign property ids, lead to an index rebuild and ensure that nothing + // match a query for "Bill". + SchemaProto no_nested_schema = + SchemaBuilder() + .AddType(person_proto) + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty(PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument( + "Person", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema(no_nested_schema); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // document shouldn't match a query for 'Bill' in either 'sender.name' or + // 'subject' + search_spec.set_query("sender.name:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); + + search_spec.set_query("subject:Bill"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStatsAndScores(empty_result)); +} + +TEST_F(IcingSearchEngineTest, + ForceSetSchemaPropertyDeletionTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 'body' should have a property id of 0 and 'subject' should have a property + // id of 1. + SchemaProto email_with_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = + icing.SetSchema(email_with_body_schema); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create a document with only a subject property. + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + SearchSpecProto search_spec; + search_spec.set_query("subject:tps"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + // Now update the schema to remove the 'body' field. This is backwards + // incompatible, but document should be preserved because it doesn't contain a + // 'body' field. If the index is correctly rebuilt, then 'subject' will now + // have a property id of 0. If not, then the hits in the index will still have + // have a property id of 1 and therefore it won't be found. + SchemaProto email_no_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Email").AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema( + email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + search_spec.set_query("subject:tps"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); +} + +TEST_F( + IcingSearchEngineTest, + ForceSetSchemaPropertyDeletionAndAdditionTriggersIndexRestorationAndReturnsOk) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + // 'body' should have a property id of 0 and 'subject' should have a property + // id of 1. + SchemaProto email_with_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SetSchemaResultProto set_schema_result = + icing.SetSchema(email_with_body_schema); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create a document with only a subject property. + DocumentProto document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .Build(); + EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + SearchSpecProto search_spec; + search_spec.set_query("subject:tps"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + + SearchResultProto result; + result.mutable_status()->set_code(StatusProto::OK); + *result.mutable_results()->Add()->mutable_document() = document; + + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); + + // Now update the schema to remove the 'body' field. This is backwards + // incompatible, but document should be preserved because it doesn't contain a + // 'body' field. If the index is correctly rebuilt, then 'subject' and 'to' + // will now have property ids of 0 and 1 respectively. If not, then the hits + // in the index will still have have a property id of 1 and therefore it won't + // be found. + SchemaProto email_no_body_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("to") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + set_schema_result = icing.SetSchema( + email_no_body_schema, /*ignore_errors_and_delete_documents=*/true); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // We should be able to retrieve the document by searching for 'tps' in + // 'subject'. + search_spec.set_query("subject:tps"); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(result)); +} + +TEST_F(IcingSearchEngineTest, ForceSetSchemaIncompatibleNestedDocsAreDeleted) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SchemaTypeConfigProto email_schema_type = + SchemaTypeConfigBuilder() + .SetType("Email") + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument("Person", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto nested_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("Person") + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("company") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_schema_type) + .Build(); + + SetSchemaResultProto set_schema_result = icing.SetSchema(nested_schema); + SetSchemaResultProto expected_set_schema_result; + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Create two documents - a person document and an email document - both docs + // should be deleted when we remove the 'company' field from the person type. + DocumentProto person_document = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Person") + .SetCreationTimestampMs(1000) + .AddStringProperty("name", "Bill Lundbergh") + .AddStringProperty("company", "Initech Corp.") + .Build(); + EXPECT_THAT(icing.Put(person_document).status(), ProtoIsOk()); + + DocumentProto email_document = + DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(1000) + .AddStringProperty("subject", + "Did you get the memo about TPS reports?") + .AddDocumentProperty("sender", person_document) + .Build(); + EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); + + // We should be able to retrieve both documents. + GetResultProto get_result = + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoIsOk()); + EXPECT_THAT(get_result.document(), EqualsProto(person_document)); + + get_result = + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoIsOk()); + EXPECT_THAT(get_result.document(), EqualsProto(email_document)); + + // Now update the schema to remove the 'company' field. This is backwards + // incompatible, *both* documents should be deleted because both fail + // validation (they each contain a 'Person' that has a non-existent property). + nested_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(email_schema_type) + .Build(); + + set_schema_result = icing.SetSchema( + nested_schema, /*ignore_errors_and_delete_documents=*/true); + expected_set_schema_result = SetSchemaResultProto(); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Person"); + expected_set_schema_result.mutable_incompatible_schema_types()->Add("Email"); + expected_set_schema_result.mutable_status()->set_code(StatusProto::OK); + EXPECT_THAT(set_schema_result, EqualsProto(expected_set_schema_result)); + + // Both documents should be deleted now. + get_result = + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); + + get_result = + icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()); + EXPECT_THAT(get_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); +} + TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index 7040a31..e9ba654 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -322,6 +322,9 @@ SchemaStore::SetSchema(const SchemaProto& new_schema, libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult> SchemaStore::SetSchema(SchemaProto&& new_schema, bool ignore_errors_and_delete_documents) { + ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap new_dependency_map, + SchemaUtil::Validate(new_schema)); + SetSchemaResult result; auto schema_proto_or = GetSchema(); @@ -345,7 +348,8 @@ SchemaStore::SetSchema(SchemaProto&& new_schema, // Different schema, track the differences and see if we can still write it SchemaUtil::SchemaDelta schema_delta = - SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema); + SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + new_dependency_map); // An incompatible index is fine, we can just reindex result.index_incompatible = schema_delta.index_incompatible; diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index 69663b5..5ef2dea 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -49,6 +49,8 @@ using ::testing::Pointee; constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = + PropertyConfigProto_Cardinality_Code_REPEATED; constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = StringIndexingConfig_TokenizerType_Code_PLAIN; @@ -522,6 +524,73 @@ TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) { EXPECT_THAT(*actual_schema, EqualsProto(schema)); } +TEST_F(SchemaStoreTest, IndexNestedDocumentsChangeRequiresReindexingOk) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + // Make two schemas. One that sets index_nested_properties to false and one + // that sets it to true. + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto no_nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty( + PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument("email", + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + SchemaProto nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty( + PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument("email", + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + // Set schema with index_nested_properties=false to start. + SchemaStore::SetSchemaResult result; + result.success = true; + EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema, + schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema)); + + // Set schema with index_nested_properties=true and confirm that the change to + // 'person' is index incompatible. + result = SchemaStore::SetSchemaResult(); + result.success = true; + result.index_incompatible = true; + EXPECT_THAT(schema_store->SetSchema(nested_index_schema), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(nested_index_schema)); + + // Set schema with index_nested_properties=false and confirm that the change + // to 'person' is index incompatible. + result = SchemaStore::SetSchemaResult(); + result.success = true; + result.index_incompatible = true; + EXPECT_THAT(schema_store->SetSchema(no_nested_index_schema), + IsOkAndHolds(EqualsSetSchemaResult(result))); + ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema()); + EXPECT_THAT(*actual_schema, EqualsProto(no_nested_index_schema)); +} + TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) { ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<SchemaStore> schema_store, diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc index 49e7096..cabe76d 100644 --- a/icing/schema/schema-util.cc +++ b/icing/schema/schema-util.cc @@ -95,43 +95,175 @@ bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed, } // namespace -libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) { - // Tracks SchemaTypeConfigs that we've validated already. - std::unordered_set<std::string_view> known_schema_types; +libtextclassifier3::Status ExpandTranstiveDependencies( + const SchemaUtil::DependencyMap& child_to_direct_parent_map, + std::string_view type, + SchemaUtil::DependencyMap* expanded_child_to_parent_map, + std::unordered_set<std::string_view>* pending_expansions, + std::unordered_set<std::string_view>* orphaned_types) { + auto expanded_itr = expanded_child_to_parent_map->find(type); + if (expanded_itr != expanded_child_to_parent_map->end()) { + // We've already expanded this type. Just return. + return libtextclassifier3::Status::OK; + } + auto itr = child_to_direct_parent_map.find(type); + if (itr == child_to_direct_parent_map.end()) { + // It's an orphan. Just return. + orphaned_types->insert(type); + return libtextclassifier3::Status::OK; + } + pending_expansions->insert(type); + std::unordered_set<std::string_view> expanded_dependencies; + + // Add all of the direct parent dependencies. + expanded_dependencies.reserve(itr->second.size()); + expanded_dependencies.insert(itr->second.begin(), itr->second.end()); + + // Iterate through each direct parent and add their indirect parents. + for (std::string_view dep : itr->second) { + // 1. Check if we're in the middle of expanding this type - IOW there's a + // cycle! + if (pending_expansions->count(dep) > 0) { + return absl_ports::InvalidArgumentError( + absl_ports::StrCat("Infinite loop detected in type configs. '", type, + "' references itself.")); + } - // Tracks SchemaTypeConfigs that have been mentioned (by other - // SchemaTypeConfigs), but we haven't validated yet. - std::unordered_set<std::string_view> unknown_schema_types; + // 2. Expand this type as needed. + ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies( + child_to_direct_parent_map, dep, expanded_child_to_parent_map, + pending_expansions, orphaned_types)); + if (orphaned_types->count(dep) > 0) { + // Dep is an orphan. Just skip to the next dep. + continue; + } - // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated - // already. - std::unordered_set<std::string_view> known_property_names; + // 3. Dep has been fully expanded. Add all of its dependencies to this + // type's dependencies. + auto dep_expanded_itr = expanded_child_to_parent_map->find(dep); + expanded_dependencies.reserve(expanded_dependencies.size() + + dep_expanded_itr->second.size()); + expanded_dependencies.insert(dep_expanded_itr->second.begin(), + dep_expanded_itr->second.end()); + } + expanded_child_to_parent_map->insert( + {type, std::move(expanded_dependencies)}); + pending_expansions->erase(type); + return libtextclassifier3::Status::OK; +} - // Tracks which schemas reference other schemas. This is used to detect - // infinite loops between indexed schema references (e.g. A -> B -> C -> A). - // We could get into an infinite loop while trying to assign section ids. - // - // The key is the "child" schema that is being referenced within another - // schema. - // The value is a set of all the direct/indirect "parent" schemas that - // reference the "child" schema. - // - // For example, if A has a nested document property of type B, then A is the - // "parent" and B is the "child" and so schema_references will contain - // schema_references[B] == {A}. - std::unordered_map<std::string_view, std::unordered_set<std::string_view>> - schema_references; +// Expands the dependencies represented by the child_to_direct_parent_map to +// also include indirect parents. +// +// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of +// type B and B has a property of type C. C and D only have non-document +// properties. +// +// The child to direct parent dependency map for this schema would be: +// C -> B +// B -> A +// +// This function would expand it so that A is also present as an indirect parent +// of C. +libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> +ExpandTranstiveDependencies( + const SchemaUtil::DependencyMap& child_to_direct_parent_map) { + SchemaUtil::DependencyMap expanded_child_to_parent_map; + + // Types that we are expanding. + std::unordered_set<std::string_view> pending_expansions; + + // Types that have no parents that depend on them. + std::unordered_set<std::string_view> orphaned_types; + for (const auto& kvp : child_to_direct_parent_map) { + ICING_RETURN_IF_ERROR(ExpandTranstiveDependencies( + child_to_direct_parent_map, kvp.first, &expanded_child_to_parent_map, + &pending_expansions, &orphaned_types)); + } + return expanded_child_to_parent_map; +} +// Builds a transitive child-parent dependency map. 'Orphaned' types (types with +// no parents) will not be present in the map. +// +// Ex. Suppose we have a schema with four types A, B, C, D. A has a property of +// type B and B has a property of type C. C and D only have non-document +// properties. +// +// The transitive child-parent dependency map for this schema would be: +// C -> A, B +// B -> A +// +// A and D would be considered orphaned properties because no type refers to +// them. +// +// RETURNS: +// On success, a transitive child-parent dependency map of all types in the +// schema. +// INVALID_ARGUMENT if the schema contains a cycle or an undefined type. +// ALREADY_EXISTS if a schema type is specified more than once in the schema +libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> +BuildTransitiveDependencyGraph(const SchemaProto& schema) { + // Child to parent map. + SchemaUtil::DependencyMap child_to_direct_parent_map; + + // Add all first-order dependencies. + std::unordered_set<std::string_view> known_types; + std::unordered_set<std::string_view> unknown_types; for (const auto& type_config : schema.types()) { std::string_view schema_type(type_config.schema_type()); - ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type)); - - // We can't have duplicate schema_types - if (!known_schema_types.insert(schema_type).second) { + if (known_types.count(schema_type) > 0) { return absl_ports::AlreadyExistsError(absl_ports::StrCat( "Field 'schema_type' '", schema_type, "' is already defined")); } - unknown_schema_types.erase(schema_type); + known_types.insert(schema_type); + unknown_types.erase(schema_type); + for (const auto& property_config : type_config.properties()) { + if (property_config.data_type() == + PropertyConfigProto::DataType::DOCUMENT) { + // Need to know what schema_type these Document properties should be + // validated against + std::string_view property_schema_type(property_config.schema_type()); + if (property_schema_type == schema_type) { + return absl_ports::InvalidArgumentError( + absl_ports::StrCat("Infinite loop detected in type configs. '", + schema_type, "' references itself.")); + } + if (known_types.count(property_schema_type) == 0) { + unknown_types.insert(property_schema_type); + } + auto itr = child_to_direct_parent_map.find(property_schema_type); + if (itr == child_to_direct_parent_map.end()) { + child_to_direct_parent_map.insert( + {property_schema_type, std::unordered_set<std::string_view>()}); + itr = child_to_direct_parent_map.find(property_schema_type); + } + itr->second.insert(schema_type); + } + } + } + if (!unknown_types.empty()) { + return absl_ports::InvalidArgumentError(absl_ports::StrCat( + "Undefined 'schema_type's: ", absl_ports::StrJoin(unknown_types, ","))); + } + return ExpandTranstiveDependencies(child_to_direct_parent_map); +} + +libtextclassifier3::StatusOr<SchemaUtil::DependencyMap> SchemaUtil::Validate( + const SchemaProto& schema) { + // 1. Build the dependency map. This will detect any cycles, non-existent or + // duplicate types in the schema. + ICING_ASSIGN_OR_RETURN(SchemaUtil::DependencyMap dependency_map, + BuildTransitiveDependencyGraph(schema)); + + // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated + // already. + std::unordered_set<std::string_view> known_property_names; + + // 2. Validate the properties of each type. + for (const auto& type_config : schema.types()) { + std::string_view schema_type(type_config.schema_type()); + ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type)); // We only care about properties being unique within one type_config known_property_names.clear(); @@ -164,56 +296,6 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) { "data_types in schema property '", schema_type, ".", property_name, "'")); } - - if (property_schema_type == schema_type) { - // The schema refers to itself. This also causes a infinite loop. - // - // TODO(b/171996137): When clients can opt out of indexing document - // properties, then we don't need to do this if the document property - // isn't indexed. We only care about infinite loops while we're trying - // to assign section ids for indexing. - return absl_ports::InvalidArgumentError( - absl_ports::StrCat("Infinite loop detected in type configs. '", - schema_type, "' references itself.")); - } - - // Need to make sure we eventually see/validate this schema_type - if (known_schema_types.count(property_schema_type) == 0) { - unknown_schema_types.insert(property_schema_type); - } - - // Start tracking the parent schemas that references this nested schema - // for infinite loop detection. - // - // TODO(b/171996137): When clients can opt out of indexing document - // properties, then we don't need to do this if the document property - // isn't indexed. We only care about infinite loops while we're trying - // to assign section ids for indexing. - std::unordered_set<std::string_view> parent_schemas; - parent_schemas.insert(schema_type); - - for (const auto& parent : parent_schemas) { - // Check for any indirect parents - auto indirect_parents_iter = schema_references.find(parent); - if (indirect_parents_iter == schema_references.end()) { - continue; - } - - // Our "parent" schema has parents as well. They're our indirect - // parents now. - for (const std::string_view& indirect_parent : - indirect_parents_iter->second) { - if (indirect_parent == property_schema_type) { - // We're our own indirect parent! Infinite loop found. - return absl_ports::InvalidArgumentError(absl_ports::StrCat( - "Infinite loop detected in type configs. '", - property_schema_type, "' references itself.")); - } - parent_schemas.insert(indirect_parent); - } - } - - schema_references.insert({property_schema_type, parent_schemas}); } ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(), @@ -227,15 +309,7 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) { } } - // A Document property claimed to be of a schema_type that we never - // saw/validated - if (!unknown_schema_types.empty()) { - return absl_ports::UnknownError( - absl_ports::StrCat("Undefined 'schema_type's: ", - absl_ports::StrJoin(unknown_schema_types, ","))); - } - - return libtextclassifier3::Status::OK; + return dependency_map; } libtextclassifier3::Status SchemaUtil::ValidateSchemaType( @@ -355,7 +429,8 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs( } const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( - const SchemaProto& old_schema, const SchemaProto& new_schema) { + const SchemaProto& old_schema, const SchemaProto& new_schema, + const DependencyMap& new_schema_dependency_map) { SchemaDelta schema_delta; schema_delta.index_incompatible = false; @@ -385,7 +460,26 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( // be reindexed. int32_t old_required_properties = 0; int32_t old_indexed_properties = 0; + + // If there is a different number of properties, then there must have been a + // change. + bool is_incompatible = false; + bool is_index_incompatible = false; for (const auto& old_property_config : old_type_config.properties()) { + if (old_property_config.cardinality() == + PropertyConfigProto::Cardinality::REQUIRED) { + ++old_required_properties; + } + + // A non-default term_match_type indicates that this property is meant to + // be indexed. + bool is_indexed_property = + old_property_config.string_indexing_config().term_match_type() != + TermMatchType::UNKNOWN; + if (is_indexed_property) { + ++old_indexed_properties; + } + auto new_property_name_and_config = new_parsed_property_configs.property_config_map.find( old_property_config.property_name()); @@ -397,8 +491,8 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( "Previously defined property type '", old_type_config.schema_type(), ".", old_property_config.property_name(), "' was not defined in new schema"); - schema_delta.schema_types_incompatible.insert( - old_type_config.schema_type()); + is_incompatible = true; + is_index_incompatible |= is_indexed_property; continue; } @@ -409,27 +503,18 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( ICING_VLOG(1) << absl_ports::StrCat( "Property '", old_type_config.schema_type(), ".", old_property_config.property_name(), "' is incompatible."); - schema_delta.schema_types_incompatible.insert( - old_type_config.schema_type()); - } - - if (old_property_config.cardinality() == - PropertyConfigProto::Cardinality::REQUIRED) { - ++old_required_properties; - } - - // A non-default term_match_type indicates that this property is meant to - // be indexed. - if (old_property_config.string_indexing_config().term_match_type() != - TermMatchType::UNKNOWN) { - ++old_indexed_properties; + is_incompatible = true; } // Any change in the indexed property requires a reindexing if (!IsTermMatchTypeCompatible( old_property_config.string_indexing_config(), - new_property_config->string_indexing_config())) { - schema_delta.index_incompatible = true; + new_property_config->string_indexing_config()) || + old_property_config.document_indexing_config() + .index_nested_properties() != + new_property_config->document_indexing_config() + .index_nested_properties()) { + is_index_incompatible = true; } } @@ -444,8 +529,7 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( "New schema '", old_type_config.schema_type(), "' has REQUIRED properties that are not " "present in the previously defined schema"); - schema_delta.schema_types_incompatible.insert( - old_type_config.schema_type()); + is_incompatible = true; } // If we've gained any new indexed properties, then the section ids may @@ -457,8 +541,30 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta( "Set of indexed properties in schema type '", old_type_config.schema_type(), "' has changed, required reindexing."); + is_index_incompatible = true; + } + + if (is_incompatible) { + // If this type is incompatible, then every type that depends on it might + // also be incompatible. Use the dependency map to mark those ones as + // incompatible too. + schema_delta.schema_types_incompatible.insert( + old_type_config.schema_type()); + auto parent_types_itr = + new_schema_dependency_map.find(old_type_config.schema_type()); + if (parent_types_itr != new_schema_dependency_map.end()) { + schema_delta.schema_types_incompatible.reserve( + schema_delta.schema_types_incompatible.size() + + parent_types_itr->second.size()); + schema_delta.schema_types_incompatible.insert( + parent_types_itr->second.begin(), parent_types_itr->second.end()); + } + } + + if (is_index_incompatible) { schema_delta.index_incompatible = true; } + } return schema_delta; diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h index 7b989a8..abbc55d 100644 --- a/icing/schema/schema-util.h +++ b/icing/schema/schema-util.h @@ -22,6 +22,7 @@ #include <unordered_set> #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/proto/schema.pb.h" namespace icing { @@ -32,6 +33,13 @@ class SchemaUtil { using TypeConfigMap = std::unordered_map<std::string, const SchemaTypeConfigProto>; + // Maps from a child type to the parent types that depend on it. + // Ex. type A has a single property of type B + // The dependency map will be { { "B", { "A" } } } + using DependencyMap = + std::unordered_map<std::string_view, + std::unordered_set<std::string_view>>; + struct SchemaDelta { // Whether an indexing config has changed, requiring the index to be // regenerated. We don't list out all the types that make the index @@ -90,10 +98,12 @@ class SchemaUtil { // document properties can be opted out of indexing. // // Returns: + // On success, a dependency map from each child types to all parent types + // that depend on it directly or indirectly. // ALREADY_EXISTS for case 1 and 2 // INVALID_ARGUMENT for 3-13 - // OK otherwise - static libtextclassifier3::Status Validate(const SchemaProto& schema); + static libtextclassifier3::StatusOr<DependencyMap> Validate( + const SchemaProto& schema); // Creates a mapping of schema type -> schema type config proto. The // type_config_map is cleared, and then each schema-type_config_proto pair is @@ -142,7 +152,8 @@ class SchemaUtil { // // Returns a SchemaDelta that captures the aforementioned differences. static const SchemaDelta ComputeCompatibilityDelta( - const SchemaProto& old_schema, const SchemaProto& new_schema); + const SchemaProto& old_schema, const SchemaProto& new_schema, + const DependencyMap& new_schema_dependency_map); // Validates the 'property_name' field. // 1. Can't be an empty string diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index a2fc8d9..049dd79 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -17,6 +17,7 @@ #include <cstdint> #include <string> #include <string_view> +#include <unordered_set> #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -34,6 +35,7 @@ using ::testing::HasSubstr; // Properties/fields in a schema type constexpr char kEmailType[] = "EmailMessage"; +constexpr char kMessageType[] = "Text"; constexpr char kPersonType[] = "Person"; constexpr PropertyConfigProto_DataType_Code TYPE_DOCUMENT = @@ -63,6 +65,367 @@ constexpr TermMatchType_Code MATCH_UNKNOWN = TermMatchType_Code_UNKNOWN; constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) { + // Create a schema with the following dependencies: + // C + // / \ + // A - B E - F + // \ / + // D + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty(PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + // Provide these in alphabetical (also parent-child) order: A, B, C, D, E, F + SchemaProto schema = SchemaBuilder() + .AddType(type_a) + .AddType(type_b) + .AddType(type_c) + .AddType(type_d) + .AddType(type_e) + .AddType(type_f) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, testing::SizeIs(5)); + EXPECT_THAT(d_map["F"], + testing::UnorderedElementsAre("A", "B", "C", "D", "E")); + EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D")); + EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A")); +} + +TEST(SchemaUtilTest, DependencyGraphReverseAlphabeticalOrder) { + // Create a schema with the following dependencies: + // C + // / \ + // A - B E - F + // \ / + // D + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty(PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + // Provide these in reverse alphabetical (also child-parent) order: + // F, E, D, C, B, A + SchemaProto schema = SchemaBuilder() + .AddType(type_f) + .AddType(type_e) + .AddType(type_d) + .AddType(type_c) + .AddType(type_b) + .AddType(type_a) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, testing::SizeIs(5)); + EXPECT_THAT(d_map["F"], + testing::UnorderedElementsAre("A", "B", "C", "D", "E")); + EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D")); + EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A")); +} + +TEST(SchemaUtilTest, DependencyGraphMixedOrder) { + // Create a schema with the following dependencies: + // C + // / \ + // A - B E - F + // \ / + // D + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .AddProperty( + PropertyConfigBuilder() + .SetName("d") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("D", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_d = + SchemaTypeConfigBuilder() + .SetType("D") + .AddProperty( + PropertyConfigBuilder() + .SetName("e") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("E", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_e = + SchemaTypeConfigBuilder() + .SetType("E") + .AddProperty( + PropertyConfigBuilder() + .SetName("f") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("F", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_f = + SchemaTypeConfigBuilder() + .SetType("F") + .AddProperty(PropertyConfigBuilder() + .SetName("text") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)) + .Build(); + + // Provide these in a random order: C, E, F, A, B, D + SchemaProto schema = SchemaBuilder() + .AddType(type_c) + .AddType(type_e) + .AddType(type_f) + .AddType(type_a) + .AddType(type_b) + .AddType(type_d) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(SchemaUtil::DependencyMap d_map, + SchemaUtil::Validate(schema)); + EXPECT_THAT(d_map, testing::SizeIs(5)); + EXPECT_THAT(d_map["F"], + testing::UnorderedElementsAre("A", "B", "C", "D", "E")); + EXPECT_THAT(d_map["E"], testing::UnorderedElementsAre("A", "B", "C", "D")); + EXPECT_THAT(d_map["D"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["C"], testing::UnorderedElementsAre("A", "B")); + EXPECT_THAT(d_map["B"], testing::UnorderedElementsAre("A")); +} + +TEST(SchemaUtilTest, TopLevelCycle) { + // Create a schema with the following dependencies: + // A - B - B - B - B.... + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = SchemaBuilder().AddType(type_a).AddType(type_b).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, + HasSubstr("Infinite loop"))); +} + +TEST(SchemaUtilTest, MultiLevelCycle) { + // Create a schema with the following dependencies: + // A - B - C - A - B - C - A ... + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("a") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("A", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + +TEST(SchemaUtilTest, NonExistentType) { + // Create a schema with the following dependencies: + // A - B - C - X (does not exist) + SchemaTypeConfigProto type_a = + SchemaTypeConfigBuilder() + .SetType("A") + .AddProperty( + PropertyConfigBuilder() + .SetName("b") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("B", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_b = + SchemaTypeConfigBuilder() + .SetType("B") + .AddProperty( + PropertyConfigBuilder() + .SetName("c") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("C", /*index_nested_properties=*/true)) + .Build(); + SchemaTypeConfigProto type_c = + SchemaTypeConfigBuilder() + .SetType("C") + .AddProperty( + PropertyConfigBuilder() + .SetName("x") + .SetCardinality(CARDINALITY_OPTIONAL) + .SetDataTypeDocument("X", /*index_nested_properties=*/true)) + .Build(); + + SchemaProto schema = + SchemaBuilder().AddType(type_a).AddType(type_b).AddType(type_c).Build(); + EXPECT_THAT(SchemaUtil::Validate(schema), + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); +} + TEST(SchemaUtilTest, EmptySchemaProtoIsValid) { SchemaProto schema; ICING_ASSERT_OK(SchemaUtil::Validate(schema)); @@ -309,7 +672,7 @@ TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) { .Build(); ASSERT_THAT(SchemaUtil::Validate(schema), - StatusIs(libtextclassifier3::StatusCode::UNKNOWN, + StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("Undefined 'schema_type'"))); } @@ -342,8 +705,9 @@ TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, - new_schema_with_optional), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema_with_optional, no_dependencies_map), Eq(schema_delta)); } @@ -377,8 +741,9 @@ TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) { SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, - new_schema_with_required), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema_with_required, no_dependencies_map), Eq(schema_delta)); } @@ -412,7 +777,9 @@ TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) { SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } @@ -442,16 +809,17 @@ TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) { // We can't have a new schema be more restrictive, REPEATED->OPTIONAL SchemaUtil::SchemaDelta incompatible_schema_delta; incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType); + SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( /*old_schema=*/less_restrictive_schema, - /*new_schema=*/more_restrictive_schema), + /*new_schema=*/more_restrictive_schema, no_dependencies_map), Eq(incompatible_schema_delta)); // We can have the new schema be less restrictive, OPTIONAL->REPEATED; SchemaUtil::SchemaDelta compatible_schema_delta; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( /*old_schema=*/more_restrictive_schema, - /*new_schema=*/less_restrictive_schema), + /*new_schema=*/less_restrictive_schema, no_dependencies_map), Eq(compatible_schema_delta)); } @@ -480,7 +848,9 @@ TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) { SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } @@ -495,6 +865,12 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { .SetDataType(TYPE_INT) .SetCardinality(CARDINALITY_REPEATED))) .AddType(SchemaTypeConfigBuilder() + .SetType(kMessageType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() .SetType(kEmailType) .AddProperty(PropertyConfigBuilder() .SetName("Property") @@ -514,19 +890,32 @@ TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) { .SetDataType(TYPE_INT) .SetCardinality(CARDINALITY_REPEATED))) .AddType(SchemaTypeConfigBuilder() + .SetType(kMessageType) + .AddProperty(PropertyConfigBuilder() + .SetName("prop") + .SetDataType(TYPE_INT) + .SetCardinality(CARDINALITY_REPEATED))) + .AddType(SchemaTypeConfigBuilder() .SetType(kEmailType) - .AddProperty( - PropertyConfigBuilder() - .SetName("Property") - .SetDataTypeDocument( - kEmailType, /*index_nested_properties=*/true) - .SetCardinality(CARDINALITY_REPEATED))) + .AddProperty(PropertyConfigBuilder() + .SetName("Property") + .SetDataTypeDocument( + kMessageType, + /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) .Build(); SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_incompatible.emplace(kEmailType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), - Eq(schema_delta)); + // kEmailType depends on kMessageType + SchemaUtil::DependencyMap dependencies_map = {{kMessageType, {kEmailType}}}; + SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema, dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); + EXPECT_THAT(actual.index_incompatible, testing::IsFalse()); + EXPECT_THAT(actual.schema_types_incompatible, + testing::ElementsAre(kEmailType)); + EXPECT_THAT(actual.schema_types_deleted, testing::IsEmpty()); } TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) { @@ -558,13 +947,16 @@ TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) { schema_delta.index_incompatible = true; // New schema gained a new indexed property. + SchemaUtil::DependencyMap no_dependencies_map; EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( - schema_with_indexed_property, schema_with_unindexed_property), + schema_with_indexed_property, schema_with_unindexed_property, + no_dependencies_map), Eq(schema_delta)); // New schema lost an indexed property. EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta( - schema_with_indexed_property, schema_with_unindexed_property), + schema_with_indexed_property, schema_with_unindexed_property, + no_dependencies_map), Eq(schema_delta)); } @@ -600,7 +992,9 @@ TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) { SchemaUtil::SchemaDelta schema_delta; schema_delta.index_incompatible = true; - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } @@ -637,7 +1031,9 @@ TEST(SchemaUtilTest, AddingTypeIsCompatible) { .Build(); SchemaUtil::SchemaDelta schema_delta; - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } @@ -676,10 +1072,105 @@ TEST(SchemaUtilTest, DeletingTypeIsNoted) { SchemaUtil::SchemaDelta schema_delta; schema_delta.schema_types_deleted.emplace(kPersonType); - EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema), + SchemaUtil::DependencyMap no_dependencies_map; + EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema, + no_dependencies_map), Eq(schema_delta)); } +TEST(SchemaUtilTest, DeletingPropertyAndChangingProperty) { + SchemaProto old_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("Property1") + .SetDataType(TYPE_STRING) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property2") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_REQUIRED))) + .Build(); + + // Remove Property2 and make Property1 indexed now. Removing Property2 should + // be incompatible. + SchemaProto new_schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty( + PropertyConfigBuilder() + .SetName("Property1") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + + SchemaUtil::SchemaDelta schema_delta; + schema_delta.schema_types_incompatible.emplace(kEmailType); + schema_delta.index_incompatible = true; + SchemaUtil::DependencyMap no_dependencies_map; + SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( + old_schema, new_schema, no_dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); +} + +TEST(SchemaUtilTest, IndexNestedDocumentsIndexIncompatible) { + // Make two schemas. One that sets index_nested_properties to false and one + // that sets it to true. + SchemaTypeConfigProto email_type_config = + SchemaTypeConfigBuilder() + .SetType(kEmailType) + .AddProperty(PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .Build(); + SchemaProto no_nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty(PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + kEmailType, + /*index_nested_properties=*/false) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + SchemaProto nested_index_schema = + SchemaBuilder() + .AddType(email_type_config) + .AddType(SchemaTypeConfigBuilder() + .SetType(kPersonType) + .AddProperty( + PropertyConfigBuilder() + .SetName("emails") + .SetDataTypeDocument( + kEmailType, /*index_nested_properties=*/true) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + + // Going from index_nested_properties=false to index_nested_properties=true + // should make kPersonType index_incompatible. kEmailType should be + // unaffected. + SchemaUtil::SchemaDelta schema_delta; + schema_delta.index_incompatible = true; + SchemaUtil::DependencyMap dependencies_map = {{kEmailType, {kPersonType}}}; + SchemaUtil::SchemaDelta actual = SchemaUtil::ComputeCompatibilityDelta( + no_nested_index_schema, nested_index_schema, dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); + + // Going from index_nested_properties=true to index_nested_properties=false + // should also make kPersonType index_incompatible. kEmailType should be + // unaffected. + actual = SchemaUtil::ComputeCompatibilityDelta( + nested_index_schema, no_nested_index_schema, dependencies_map); + EXPECT_THAT(actual, Eq(schema_delta)); +} + TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) { SchemaProto schema = SchemaBuilder() |