author     Tim Barron <tjbarron@google.com>    2020-12-09 04:46:41 +0000
committer  Tim Barron <tjbarron@google.com>    2020-12-09 04:53:04 +0000
commit     282a5708af10879b12a09a59ad5bbfa253b1e92a (patch)
tree       e54fd6ebc322a8983a707b3bc05d52faf48ef22f
parent     1ae91ee03a08a4f0391f0f90e2797fb3c2e02960 (diff)
download   icing-282a5708af10879b12a09a59ad5bbfa253b1e92a.tar.gz
Update Icing from upstream.
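
This update repurposes Hit::Score as a per-section term frequency and adds
result projection. Index::Editor no longer writes a hit per AddHit() call:
terms are buffered with BufferTerm(), repeated terms bump a frequency counter,
and IndexAllBufferedTerms() writes one hit per distinct term. Search results
can now be trimmed with ResultSpecProto::TypePropertyMask.

A minimal sketch of the new buffered indexing flow, using only names that
appear in this diff (the helper function, the literal document/section IDs,
and the surrounding Index setup are illustrative assumptions, not part of the
change):

  #include "icing/index/index.h"  // Index and Index::Editor, path as in this diff

  // Hypothetical helper; the IDs mirror the fixtures used in index_test.cc.
  libtextclassifier3::Status IndexSectionTerms(icing::lib::Index* index) {
    icing::lib::Index::Editor edit = index->Edit(
        /*document_id=*/0, /*section_id=*/2,
        icing::lib::TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
    // BufferTerm() only updates the editor's seen_tokens_ map: a new term
    // starts at frequency 1, a repeated term is incremented, capped at
    // Hit::kMaxHitScore (255).
    libtextclassifier3::Status status = edit.BufferTerm("hello");
    if (!status.ok()) return status;
    status = edit.BufferTerm("world");
    if (!status.ok()) return status;
    status = edit.BufferTerm("hello");  // "hello" now carries frequency 2.
    if (!status.ok()) return status;
    // One hit per distinct term is written to the lite index; the accumulated
    // frequency travels in the (repurposed) Hit score field.
    return edit.IndexAllBufferedTerms();
  }
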
Change-Id: I662cd2f97794722eca9cc43b73549babed884be4
-rw-r--r--  icing/document-builder.h | 116
-rw-r--r--  icing/icing-search-engine_test.cc | 238
-rw-r--r--  icing/index/hit/doc-hit-info.h | 2
-rw-r--r--  icing/index/hit/doc-hit-info_test.cc | 14
-rw-r--r--  icing/index/hit/hit.cc | 4
-rw-r--r--  icing/index/hit/hit.h | 11
-rw-r--r--  icing/index/hit/hit_test.cc | 29
-rw-r--r--  icing/index/index-processor.cc | 17
-rw-r--r--  icing/index/index-processor_test.cc | 46
-rw-r--r--  icing/index/index.cc | 31
-rw-r--r--  icing/index/index.h | 8
-rw-r--r--  icing/index/index_test.cc | 362
-rw-r--r--  icing/index/lite/lite-index.cc | 2
-rw-r--r--  icing/index/main/index-block_test.cc | 28
-rw-r--r--  icing/index/main/main-index-merger.cc | 70
-rw-r--r--  icing/index/main/main-index-merger_test.cc | 47
-rw-r--r--  icing/index/main/main-index_test.cc | 42
-rw-r--r--  icing/index/main/posting-list-accessor_test.cc | 10
-rw-r--r--  icing/index/main/posting-list-used_test.cc | 10
-rw-r--r--  icing/query/query-processor_benchmark.cc | 3
-rw-r--r--  icing/query/query-processor_test.cc | 3
-rw-r--r--  icing/result/page-result-state.h | 6
-rw-r--r--  icing/result/projection-tree.cc | 50
-rw-r--r--  icing/result/projection-tree.h | 53
-rw-r--r--  icing/result/projection-tree_test.cc | 102
-rw-r--r--  icing/result/result-retriever.cc | 49
-rw-r--r--  icing/result/result-retriever_test.cc | 845
-rw-r--r--  icing/result/result-state-manager.cc | 15
-rw-r--r--  icing/result/result-state.cc | 6
-rw-r--r--  icing/result/result-state.h | 11
-rw-r--r--  icing/schema/schema-util.cc | 63
-rw-r--r--  icing/schema/schema-util.h | 9
-rw-r--r--  icing/schema/schema-util_test.cc | 177
-rw-r--r--  icing/schema/section-manager.cc | 63
-rw-r--r--  icing/schema/section-manager_test.cc | 61
-rw-r--r--  icing/scoring/scorer.cc | 3
-rw-r--r--  icing/testing/common-matchers.h | 38
-rw-r--r--  icing/testing/hit-test-utils.cc | 4
-rw-r--r--  icing/testing/icu-i18n-test-utils.cc | 2
-rw-r--r--  icing/util/document-validator.cc | 3
-rw-r--r--  icing/util/document-validator.h | 2
-rw-r--r--  icing/util/document-validator_test.cc | 12
-rw-r--r--  icing/util/i18n-utils.cc | 2
-rw-r--r--  proto/icing/proto/document.proto | 9
-rw-r--r--  proto/icing/proto/scoring.proto | 7
-rw-r--r--  proto/icing/proto/search.proto | 18
46 files changed, 2085 insertions(+), 618 deletions(-)
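
The projection behavior exercised by the new icing-search-engine_test.cc cases
can be summarized with a short sketch (hypothetical helper, assuming an
initialized IcingSearchEngine whose schema was set with
CreatePersonAndEmailSchema() from the tests; all types live in namespace
icing::lib):

  // Returns only the "sender.name" and "subject" properties of matching
  // Email documents; every other property is stripped during retrieval.
  SearchResultProto SearchWithProjection(IcingSearchEngine& icing) {
    SearchSpecProto search_spec;
    search_spec.set_term_match_type(TermMatchType::PREFIX);
    search_spec.set_query("hello");

    ResultSpecProto result_spec;
    ResultSpecProto::TypePropertyMask* email_mask =
        result_spec.add_type_property_masks();
    email_mask->set_schema_type("Email");
    email_mask->add_paths("sender.name");  // dotted path into the nested Person
    email_mask->add_paths("subject");
    // A single empty path ("") would instead strip every property from
    // matching Email documents.

    ScoringSpecProto scoring_spec;  // default scoring; the tests use a helper
    return icing.Search(search_spec, scoring_spec, result_spec);
  }
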
diff --git a/icing/document-builder.h b/icing/document-builder.h
index 4c95b89..ba68ec5 100644
--- a/icing/document-builder.h
+++ b/icing/document-builder.h
@@ -71,11 +71,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& ClearCustomProperties() {
- document_.clear_custom_properties();
- return *this;
- }
-
// Takes a property name and any number of string values.
template <typename... V>
DocumentBuilder& AddStringProperty(std::string property_name,
@@ -83,14 +78,6 @@ class DocumentBuilder {
return AddStringProperty(std::move(property_name), {string_values...});
}
- // Takes a custom property name and any number of string values.
- template <typename... V>
- DocumentBuilder& AddCustomStringProperty(std::string property_name,
- V... string_values) {
- return AddCustomStringProperty(std::move(property_name),
- {string_values...});
- }
-
// Takes a property name and any number of int64_t values.
template <typename... V>
DocumentBuilder& AddInt64Property(std::string property_name,
@@ -98,13 +85,6 @@ class DocumentBuilder {
return AddInt64Property(std::move(property_name), {int64_values...});
}
- // Takes a custom property name and any number of int64_t values.
- template <typename... V>
- DocumentBuilder& AddCustomInt64Property(std::string property_name,
- V... int64_values) {
- return AddCustomInt64Property(std::move(property_name), {int64_values...});
- }
-
// Takes a property name and any number of double values.
template <typename... V>
DocumentBuilder& AddDoubleProperty(std::string property_name,
@@ -112,14 +92,6 @@ class DocumentBuilder {
return AddDoubleProperty(std::move(property_name), {double_values...});
}
- // Takes a custom property name and any number of double values.
- template <typename... V>
- DocumentBuilder& AddCustomDoubleProperty(std::string property_name,
- V... double_values) {
- return AddCustomDoubleProperty(std::move(property_name),
- {double_values...});
- }
-
// Takes a property name and any number of boolean values.
template <typename... V>
DocumentBuilder& AddBooleanProperty(std::string property_name,
@@ -127,28 +99,12 @@ class DocumentBuilder {
return AddBooleanProperty(std::move(property_name), {boolean_values...});
}
- // Takes a custom property name and any number of boolean values.
- template <typename... V>
- DocumentBuilder& AddCustomBooleanProperty(std::string property_name,
- V... boolean_values) {
- return AddCustomBooleanProperty(std::move(property_name),
- {boolean_values...});
- }
-
// Takes a property name and any number of bytes values.
template <typename... V>
DocumentBuilder& AddBytesProperty(std::string property_name,
V... bytes_values) {
return AddBytesProperty(std::move(property_name), {bytes_values...});
}
-
- // Takes a custom property name and any number of bytes values.
- template <typename... V>
- DocumentBuilder& AddCustomBytesProperty(std::string property_name,
- V... bytes_values) {
- return AddCustomBytesProperty(std::move(property_name), {bytes_values...});
- }
-
// Takes a property name and any number of document values.
template <typename... V>
DocumentBuilder& AddDocumentProperty(std::string property_name,
@@ -156,14 +112,6 @@ class DocumentBuilder {
return AddDocumentProperty(std::move(property_name), {document_values...});
}
- // Takes a custom property name and any number of document values.
- template <typename... V>
- DocumentBuilder& AddCustomDocumentProperty(std::string property_name,
- V&&... document_values) {
- return AddCustomDocumentProperty(std::move(property_name),
- {document_values...});
- }
-
DocumentProto Build() const { return document_; }
private:
@@ -180,17 +128,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomStringProperty(
- std::string property_name,
- std::initializer_list<std::string_view> string_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (std::string_view string_value : string_values) {
- custom_property->mutable_string_values()->Add(std::string(string_value));
- }
- return *this;
- }
-
DocumentBuilder& AddInt64Property(
std::string property_name, std::initializer_list<int64_t> int64_values) {
auto property = document_.add_properties();
@@ -201,16 +138,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomInt64Property(
- std::string property_name, std::initializer_list<int64_t> int64_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (int64_t int64_value : int64_values) {
- custom_property->mutable_int64_values()->Add(int64_value);
- }
- return *this;
- }
-
DocumentBuilder& AddDoubleProperty(
std::string property_name, std::initializer_list<double> double_values) {
auto property = document_.add_properties();
@@ -221,16 +148,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomDoubleProperty(
- std::string property_name, std::initializer_list<double> double_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (double double_value : double_values) {
- custom_property->mutable_double_values()->Add(double_value);
- }
- return *this;
- }
-
DocumentBuilder& AddBooleanProperty(
std::string property_name, std::initializer_list<bool> boolean_values) {
auto property = document_.add_properties();
@@ -241,16 +158,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomBooleanProperty(
- std::string property_name, std::initializer_list<bool> boolean_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (bool boolean_value : boolean_values) {
- custom_property->mutable_boolean_values()->Add(boolean_value);
- }
- return *this;
- }
-
DocumentBuilder& AddBytesProperty(
std::string property_name,
std::initializer_list<std::string> bytes_values) {
@@ -262,17 +169,6 @@ class DocumentBuilder {
return *this;
}
- DocumentBuilder& AddCustomBytesProperty(
- std::string property_name,
- std::initializer_list<std::string> bytes_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (const std::string& bytes_value : bytes_values) {
- custom_property->mutable_bytes_values()->Add(std::string(bytes_value));
- }
- return *this;
- }
-
DocumentBuilder& AddDocumentProperty(
std::string property_name,
std::initializer_list<DocumentProto> document_values) {
@@ -283,18 +179,6 @@ class DocumentBuilder {
}
return *this;
}
-
- DocumentBuilder& AddCustomDocumentProperty(
- std::string property_name,
- std::initializer_list<DocumentProto> document_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (DocumentProto document_value : document_values) {
- custom_property->mutable_document_values()->Add(
- std::move(document_value));
- }
- return *this;
- }
};
} // namespace lib
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index ab05646..8d69d78 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -194,6 +194,56 @@ SchemaProto CreateEmailSchema() {
TermMatchType::PREFIX);
subj->mutable_string_indexing_config()->set_tokenizer_type(
StringIndexingConfig::TokenizerType::PLAIN);
+ return schema;
+}
+
+SchemaProto CreatePersonAndEmailSchema() {
+ SchemaProto schema;
+
+ auto* person_type = schema.add_types();
+ person_type->set_schema_type("Person");
+ auto* name = person_type->add_properties();
+ name->set_property_name("name");
+ name->set_data_type(PropertyConfigProto::DataType::STRING);
+ name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ name->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ name->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* address = person_type->add_properties();
+ address->set_property_name("emailAddress");
+ address->set_data_type(PropertyConfigProto::DataType::STRING);
+ address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ address->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ address->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ auto* type = schema.add_types();
+ type->set_schema_type("Email");
+
+ auto* body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* subj = type->add_properties();
+ subj->set_property_name("subject");
+ subj->set_data_type(PropertyConfigProto::DataType::STRING);
+ subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ subj->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ subj->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* sender = type->add_properties();
+ sender->set_property_name("sender");
+ sender->set_schema_type("Person");
+ sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ sender->mutable_document_indexing_config()->set_index_nested_properties(true);
return schema;
}
@@ -5243,6 +5293,194 @@ TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) {
Eq(10));
}
+TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and use an empty field
+ // mask to request NO properties.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("hello");
+
+ ResultSpecProto result_spec;
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec.set_num_per_page(1);
+ ResultSpecProto::TypePropertyMask* email_field_mask =
+ result_spec.add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("");
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the returned results contain no properties.
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
+TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and request only
+ // 'sender.name' and 'subject' properties.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("hello");
+
+ ResultSpecProto result_spec;
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec.set_num_per_page(1);
+ ResultSpecProto::TypePropertyMask* email_field_mask =
+ result_spec.add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("sender.name");
+ email_field_mask->add_paths("subject");
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the returned results only contain the 'sender.name' and
+ // 'subject' properties.
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h
index 386822d..32ba97e 100644
--- a/icing/index/hit/doc-hit-info.h
+++ b/icing/index/hit/doc-hit-info.h
@@ -36,7 +36,7 @@ class DocHitInfo {
SectionIdMask hit_section_ids_mask = kSectionIdMaskNone)
: document_id_(document_id_in),
hit_section_ids_mask_(hit_section_ids_mask) {
- memset(max_hit_score_, Hit::kMaxHitScore, sizeof(max_hit_score_));
+ memset(max_hit_score_, Hit::kDefaultHitScore, sizeof(max_hit_score_));
}
DocumentId document_id() const { return document_id_; }
diff --git a/icing/index/hit/doc-hit-info_test.cc b/icing/index/hit/doc-hit-info_test.cc
index d8adbc1..1e1880f 100644
--- a/icing/index/hit/doc-hit-info_test.cc
+++ b/icing/index/hit/doc-hit-info_test.cc
@@ -34,17 +34,17 @@ constexpr DocumentId kSomeOtherDocumentId = 54;
TEST(DocHitInfoTest, InitialMaxHitScores) {
DocHitInfo info(kSomeDocumentId);
for (SectionId i = 0; i <= kMaxSectionId; ++i) {
- EXPECT_THAT(info.max_hit_score(i), Eq(Hit::kMaxHitScore));
+ EXPECT_THAT(info.max_hit_score(i), Eq(Hit::kDefaultHitScore));
}
}
TEST(DocHitInfoTest, UpdateHitScores) {
DocHitInfo info(kSomeDocumentId);
- ASSERT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
+ ASSERT_THAT(info.max_hit_score(3), Eq(Hit::kDefaultHitScore));
// Updating a section for the first time should change its max hit score,
// even though the hit score (16) may be lower than the current value returned
- // by info.max_hit_score(3) (kMaxHitScore)
+ // by info.max_hit_score(3) (kDefaultHitScore)
info.UpdateSection(3, 16);
EXPECT_THAT(info.max_hit_score(3), Eq(16));
@@ -58,6 +58,12 @@ TEST(DocHitInfoTest, UpdateHitScores) {
info.UpdateSection(3, 17);
EXPECT_THAT(info.max_hit_score(3), Eq(17));
+ // Updating a section with kDefaultHitScore should *never* set the
+ // max_hit_score to kDefaultHitScore (unless it already was kDefaultHitScore)
+ // because kDefaultHitScore is the lowest possible valid hit score.
+ info.UpdateSection(3, Hit::kDefaultHitScore);
+ EXPECT_THAT(info.max_hit_score(3), Eq(17));
+
// Updating a section with kMaxHitScore should *always* set the max hit
// score to kMaxHitScore (regardless of what value kMaxHitScore is
// defined with).
@@ -150,7 +156,7 @@ TEST(DocHitInfoTest, Comparison) {
DocHitInfo high_section_id_info(kDocumentId);
high_section_id_info.UpdateSection(1, 12);
- high_section_id_info.UpdateSection(6, Hit::kMaxHitScore);
+ high_section_id_info.UpdateSection(6, Hit::kDefaultHitScore);
std::vector<DocHitInfo> infos{info, high_document_id_info,
high_section_id_info};
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc
index 1852bd5..d089dd5 100644
--- a/icing/index/hit/hit.cc
+++ b/icing/index/hit/hit.cc
@@ -30,7 +30,7 @@ enum FlagOffset {
// This hit represents a prefix of a longer term. If exact matches are
// required, then this hit should be ignored.
kPrefixHit = 1,
- // Whether or not the hit has a hit score other than kMaxHitScore.
+ // Whether or not the hit has a hit score other than kDefaultHitScore.
kHasScore = 2,
kNumFlags = 3,
};
@@ -64,7 +64,7 @@ Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score,
kSectionIdBits + kNumFlags, kDocumentIdBits,
&temp_value);
bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value);
- bit_util::BitfieldSet(score != kMaxHitScore, kHasScore, 1, &temp_value);
+ bit_util::BitfieldSet(score != kDefaultHitScore, kHasScore, 1, &temp_value);
bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &temp_value);
bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, 1, &temp_value);
value_ = temp_value;
diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h
index d1be204..53553f0 100644
--- a/icing/index/hit/hit.h
+++ b/icing/index/hit/hit.h
@@ -55,11 +55,15 @@ class Hit {
// A score reflecting the "quality" of this hit. The higher the score, the
// higher quality the hit.
+ // The score is being repurposed for term frequency.
+ // TODO(b/173156700): refactor Score to TermFrequency.
using Score = uint8_t;
- // By default, hits are given the highest possible score.
+ // The maximum hit score is 255.
static constexpr Score kMaxHitScore = std::numeric_limits<Score>::max();
+ // The default hit score (term frequency) is 1.
+ static constexpr Score kDefaultHitScore = 1;
- explicit Hit(Value value = kInvalidValue, Score score = kMaxHitScore)
+ explicit Hit(Value value = kInvalidValue, Score score = kDefaultHitScore)
: value_(value), score_(score) {}
Hit(SectionId section_id, DocumentId document_id, Score score,
bool is_in_prefix_section = false, bool is_prefix_hit = false);
@@ -68,8 +72,7 @@ class Hit {
Value value() const { return value_; }
DocumentId document_id() const;
SectionId section_id() const;
- // Whether or not the hit contains a non-default score. Hits with non-default
- // score are considered to be of lower quality.
+ // Whether or not the hit contains a non-default score.
bool has_score() const;
Score score() const { return score_; }
bool is_prefix_hit() const;
diff --git a/icing/index/hit/hit_test.cc b/icing/index/hit/hit_test.cc
index 17db66b..8c883d1 100644
--- a/icing/index/hit/hit_test.cc
+++ b/icing/index/hit/hit_test.cc
@@ -36,9 +36,9 @@ static constexpr SectionId kSomeSectionid = 5;
static constexpr Hit::Score kSomeHitScore = 57;
TEST(HitTest, HasScoreFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
EXPECT_THAT(h1.has_score(), IsFalse());
- EXPECT_THAT(h1.score(), Eq(Hit::kMaxHitScore));
+ EXPECT_THAT(h1.score(), Eq(Hit::kDefaultHitScore));
Hit h2(kSomeSectionid, kSomeDocumentId, kSomeHitScore);
EXPECT_THAT(h2.has_score(), IsTrue());
@@ -46,33 +46,33 @@ TEST(HitTest, HasScoreFlag) {
}
TEST(HitTest, IsPrefixHitFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
EXPECT_THAT(h1.is_prefix_hit(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
EXPECT_THAT(h2.is_prefix_hit(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
EXPECT_THAT(h3.is_prefix_hit(), IsTrue());
}
TEST(HitTest, IsInPrefixSectionFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
EXPECT_THAT(h1.is_in_prefix_section(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
EXPECT_THAT(h2.is_in_prefix_section(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
EXPECT_THAT(h3.is_in_prefix_section(), IsTrue());
}
TEST(HitTest, Accessors) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId));
EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid));
}
@@ -102,18 +102,19 @@ TEST(HitTest, Valid) {
}
TEST(HitTest, Comparison) {
- Hit hit(1, 243, Hit::kMaxHitScore);
+ Hit hit(1, 243, Hit::kDefaultHitScore);
// DocumentIds are sorted in ascending order. So a hit with a lower
// document_id should be considered greater than one with a higher
// document_id.
- Hit higher_document_id_hit(1, 2409, Hit::kMaxHitScore);
- Hit higher_section_id_hit(15, 243, Hit::kMaxHitScore);
+ Hit higher_document_id_hit(1, 2409, Hit::kDefaultHitScore);
+ Hit higher_section_id_hit(15, 243, Hit::kDefaultHitScore);
// Whether or not a hit score was set is considered, but the score itself is
// not.
Hit hitscore_hit(1, 243, 12);
- Hit prefix_hit(1, 243, Hit::kMaxHitScore, /*is_in_prefix_section=*/false,
+ Hit prefix_hit(1, 243, Hit::kDefaultHitScore,
+ /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/true);
- Hit hit_in_prefix_section(1, 243, Hit::kMaxHitScore,
+ Hit hit_in_prefix_section(1, 243, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/false);
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 9df9b87..892263b 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -85,6 +85,8 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
tokenizer->Tokenize(subcontent));
while (itr->Advance()) {
if (++num_tokens > options_.max_tokens_per_document) {
+ // Index all tokens buffered so far.
+ editor.IndexAllBufferedTerms();
if (put_document_stats != nullptr) {
put_document_stats->mutable_tokenization_stats()
->set_exceeded_max_token_num(true);
@@ -96,16 +98,16 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
return absl_ports::ResourceExhaustedError(
"Max number of tokens reached!");
case Options::TokenLimitBehavior::kSuppressError:
- return libtextclassifier3::Status::OK;
+ return overall_status;
}
}
std::string term = normalizer_.NormalizeTerm(itr->GetToken().text);
- // Add this term to the index. Even if adding this hit fails, we keep
+ // Add this term to the hit buffer. Even if adding this hit fails, we keep
// trying to add more hits because it's possible that future hits could
// still be added successfully. For instance if the lexicon is full, we
// might fail to add a hit for a new term, but should still be able to
// add hits for terms that are already in the index.
- auto status = editor.AddHit(term.c_str());
+ auto status = editor.BufferTerm(term.c_str());
if (overall_status.ok() && !status.ok()) {
// If we've succeeded to add everything so far, set overall_status to
// represent this new failure. If we've already failed, no need to
@@ -115,6 +117,15 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
}
}
}
+ // Add all the seen terms to the index with their term frequency.
+ auto status = editor.IndexAllBufferedTerms();
+ if (overall_status.ok() && !status.ok()) {
+ // If we've succeeded so far, set overall_status to
+ // represent this new failure. If we've already failed, no need to
+ // update the status - we're already going to return a resource
+ // exhausted error.
+ overall_status = status;
+ }
}
if (put_document_stats != nullptr) {
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index ff11e60..3168dad 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -292,9 +293,10 @@ TEST_F(IndexProcessorTest, OneDoc) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("hello", kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId0, std::vector<SectionId>{kExactSectionId})));
+ std::vector<DocHitInfo> hits = GetHits(std::move(itr));
+ std::unordered_map<SectionId, Hit::Score> expectedMap{{kExactSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expectedMap)));
ICING_ASSERT_OK_AND_ASSIGN(
itr, index_->GetIterator("hello", 1U << kPrefixedSectionId,
@@ -313,12 +315,18 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+ std::string coffeeRepeatedString = "coffee";
+ for (int i = 0; i < Hit::kMaxHitScore + 1; i++) {
+ coffeeRepeatedString += " coffee";
+ }
+
document =
DocumentBuilder()
.SetKey("icing", "fake_type/2")
.SetSchema(std::string(kFakeType))
- .AddStringProperty(std::string(kExactProperty), "pitbull")
- .AddStringProperty(std::string(kPrefixedProperty), "mr. world wide")
+ .AddStringProperty(std::string(kExactProperty), coffeeRepeatedString)
+ .AddStringProperty(std::string(kPrefixedProperty),
+ "mr. world world wide")
.Build();
EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
@@ -326,19 +334,31 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("world", kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfo> hits = GetHits(std::move(itr));
+ std::unordered_map<SectionId, Hit::Score> expectedMap1{
+ {kPrefixedSectionId, 2}};
+ std::unordered_map<SectionId, Hit::Score> expectedMap2{{kExactSectionId, 1}};
EXPECT_THAT(
- GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(kDocumentId1,
- std::vector<SectionId>{kPrefixedSectionId}),
- EqualsDocHitInfo(kDocumentId0,
- std::vector<SectionId>{kExactSectionId})));
+ hits, ElementsAre(
+ EqualsDocHitInfoWithTermFrequency(kDocumentId1, expectedMap1),
+ EqualsDocHitInfoWithTermFrequency(kDocumentId0, expectedMap2)));
ICING_ASSERT_OK_AND_ASSIGN(
itr, index_->GetIterator("world", 1U << kPrefixedSectionId,
TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
+ hits = GetHits(std::move(itr));
+ std::unordered_map<SectionId, Hit::Score> expectedMap{
+ {kPrefixedSectionId, 2}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId1, expectedMap)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ index_->GetIterator("coffee", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ hits = GetHits(std::move(itr));
+ expectedMap = {{kExactSectionId, Hit::kMaxHitScore}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId1, expectedMap)));
}
TEST_F(IndexProcessorTest, DocWithNestedProperty) {
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 1b808f8..f0c8bbd 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -277,8 +277,7 @@ Index::FindTermsByPrefix(const std::string& prefix,
std::move(main_term_metadata_list), num_to_return);
}
-libtextclassifier3::Status Index::Editor::AddHit(const char* term,
- Hit::Score score) {
+libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
// Step 1: See if this term is already in the lexicon
uint32_t tvi;
auto tvi_or = lite_index_->GetTermId(term);
@@ -287,8 +286,10 @@ libtextclassifier3::Status Index::Editor::AddHit(const char* term,
if (tvi_or.ok()) {
tvi = tvi_or.ValueOrDie();
if (seen_tokens_.find(tvi) != seen_tokens_.end()) {
- ICING_VLOG(1) << "A hit for term " << term
- << " has already been added. Skipping.";
+ ICING_VLOG(1) << "Updating term frequency for term " << term;
+ if (seen_tokens_[tvi] != Hit::kMaxHitScore) {
+ ++seen_tokens_[tvi];
+ }
return libtextclassifier3::Status::OK;
}
ICING_VLOG(1) << "Term " << term
@@ -302,14 +303,20 @@ libtextclassifier3::Status Index::Editor::AddHit(const char* term,
ICING_ASSIGN_OR_RETURN(
tvi, lite_index_->InsertTerm(term, term_match_type_, namespace_id_));
}
- seen_tokens_.insert(tvi);
-
- // Step 3: Add the hit itself
- Hit hit(section_id_, document_id_, score,
- term_match_type_ == TermMatchType::PREFIX);
- ICING_ASSIGN_OR_RETURN(uint32_t term_id,
- term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- return lite_index_->AddHit(term_id, hit);
+ // Token seen for the first time in the current document.
+ seen_tokens_[tvi] = 1;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status Index::Editor::IndexAllBufferedTerms() {
+ for (auto itr = seen_tokens_.begin(); itr != seen_tokens_.end(); itr++) {
+ Hit hit(section_id_, document_id_, /*score=*/itr->second,
+ term_match_type_ == TermMatchType::PREFIX);
+ ICING_ASSIGN_OR_RETURN(
+ uint32_t term_id, term_id_codec_->EncodeTvi(itr->first, TviType::LITE));
+ ICING_RETURN_IF_ERROR(lite_index_->AddHit(term_id, hit));
+ }
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
diff --git a/icing/index/index.h b/icing/index/index.h
index 1305b2c..32f2b17 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -197,14 +197,16 @@ class Index {
namespace_id_(namespace_id),
section_id_(section_id) {}
- libtextclassifier3::Status AddHit(const char* term,
- Hit::Score score = Hit::kMaxHitScore);
+ // Buffer the term in seen_tokens_.
+ libtextclassifier3::Status BufferTerm(const char* term);
+ // Index all the terms stored in seen_tokens_.
+ libtextclassifier3::Status IndexAllBufferedTerms();
private:
// The Editor is able to store previously seen terms as TermIds. This is
// more efficient than a client doing this externally because TermIds are
// not exposed to clients.
- std::unordered_set<uint32_t> seen_tokens_;
+ std::unordered_map<uint32_t, Hit::Score> seen_tokens_;
const TermIdCodec* term_id_codec_;
LiteIndex* lite_index_;
DocumentId document_id_;
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 1d12274..3479ab1 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -177,7 +177,8 @@ TEST_F(IndexTest, EmptyIndexAfterMerge) {
TEST_F(IndexTest, AdvancePastEnd) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -200,7 +201,8 @@ TEST_F(IndexTest, AdvancePastEnd) {
TEST_F(IndexTest, AdvancePastEndAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -225,7 +227,8 @@ TEST_F(IndexTest, AdvancePastEndAfterMerge) {
TEST_F(IndexTest, SingleHitSingleTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -240,7 +243,8 @@ TEST_F(IndexTest, SingleHitSingleTermIndex) {
TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -257,8 +261,9 @@ TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
TEST_F(IndexTest, SingleHitMultiTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -273,8 +278,9 @@ TEST_F(IndexTest, SingleHitMultiTermIndex) {
TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -291,8 +297,9 @@ TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
TEST_F(IndexTest, NoHitMultiTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -305,8 +312,9 @@ TEST_F(IndexTest, NoHitMultiTermIndex) {
TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -321,15 +329,18 @@ TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
TEST_F(IndexTest, MultiHitMultiTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -345,15 +356,18 @@ TEST_F(IndexTest, MultiHitMultiTermIndex) {
TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -371,11 +385,13 @@ TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
TEST_F(IndexTest, MultiHitSectionRestrict) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -391,11 +407,13 @@ TEST_F(IndexTest, MultiHitSectionRestrict) {
TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -415,12 +433,13 @@ TEST_F(IndexTest, SingleHitDedupeIndex) {
EXPECT_THAT(size, Eq(0));
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
EXPECT_THAT(size, Gt(0));
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(int64_t new_size, index_->GetElementsSize());
EXPECT_THAT(new_size, Eq(size));
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -435,7 +454,8 @@ TEST_F(IndexTest, SingleHitDedupeIndex) {
TEST_F(IndexTest, PrefixHit) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -450,7 +470,8 @@ TEST_F(IndexTest, PrefixHit) {
TEST_F(IndexTest, PrefixHitAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -467,11 +488,13 @@ TEST_F(IndexTest, PrefixHitAfterMerge) {
TEST_F(IndexTest, MultiPrefixHit) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -488,11 +511,13 @@ TEST_F(IndexTest, MultiPrefixHit) {
TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -511,11 +536,13 @@ TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
TEST_F(IndexTest, NoExactHitInPrefixQuery) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -529,11 +556,13 @@ TEST_F(IndexTest, NoExactHitInPrefixQuery) {
TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -549,8 +578,9 @@ TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
TEST_F(IndexTest, PrefixHitDedupe) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -564,8 +594,9 @@ TEST_F(IndexTest, PrefixHitDedupe) {
TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -621,8 +652,9 @@ TEST_F(IndexTest, ExactToString) {
TEST_F(IndexTest, NonAsciiTerms) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("こんにちは"), IsOk());
- ASSERT_THAT(edit.AddHit("あなた"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("こんにちは"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("あなた"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -642,8 +674,9 @@ TEST_F(IndexTest, NonAsciiTerms) {
TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("こんにちは"), IsOk());
- ASSERT_THAT(edit.AddHit("あなた"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("こんにちは"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("あなた"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -684,7 +717,11 @@ TEST_F(IndexTest, FullIndex) {
index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
size_t idx = uniform(random);
- status = edit.AddHit(query_terms.at(idx).c_str());
+ status = edit.BufferTerm(query_terms.at(idx).c_str());
+ if (!status.ok()) {
+ break;
+ }
+ status = edit.IndexAllBufferedTerms();
if (!status.ok()) {
break;
}
@@ -696,11 +733,10 @@ TEST_F(IndexTest, FullIndex) {
Index::Editor edit =
index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("bar"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("baz"),
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
for (int i = 0; i < query_terms.size(); i += 25) {
@@ -737,7 +773,11 @@ TEST_F(IndexTest, FullIndexMerge) {
index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
size_t idx = uniform(random);
- status = edit.AddHit(query_terms.at(idx).c_str());
+ status = edit.BufferTerm(query_terms.at(idx).c_str());
+ if (!status.ok()) {
+ break;
+ }
+ status = edit.IndexAllBufferedTerms();
if (!status.ok()) {
break;
}
@@ -751,11 +791,10 @@ TEST_F(IndexTest, FullIndexMerge) {
Index::Editor edit =
index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("bar"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("baz"),
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
@@ -763,9 +802,10 @@ TEST_F(IndexTest, FullIndexMerge) {
ICING_ASSERT_OK(index_->Merge());
edit =
index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY, 0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
- EXPECT_THAT(edit.AddHit("baz"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
@@ -790,8 +830,9 @@ TEST_F(IndexTest, IndexCreateCorruptionFailure) {
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Close the index.
index_.reset();
@@ -820,8 +861,9 @@ TEST_F(IndexTest, IndexPersistence) {
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->PersistToDisk(), IsOk());
// Close the index.
@@ -847,8 +889,9 @@ TEST_F(IndexTest, IndexPersistenceAfterMerge) {
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
EXPECT_THAT(index_->PersistToDisk(), IsOk());
@@ -881,7 +924,8 @@ TEST_F(IndexTest, InvalidHitBufferSize) {
TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
/*num_to_return=*/0),
@@ -903,8 +947,9 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) {
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// "b" should only match "bar" but not "foo".
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0},
@@ -923,9 +968,10 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) {
TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fo"), IsOk());
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// We have 3 results but only 2 should be returned.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
@@ -944,13 +990,15 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInOneNamespace) {
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
- EXPECT_THAT(edit1.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
// namespace with id 0 has 2 results.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
@@ -982,17 +1030,20 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInMultipleNamespaces) {
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
Index::Editor edit3 =
index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/2);
- EXPECT_THAT(edit3.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit3.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
// Should return "foo" and "fool" which are in namespaces with ids 1 and 2.
EXPECT_THAT(
@@ -1015,17 +1066,20 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) {
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
Index::Editor edit3 =
index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/2);
- EXPECT_THAT(edit3.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit3.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
// Should return "fo", "foo" and "fool" across all namespaces.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{},
@@ -1049,13 +1103,15 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) {
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("foo"), IsOk());
- EXPECT_THAT(edit1.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit2.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit, 'fool' has 2 hits.
EXPECT_THAT(
@@ -1079,30 +1135,38 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnApproximateHitCountForMain) {
Index::Editor edit =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId4, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId5, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId6, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId7, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit, 'fool' has 8 hits.
EXPECT_THAT(
@@ -1125,14 +1189,16 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) {
Index::Editor edit =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit in the main index, 'fool' has 1 hit in the main index and
// 1 hit in the lite index.
@@ -1147,13 +1213,15 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) {
Index::Editor edit =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit in the main index, 'fool' has 1 hit in the lite index.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
@@ -1171,7 +1239,8 @@ TEST_F(IndexTest, GetElementsSize) {
// Add an element.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
EXPECT_THAT(size, Gt(0));
@@ -1183,19 +1252,23 @@ TEST_F(IndexTest, GetElementsSize) {
TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foot"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("footer"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -1212,19 +1285,23 @@ TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foot"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("footer"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -1244,19 +1321,23 @@ TEST_F(IndexTest, GetDebugInfo) {
// then add another doc to the lite index.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("footer"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
std::string out0;
index_->GetDebugInfo(/*verbosity=*/0, &out0);
@@ -1269,7 +1350,8 @@ TEST_F(IndexTest, GetDebugInfo) {
// Add one more doc to the lite index. Debug strings should change.
edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("far"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
std::string out2;
index_->GetDebugInfo(/*verbosity=*/0, &out2);
@@ -1298,13 +1380,16 @@ TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
// then add another doc to the lite index.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId0, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should have posting lists for
// "fool" {(doc0,sec3)},
@@ -1315,7 +1400,8 @@ TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
// Add one more doc to the lite index.
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("far"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should add a posting list for "far" and a
// backfill branch point for "f". In addition to the posting lists described
@@ -1343,11 +1429,13 @@ TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
// then add another doc to the lite index.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should have posting lists for
// "fool" {(doc0,sec2)},
// "foot" {(doc1,sec3)},
@@ -1356,14 +1444,17 @@ TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("footer"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Add one more doc to the lite index. Debug strings should change.
edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("far"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should add posting lists for "far" and "footer"
// and a backfill branch point for "f". The new posting lists should be
@@ -1400,7 +1491,8 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
// Add one document to the lite index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Clipping to invalid should have no effect.
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1420,7 +1512,8 @@ TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Clipping to invalid should still have no effect even if both indices have
// hits.
@@ -1447,7 +1540,8 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
// Add one document to the lite index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
// Clipping to invalid should have no effect.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1467,7 +1561,8 @@ TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Clipping to invalid should still have no effect even if both indices have
// hits.
@@ -1487,14 +1582,16 @@ TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
// Add one document to the lite index and merge it into main.
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
// Add another document to the lite index.
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
@@ -1513,17 +1610,20 @@ TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
// Add two documents to the lite index and merge them into main.
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foul"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
// Add another document to the lite index.
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
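
The index_test.cc changes above all follow one mechanical pattern: the old
single-call Editor::AddHit(term) is replaced by buffering terms with
BufferTerm() and then flushing the whole section with IndexAllBufferedTerms().
A minimal standalone sketch of that buffer-then-flush pattern is below;
MiniEditor and its members are illustrative stand-ins, not the real
Index::Editor API.

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Illustrative stand-in for Index::Editor: terms are buffered (and
// deduplicated) first, then indexed together once the caller is done with the
// section.
class MiniEditor {
 public:
  bool BufferTerm(const std::string& term) {
    if (term.empty()) return false;  // reject obviously bad input
    buffered_.insert(term);          // duplicate terms in one section collapse
    return true;
  }

  // Flushes every buffered term. In the real index this is the point where
  // hits are written, so a partially buffered edit never reaches the index.
  std::vector<std::string> IndexAllBufferedTerms() {
    std::vector<std::string> flushed(buffered_.begin(), buffered_.end());
    buffered_.clear();
    return flushed;
  }

 private:
  std::set<std::string> buffered_;
};

int main() {
  MiniEditor edit;
  edit.BufferTerm("foo");
  edit.BufferTerm("fool");
  edit.BufferTerm("foo");  // duplicate within the same section
  for (const std::string& term : edit.IndexAllBufferedTerms()) {
    std::cout << term << '\n';  // prints "foo" then "fool"
  }
  return 0;
}
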
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 89240ee..ea4bcaf 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -448,7 +448,7 @@ uint32_t LiteIndex::Seek(uint32_t term_id) {
// Binary search for our term_id. Make sure we get the first
// element. Using kBeginSortValue ensures this for the hit value.
TermIdHitPair term_id_hit_pair(
- term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kMaxHitScore));
+ term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultHitScore));
const TermIdHitPair::Value* array =
hit_buffer_.array_cast<TermIdHitPair::Value>();
diff --git a/icing/index/main/index-block_test.cc b/icing/index/main/index-block_test.cc
index 08ba57d..493055f 100644
--- a/icing/index/main/index-block_test.cc
+++ b/icing/index/main/index-block_test.cc
@@ -105,11 +105,11 @@ TEST(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
std::vector<Hit> test_hits{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore),
Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore),
};
PostingListIndex allocated_index;
{
@@ -152,18 +152,18 @@ TEST(IndexBlockTest, IndexBlockMultiplePostingLists) {
ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
std::vector<Hit> hits_in_posting_list1{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore),
Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore),
};
std::vector<Hit> hits_in_posting_list2{
Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultHitScore),
Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
- Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultHitScore),
};
PostingListIndex allocated_index_1;
PostingListIndex allocated_index_2;
@@ -242,11 +242,11 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) {
// Add hits to the first posting list.
std::vector<Hit> hits_in_posting_list1{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore),
Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1,
block.AllocatePostingList());
@@ -262,10 +262,10 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) {
// Add hits to the second posting list.
std::vector<Hit> hits_in_posting_list2{
Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultHitScore),
Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
- Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultHitScore),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2,
block.AllocatePostingList());
@@ -289,7 +289,7 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) {
std::vector<Hit> hits_in_posting_list3{
Hit(/*section_id=*/12, /*document_id=*/0, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/1, Hit::kMaxHitScore),
+ Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultHitScore),
Hit(/*section_id=*/0, /*document_id=*/2, /*score=*/2),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3,
diff --git a/icing/index/main/main-index-merger.cc b/icing/index/main/main-index-merger.cc
index 8142b79..500774d 100644
--- a/icing/index/main/main-index-merger.cc
+++ b/icing/index/main/main-index-merger.cc
@@ -33,8 +33,8 @@ namespace {
class HitSelector {
public:
- // Returns whether or not term_id_hit_pair has the same term_id, document_id and section_id
- // as the previously selected hits.
+ // Returns whether or not term_id_hit_pair has the same term_id, document_id
+ // and section_id as the previously selected hits.
bool IsEquivalentHit(const TermIdHitPair& term_id_hit_pair) {
return prev_.term_id() == term_id_hit_pair.term_id() &&
prev_.hit().document_id() == term_id_hit_pair.hit().document_id() &&
@@ -56,20 +56,24 @@ class HitSelector {
// This function may add between 0-2 hits depending on whether the HitSelector
// holds both a valid exact hit and a valid prefix hit, one of those or none.
size_t InsertSelectedHits(size_t pos, std::vector<TermIdHitPair>* hits) {
- // Given highest scoring prefix/exact hits for a given
- // term+docid+sectionid, push needed hits into hits array at offset
- // pos. Return new pos.
+ // Given the prefix/exact hits for a given term+docid+sectionid, push needed
+ // hits into hits array at offset pos. Return new pos.
if (best_prefix_hit_.hit().is_valid() && best_exact_hit_.hit().is_valid()) {
- // Output both if scores are unequal. Otherwise only exact hit is
- // sufficient because 1) they have the same scores and 2) any prefix query
- // will also accept an exact hit.
(*hits)[pos++] = best_exact_hit_;
- if (best_prefix_hit_.hit().score() != best_exact_hit_.hit().score()) {
- (*hits)[pos++] = best_prefix_hit_;
- // Ensure sorted.
- if (best_prefix_hit_.hit() < best_exact_hit_.hit()) {
- std::swap((*hits)[pos - 1], (*hits)[pos - 2]);
- }
+ const Hit& prefix_hit = best_prefix_hit_.hit();
+ // The prefix hit has score equal to the sum of the scores, capped at
+ // kMaxHitScore.
+ Hit::Score final_score =
+ std::min(static_cast<int>(Hit::kMaxHitScore),
+ prefix_hit.score() + best_exact_hit_.hit().score());
+ best_prefix_hit_ = TermIdHitPair(
+ best_prefix_hit_.term_id(),
+ Hit(prefix_hit.section_id(), prefix_hit.document_id(), final_score,
+ prefix_hit.is_in_prefix_section(), prefix_hit.is_prefix_hit()));
+ (*hits)[pos++] = best_prefix_hit_;
+ // Ensure sorted.
+ if (best_prefix_hit_.hit() < best_exact_hit_.hit()) {
+ std::swap((*hits)[pos - 1], (*hits)[pos - 2]);
}
} else if (best_prefix_hit_.hit().is_valid()) {
(*hits)[pos++] = best_prefix_hit_;
@@ -88,16 +92,38 @@ class HitSelector {
private:
void SelectPrefixHitIfBetter(const TermIdHitPair& term_id_hit_pair) {
- if (!best_prefix_hit_.hit().is_valid() ||
- best_prefix_hit_.hit().score() < term_id_hit_pair.hit().score()) {
+ if (!best_prefix_hit_.hit().is_valid()) {
best_prefix_hit_ = term_id_hit_pair;
+ } else {
+ const Hit& hit = term_id_hit_pair.hit();
+ // Create a new prefix hit whose score is the sum of the two scores, capped
+ // at kMaxHitScore.
+ Hit::Score final_score =
+ std::min(static_cast<int>(Hit::kMaxHitScore),
+ hit.score() + best_prefix_hit_.hit().score());
+ best_prefix_hit_ =
+ TermIdHitPair(term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_score,
+ best_prefix_hit_.hit().is_in_prefix_section(),
+ best_prefix_hit_.hit().is_prefix_hit()));
}
}
void SelectExactHitIfBetter(const TermIdHitPair& term_id_hit_pair) {
- if (!best_exact_hit_.hit().is_valid() ||
- best_exact_hit_.hit().score() < term_id_hit_pair.hit().score()) {
+ if (!best_exact_hit_.hit().is_valid()) {
best_exact_hit_ = term_id_hit_pair;
+ } else {
+ const Hit& hit = term_id_hit_pair.hit();
+ // Create a new exact hit whose score is the sum of the two scores, capped
+ // at kMaxHitScore.
+ Hit::Score final_score =
+ std::min(static_cast<int>(Hit::kMaxHitScore),
+ hit.score() + best_exact_hit_.hit().score());
+ best_exact_hit_ =
+ TermIdHitPair(term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_score,
+ best_exact_hit_.hit().is_in_prefix_section(),
+ best_exact_hit_.hit().is_prefix_hit()));
}
}
@@ -166,10 +192,10 @@ class HitComparator {
// {"foot", docid0, sectionid0}
// {"fool", docid0, sectionid0}
//
-// When duplicates are encountered, we prefer the hit with the highest hit
-// score. If there is both an exact and prefix hit for the same term, we prefer
-// the exact hit, unless they have different scores, in which case we keep both
-// them.
+// When two or more prefix hits are duplicates, they are merged into one hit
+// whose score is the sum of their scores. If there are both an exact and a
+// prefix hit for the same term, the exact hit is kept as is and the prefix
+// hit's score is updated to the sum of the two scores.
void DedupeHits(
std::vector<TermIdHitPair>* hits, const TermIdCodec& term_id_codec,
const std::unordered_map<uint32_t, int>& main_tvi_to_block_index) {
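
With the HitSelector changes above, duplicate hits for the same (term,
document, section) are no longer resolved by keeping the highest score: the
scores are summed and the sum is saturated at the maximum hit score. A small
standalone sketch of that saturating merge, assuming an 8-bit score and using
255 as a stand-in for Hit::kMaxHitScore:

#include <algorithm>
#include <cstdint>
#include <iostream>

constexpr int kMaxScore = 255;  // assumed stand-in for Hit::kMaxHitScore

// Merges two scores for the same (term, document, section) hit: the sum,
// saturated at kMaxScore so the result still fits in the 8-bit score field.
uint8_t MergeScores(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>(std::min(kMaxScore, static_cast<int>(a) + b));
}

int main() {
  std::cout << int{MergeScores(57, 1)} << '\n';     // 58, as expected below
  std::cout << int{MergeScores(200, 200)} << '\n';  // 255, capped
  return 0;
}
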
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
index 59d3e82..93f4576 100644
--- a/icing/index/main/main-index-merger_test.cc
+++ b/icing/index/main/main-index-merger_test.cc
@@ -89,7 +89,7 @@ TEST_F(MainIndexMergerTest, TranslateTermNotAdded) {
Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
@@ -128,7 +128,7 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) {
Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
@@ -138,7 +138,8 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) {
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
- Hit doc1_prefix_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_prefix_hit(/*section_id=*/0, /*document_id=*/1,
+ Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -190,7 +191,7 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
@@ -201,8 +202,8 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
uint32_t foo_main_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
// The prefix hit for 'foot' should have the same score as the exact hit for
- // 'foot'.
- Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ // 'foot' plus the score of the exact hit for 'foo', i.e. 58.
+ Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/58,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -221,8 +222,8 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
// 3. TranslateAndExpand should;
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' to have a hit for 'foo'
- // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot'
- // because they have different scores.
+ // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
+ // latter with score as the sum of the scores.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
@@ -255,6 +256,10 @@ TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) {
Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
+ // The prefix hit's score should be the sum of the two scores, i.e. 114.
+ Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/114,
+ /*is_in_prefix_section=*/true,
+ /*is_prefix_hit=*/true);
// 2. Build up a fake LexiconMergeOutputs
// This is some made up number that doesn't matter for this test.
@@ -279,16 +284,17 @@ TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) {
// 3. TranslateAndExpand should;
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' to have a hit for 'foo'
- // c. Keep only the exact hit for 'foo' since they both have the same hit
- // score.
+ // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
+ // latter with score as the sum of the scores.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
lexicon_outputs));
- EXPECT_THAT(
- expanded_term_id_hit_pairs,
- UnorderedElementsAre(TermIdHitPair(foot_main_term_id, foot_doc0_hit),
- TermIdHitPair(foo_main_term_id, foo_doc0_hit)));
+ EXPECT_THAT(expanded_term_id_hit_pairs,
+ UnorderedElementsAre(
+ TermIdHitPair(foot_main_term_id, foot_doc0_hit),
+ TermIdHitPair(foo_main_term_id, foo_doc0_hit),
+ TermIdHitPair(foo_main_term_id, prefix_foo_doc0_hit)));
}
TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
@@ -307,10 +313,11 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
uint32_t fool_term_id,
term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
- Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ /*score=*/Hit::kMaxHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, fool_doc0_hit));
@@ -320,9 +327,9 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
- // The prefix hit should take the best score - MaxHitScore when merging these
- // two.
- Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ // The prefix hit's score should be the sum (256), capped at kMaxHitScore.
+ Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0,
+ /*score=*/Hit::kMaxHitScore,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -349,7 +356,7 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
// 3. TranslateAndExpand should;
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' and 'fool' to have hits for 'foo'
- // c. Merge the prefix hits from 'foot' and 'fool', taking the best hit
+ // c. Merge the prefix hits from 'foot' and 'fool', taking the sum as hit
// score.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index abe7181..0f87b09 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -145,7 +145,7 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsValidAccessor) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
@@ -182,7 +182,7 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForExactReturnsValidAccessor) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
@@ -219,18 +219,18 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
@@ -292,18 +292,18 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
@@ -345,14 +345,14 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t fall_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kMaxHitScore,
+ Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fall_term_id, doc3_hit));
- Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kMaxHitScore,
+ Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc4_hit));
ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc4_hit));
@@ -404,15 +404,15 @@ TEST_F(MainIndexTest, ExactRetrievedInPrefixSearch) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc2_hit));
@@ -453,15 +453,15 @@ TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc2_hit));
@@ -500,17 +500,17 @@ TEST_F(MainIndexTest, SearchChainedPostingLists) {
for (DocumentId document_id = 0; document_id < 2048; ++document_id) {
Hit doc_hit0(/*section_id=*/0, /*document_id=*/document_id,
- Hit::kMaxHitScore,
+ Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0));
Hit doc_hit1(/*section_id=*/1, /*document_id=*/document_id,
- Hit::kMaxHitScore,
+ Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1));
Hit doc_hit2(/*section_id=*/2, /*document_id=*/document_id,
- Hit::kMaxHitScore,
+ Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2));
}
@@ -543,7 +543,7 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
@@ -570,7 +570,7 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
diff --git a/icing/index/main/posting-list-accessor_test.cc b/icing/index/main/posting-list-accessor_test.cc
index 8a5ef07..85f6d4a 100644
--- a/icing/index/main/posting-list-accessor_test.cc
+++ b/icing/index/main/posting-list-accessor_test.cc
@@ -82,7 +82,7 @@ TEST(PostingListAccessorStorageTest, PreexistingPLKeepOnSameBlock) {
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
// Add a single hit. This will fit in a min-sized posting list.
- Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
@@ -324,14 +324,14 @@ TEST(PostingListAccessorStorageTest, HitsNotDecreasingReturnsInvalidArgument) {
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
- Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultHitScore);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
- Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kMaxHitScore);
+ Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultHitScore);
EXPECT_THAT(pl_accessor.PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore);
+ Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore);
EXPECT_THAT(pl_accessor.PrependHit(hit3),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -364,7 +364,7 @@ TEST(PostingListAccessorStorageTest, PreexistingPostingListNoHitsAdded) {
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
- Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultHitScore);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
diff --git a/icing/index/main/posting-list-used_test.cc b/icing/index/main/posting-list-used_test.cc
index eb62aeb..f6b5898 100644
--- a/icing/index/main/posting-list-used_test.cc
+++ b/icing/index/main/posting-list-used_test.cc
@@ -80,7 +80,7 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0)));
- Hit hit1(/*section_id=*/0, 1, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/0, 1, Hit::kDefaultHitScore);
pl_used.PrependHit(hit1);
// Size = sizeof(uncompressed hit1)
// + sizeof(hit0-hit1) + sizeof(hit0::score)
@@ -97,7 +97,7 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
- Hit hit3(/*section_id=*/0, 3, Hit::kMaxHitScore);
+ Hit hit3(/*section_id=*/0, 3, Hit::kDefaultHitScore);
pl_used.PrependHit(hit3);
// Size = sizeof(uncompressed hit3)
// + sizeof(hit2-hit3) + sizeof(hit2::score)
@@ -122,7 +122,7 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
// Adding hit0: EMPTY -> NOT_FULL
// Adding hit1: NOT_FULL -> NOT_FULL
// Adding hit2: NOT_FULL -> NOT_FULL
- Hit hit0(/*section_id=*/0, 0, Hit::kMaxHitScore);
+ Hit hit0(/*section_id=*/0, 0, Hit::kDefaultHitScore);
Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
ICING_EXPECT_OK(pl_used.PrependHit(hit0));
@@ -227,7 +227,7 @@ TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) {
static_cast<void *>(hits_buf.get()), size));
std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kMaxHitScore));
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultHitScore));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
@@ -268,7 +268,7 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
static_cast<void *>(hits_buf.get()), size));
std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kMaxHitScore));
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultHitScore));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 9dc5c07..eb8b7a4 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -70,7 +70,8 @@ void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
const std::string& token) {
Index::Editor editor =
index->Edit(document_id, section_id, term_match_type, /*namespace_id=*/0);
- ICING_ASSERT_OK(editor.AddHit(token.c_str()));
+ ICING_ASSERT_OK(editor.BufferTerm(token.c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
}
std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 8c46736..7546ae4 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -128,7 +128,8 @@ class QueryProcessorTest : public Test {
TermMatchType::Code term_match_type, const std::string& token) {
Index::Editor editor = index_->Edit(document_id, section_id,
term_match_type, /*namespace_id=*/0);
- return editor.AddHit(token.c_str());
+ auto status = editor.BufferTerm(token.c_str());
+ return status.ok() ? editor.IndexAllBufferedTerms() : status;
}
void TearDown() override {
diff --git a/icing/result/page-result-state.h b/icing/result/page-result-state.h
index a26c44e..85f1dd7 100644
--- a/icing/result/page-result-state.h
+++ b/icing/result/page-result-state.h
@@ -18,6 +18,7 @@
#include <cstdint>
#include <vector>
+#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/scoring/scored-document-hit.h"
@@ -29,10 +30,12 @@ struct PageResultState {
PageResultState(std::vector<ScoredDocumentHit> scored_document_hits_in,
uint64_t next_page_token_in,
SnippetContext snippet_context_in,
+ std::unordered_map<std::string, ProjectionTree> tree_map,
int num_previously_returned_in)
: scored_document_hits(std::move(scored_document_hits_in)),
next_page_token(next_page_token_in),
snippet_context(std::move(snippet_context_in)),
+ projection_tree_map(std::move(tree_map)),
num_previously_returned(num_previously_returned_in) {}
// Results of one page
@@ -44,6 +47,9 @@ struct PageResultState {
// Information needed for snippeting.
SnippetContext snippet_context;
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map;
+
// Number of results that have been returned in previous pages.
int num_previously_returned;
};
diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc
new file mode 100644
index 0000000..382fcb4
--- /dev/null
+++ b/icing/result/projection-tree.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projection-tree.h"
+
+#include <algorithm>
+
+#include "icing/absl_ports/str_join.h"
+#include "icing/schema/section-manager.h"
+
+namespace icing {
+namespace lib {
+
+ProjectionTree::ProjectionTree(
+ const ResultSpecProto::TypePropertyMask& type_field_mask) {
+ for (const std::string& field_mask : type_field_mask.paths()) {
+ Node* current_node = &root_;
+ for (std::string_view sub_field_mask :
+ absl_ports::StrSplit(field_mask, kPropertySeparator)) {
+ current_node = AddChildNode(sub_field_mask, &current_node->children);
+ }
+ }
+}
+
+ProjectionTree::Node* ProjectionTree::AddChildNode(
+ std::string_view property_name, std::vector<Node>* current_children) {
+ auto itr = std::find_if(current_children->begin(), current_children->end(),
+ [&property_name](const Node& node) {
+ return node.name == property_name;
+ });
+ if (itr != current_children->end()) {
+ return &(*itr);
+ }
+ current_children->push_back(ProjectionTree::Node(property_name));
+ return &current_children->back();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
new file mode 100644
index 0000000..7ace295
--- /dev/null
+++ b/icing/result/projection-tree.h
@@ -0,0 +1,53 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_PROJECTION_TREE_H_
+#define ICING_RESULT_PROJECTION_TREE_H_
+
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+class ProjectionTree {
+ public:
+ struct Node {
+ explicit Node(std::string_view name = "") : name(name) {}
+
+ std::string_view name;
+ std::vector<Node> children;
+ };
+
+ explicit ProjectionTree(
+ const ResultSpecProto::TypePropertyMask& type_field_mask);
+
+ const Node& root() const { return root_; }
+
+ private:
+ // Add a child node with property_name to current_children and returns a
+ // pointer to the child node.
+ Node* AddChildNode(std::string_view property_name,
+ std::vector<Node>* current_children);
+
+ Node root_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_PROJECTION_TREE_H_
diff --git a/icing/result/projection-tree_test.cc b/icing/result/projection-tree_test.cc
new file mode 100644
index 0000000..77d1d21
--- /dev/null
+++ b/icing/result/projection-tree_test.cc
@@ -0,0 +1,102 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projection-tree.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+TEST(ProjectionTreeTest, CreateEmptyFieldMasks) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ EXPECT_THAT(tree.root().children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeTopLevel) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("subject");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeMultipleTopLevel) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("subject");
+ type_field_mask.add_paths("body");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(2));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
+ ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
+ ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeNested) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("subject.body");
+ type_field_mask.add_paths("body");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(2));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ ASSERT_THAT(tree.root().children.at(0).children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("body"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children, IsEmpty());
+ ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
+ ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeNestedSharedNode) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("sender.name.first");
+ type_field_mask.add_paths("sender.emailAddress");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("sender"));
+ ASSERT_THAT(tree.root().children.at(0).children, SizeIs(2));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("name"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).name,
+ Eq("first"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).children,
+ IsEmpty());
+ ASSERT_THAT(tree.root().children.at(0).children.at(1).name,
+ Eq("emailAddress"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(1).children, IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
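
The new ProjectionTree above turns dotted property paths such as
"sender.name.first" into a tree whose children later drive result projection,
sharing nodes between paths with a common prefix. A standalone sketch of the
same path-to-tree construction, using an illustrative PathTrie type rather
than the real class:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct PathTrie {
  std::string name;
  std::vector<PathTrie> children;
};

// Returns the child named `segment`, creating it if it does not exist yet.
PathTrie* AddChild(PathTrie* node, const std::string& segment) {
  for (PathTrie& child : node->children) {
    if (child.name == segment) return &child;  // reuse the existing node
  }
  node->children.push_back(PathTrie{segment, {}});
  return &node->children.back();
}

// Inserts one dotted path, creating one node per path segment.
void AddPath(PathTrie* root, const std::string& path) {
  PathTrie* node = root;
  size_t start = 0;
  while (start <= path.size()) {
    size_t dot = path.find('.', start);
    if (dot == std::string::npos) dot = path.size();
    node = AddChild(node, path.substr(start, dot - start));
    start = dot + 1;
  }
}

void Print(const PathTrie& node, int depth = 0) {
  for (const PathTrie& child : node.children) {
    std::cout << std::string(depth * 2, ' ') << child.name << '\n';
    Print(child, depth + 1);
  }
}

int main() {
  PathTrie root;  // the root keeps an empty name, like ProjectionTree::root()
  AddPath(&root, "sender.name.first");
  AddPath(&root, "sender.emailAddress");  // shares the existing "sender" node
  Print(root);  // sender, name, first, emailAddress, indented by depth
  return 0;
}
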
diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc
index f09d834..ff6320b 100644
--- a/icing/result/result-retriever.cc
+++ b/icing/result/result-retriever.cc
@@ -14,15 +14,56 @@
#include "icing/result/result-retriever.h"
+#include <string_view>
+#include <utility>
+
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/result/page-result-state.h"
+#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+
+namespace {
+
+void Project(const std::vector<ProjectionTree::Node>& projection_tree,
+ proto2::RepeatedPtrField<PropertyProto>* properties) {
+ int num_kept = 0;
+ for (int cur_pos = 0; cur_pos < properties->size(); ++cur_pos) {
+ PropertyProto* prop = properties->Mutable(cur_pos);
+ auto itr = std::find_if(projection_tree.begin(), projection_tree.end(),
+ [&prop](const ProjectionTree::Node& node) {
+ return node.name == prop->name();
+ });
+ if (itr == projection_tree.end()) {
+ // Property is not present in the projection tree, so it is not kept.
+ continue;
+ }
+ // This property should be kept.
+ properties->SwapElements(num_kept, cur_pos);
+ ++num_kept;
+ if (itr->children.empty()) {
+ // The field mask names this property but has no children under it, so we
+ // keep the entire property, with all of its subproperties and values.
+ continue;
+ }
+ // The field mask refers to children of this property. Recurse through the
+ // document values that this property holds and project the children
+ // requested by this field mask.
+ for (DocumentProto& subproperty : *(prop->mutable_document_values())) {
+ Project(itr->children, subproperty.mutable_properties());
+ }
+ }
+ properties->DeleteSubrange(num_kept, properties->size() - num_kept);
+}
+
+} // namespace
+
libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>>
ResultRetriever::Create(const DocumentStore* doc_store,
const SchemaStore* schema_store,
@@ -74,6 +115,14 @@ ResultRetriever::RetrieveResults(
}
}
+ // Apply projection
+ auto itr = page_result_state.projection_tree_map.find(
+ document_or.ValueOrDie().schema());
+ if (itr != page_result_state.projection_tree_map.end()) {
+ Project(itr->second.root().children,
+ document_or.ValueOrDie().mutable_properties());
+ }
+
SearchResultProto::ResultProto result;
// Add the snippet if requested.
if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
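
For concreteness, the effect of the projection hookup above, illustrated with data that mirrors the tests in result-retriever_test.cc below (illustration only, not additional production code):

    // Stored "Email" document:
    DocumentProto original =
        DocumentBuilder()
            .SetKey("namespace", "uri1")
            .SetSchema("Email")
            .AddStringProperty("subject", "Hello World!")
            .AddDocumentProperty(
                "sender", DocumentBuilder()
                              .SetKey("namespace", "uri1")
                              .SetSchema("Person")
                              .AddStringProperty("name", "Meg Ryan")
                              .AddStringProperty("emailAddress",
                                                 "shopgirl@aol.com")
                              .Build())
            .Build();

    // With a ProjectionTree built from the single path "sender.name", Project()
    // keeps "sender" (the mask names it) and recurses into the nested Person
    // document, keeping only "name". "subject" and "sender.emailAddress" are
    // removed from the returned copy; the stored document is unchanged.
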
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 1078c5a..82e32ee 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -16,6 +16,8 @@
#include <limits>
#include <memory>
+#include <string_view>
+#include <unordered_map>
#include "gtest/gtest.h"
#include "icing/document-builder.h"
@@ -26,6 +28,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/result/projection-tree.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
@@ -74,30 +77,63 @@ class ResultRetrieverTest : public testing::Test {
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
+ ASSERT_THAT(schema_store_->SetSchema(CreatePersonAndEmailSchema()), IsOk());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ SchemaProto CreatePersonAndEmailSchema() {
SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- PropertyConfigProto* prop_config = type_config->add_properties();
- prop_config->set_property_name("subject");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
+
+ auto* type = schema.add_types();
+ type->set_schema_type("Email");
+
+ auto* subj = type->add_properties();
+ subj->set_property_name("subject");
+ subj->set_data_type(PropertyConfigProto::DataType::STRING);
+ subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ subj->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
+ subj->mutable_string_indexing_config()->set_tokenizer_type(
StringIndexingConfig::TokenizerType::PLAIN);
- prop_config = type_config->add_properties();
- prop_config->set_property_name("body");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
+ auto* body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ body->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* sender = type->add_properties();
+ sender->set_property_name("sender");
+ sender->set_schema_type("Person");
+ sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ sender->mutable_document_indexing_config()->set_index_nested_properties(
+ true);
+
+ auto* person_type = schema.add_types();
+ person_type->set_schema_type("Person");
+ auto* name = person_type->add_properties();
+ name->set_property_name("name");
+ name->set_data_type(PropertyConfigProto::DataType::STRING);
+ name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ name->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ name->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* address = person_type->add_properties();
+ address->set_property_name("emailAddress");
+ address->set_data_type(PropertyConfigProto::DataType::STRING);
+ address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ address->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ address->mutable_string_indexing_config()->set_tokenizer_type(
StringIndexingConfig::TokenizerType::PLAIN);
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
- }
- void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ return schema;
}
const Filesystem filesystem_;
@@ -118,8 +154,8 @@ ResultSpecProto::SnippetSpecProto CreateSnippetSpec() {
DocumentProto CreateDocument(int id) {
return DocumentBuilder()
- .SetKey("icing", "email/" + std::to_string(id))
- .SetSchema("email")
+ .SetKey("icing", "Email/" + std::to_string(id))
+ .SetSchema("Email")
.AddStringProperty("subject", "subject foo " + std::to_string(id))
.AddStringProperty("body", "body bar " + std::to_string(id))
.SetCreationTimestampMs(1574365086666 + id)
@@ -169,9 +205,9 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) {
doc_store->Put(CreateDocument(/*id=*/3)));
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -190,7 +226,9 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
EXPECT_THAT(
result_retriever->RetrieveResults(page_result_state),
IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2),
@@ -212,9 +250,9 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) {
DocumentId invalid_document_id = -1;
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {invalid_document_id, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -232,7 +270,9 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
EXPECT_THAT(
result_retriever->RetrieveResults(page_result_state),
IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2))));
@@ -253,9 +293,9 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
DocumentId invalid_document_id = -1;
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {invalid_document_id, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -268,15 +308,17 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
DocumentId non_existing_document_id = 4;
page_result_state.scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {non_existing_document_id, /*hit_section_id_mask=*/0b00000011,
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {non_existing_document_id, /*hit_section_id_mask=*/0b00001001,
/*score=*/0}};
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -299,8 +341,8 @@ TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) {
doc_store->Put(CreateDocument(/*id=*/2)));
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -314,7 +356,9 @@ TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -335,9 +379,9 @@ TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) {
doc_store->Put(CreateDocument(/*id=*/3)));
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -349,7 +393,9 @@ TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> results,
result_retriever->RetrieveResults(page_result_state));
@@ -378,9 +424,9 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) {
doc_store->Put(CreateDocument(/*id=*/3)));
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -391,7 +437,9 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
@@ -449,9 +497,9 @@ TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
snippet_spec.set_num_to_snippet(1);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -461,7 +509,9 @@ TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
snippet_spec, TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
@@ -502,9 +552,9 @@ TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) {
doc_store->Put(CreateDocument(/*id=*/3)));
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -517,7 +567,9 @@ TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
@@ -547,9 +599,9 @@ TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) {
doc_store->Put(CreateDocument(/*id=*/3)));
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -562,7 +614,9 @@ TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/3);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/3);
// num_to_snippet = 5, num_previously_returned_in = 3,
// We can return 5 - 3 = 2 snippets.
@@ -591,9 +645,9 @@ TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
doc_store->Put(CreateDocument(/*id=*/3)));
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -606,7 +660,9 @@ TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/6);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/6);
// num_to_snippet = 5, num_previously_returned_in = 6,
// We can't return any snippets for this page.
@@ -619,6 +675,671 @@ TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
EXPECT_THAT(result[2].snippet().entries(), IsEmpty());
}
+TEST_F(ResultRetrieverTest, ProjectionTopLevelLeafNodeFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("subject");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'subject' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Hello World!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("sender.name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'sender.name'
+ // property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("sender");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'sender'
+ // property and all of the subproperties of 'sender'.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("sender.name");
+ type_property_mask.add_paths("sender.emailAddress");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'sender.name' and
+  //    'sender.emailAddress' properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results contain *no* properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("nonExistentProperty");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results contain *no* properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+  // 2. Set up the scored results.
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("subject");
+ type_property_mask.add_paths("nonExistentProperty");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'subject' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Hello World!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index ff5dbf0..4488409 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -43,21 +43,26 @@ ResultStateManager::RankAndPaginate(ResultState result_state) {
std::vector<ScoredDocumentHit> page_result_document_hits =
result_state.GetNextPage();
+ SnippetContext snippet_context_copy = result_state.snippet_context();
+
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map_copy =
+ result_state.projection_tree_map();
if (!result_state.HasMoreResults()) {
// No more pages, won't store ResultState, returns directly
return PageResultState(
std::move(page_result_document_hits), kInvalidNextPageToken,
- result_state.snippet_context(), num_previously_returned);
+ std::move(snippet_context_copy), std::move(projection_tree_map_copy),
+ num_previously_returned);
}
absl_ports::unique_lock l(&mutex_);
// ResultState has multiple pages, storing it
- SnippetContext snippet_context_copy = result_state.snippet_context();
uint64_t next_page_token = Add(std::move(result_state));
return PageResultState(std::move(page_result_document_hits), next_page_token,
std::move(snippet_context_copy),
+ std::move(projection_tree_map_copy),
num_previously_returned);
}
@@ -97,13 +102,17 @@ libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage(
SnippetContext snippet_context_copy =
state_iterator->second.snippet_context();
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map_copy =
+ state_iterator->second.projection_tree_map();
+
if (!state_iterator->second.HasMoreResults()) {
InternalInvalidateResultState(next_page_token);
next_page_token = kInvalidNextPageToken;
}
return PageResultState(result_of_page, next_page_token,
- std::move(snippet_context_copy), num_returned);
+ std::move(snippet_context_copy),
+ std::move(projection_tree_map_copy), num_returned);
}
void ResultStateManager::InvalidateResultState(uint64_t next_page_token) {
diff --git a/icing/result/result-state.cc b/icing/result/result-state.cc
index bf28f52..f1479b9 100644
--- a/icing/result/result-state.cc
+++ b/icing/result/result-state.cc
@@ -14,6 +14,7 @@
#include "icing/result/result-state.h"
+#include "icing/result/projection-tree.h"
#include "icing/scoring/ranker.h"
#include "icing/util/logging.h"
@@ -46,6 +47,11 @@ ResultState::ResultState(std::vector<ScoredDocumentHit> scored_document_hits,
num_returned_(0),
scored_document_hit_comparator_(scoring_spec.order_by() ==
ScoringSpecProto::Order::DESC) {
+ for (const ResultSpecProto::TypePropertyMask& type_field_mask :
+ result_spec.type_property_masks()) {
+ projection_tree_map_.insert(
+ {type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
+ }
BuildHeapInPlace(&scored_document_hits_, scored_document_hit_comparator_);
}
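
The loop above is what turns caller-supplied masks into per-type ProjectionTrees. A hedged usage sketch, assuming the standard protobuf-generated accessors for the repeated type_property_masks field (the field and message names come from this change; the rest is illustrative):

    ResultSpecProto result_spec;
    ResultSpecProto::TypePropertyMask* email_mask =
        result_spec.add_type_property_masks();
    email_mask->set_schema_type("Email");
    email_mask->add_paths("subject");
    email_mask->add_paths("sender.name");
    // A ResultState built from this spec carries one ProjectionTree keyed by
    // "Email"; ResultRetriever then strips every other property from returned
    // "Email" documents, while results of other schema types pass through
    // untouched.
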
diff --git a/icing/result/result-state.h b/icing/result/result-state.h
index 82e783b..de36b40 100644
--- a/icing/result/result-state.h
+++ b/icing/result/result-state.h
@@ -19,6 +19,7 @@
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/scoring/scored-document-hit.h"
@@ -52,6 +53,13 @@ class ResultState {
// constructor.
const SnippetContext& snippet_context() const { return snippet_context_; }
+  // Returns a map from schema type to the ProjectionTree generated from the
+  // TypePropertyMasks in the ResultSpecProto passed in via the constructor.
+ const std::unordered_map<std::string, ProjectionTree>& projection_tree_map()
+ const {
+ return projection_tree_map_;
+ }
+
// The number of results that have already been returned. This number is
// increased when GetNextPage() is called.
int num_returned() const { return num_returned_; }
@@ -65,6 +73,9 @@ class ResultState {
// Information needed for snippeting.
SnippetContext snippet_context_;
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map_;
+
// Number of results to return in each page.
int num_per_page_;
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 9626a0b..49e7096 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -107,6 +107,21 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
// already.
std::unordered_set<std::string_view> known_property_names;
+ // Tracks which schemas reference other schemas. This is used to detect
+ // infinite loops between indexed schema references (e.g. A -> B -> C -> A).
+ // We could get into an infinite loop while trying to assign section ids.
+ //
+ // The key is the "child" schema that is being referenced within another
+ // schema.
+ // The value is a set of all the direct/indirect "parent" schemas that
+ // reference the "child" schema.
+ //
+ // For example, if A has a nested document property of type B, then A is the
+ // "parent" and B is the "child" and so schema_references will contain
+ // schema_references[B] == {A}.
+ std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
+ schema_references;
+
for (const auto& type_config : schema.types()) {
std::string_view schema_type(type_config.schema_type());
ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
@@ -120,6 +135,7 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
// We only care about properties being unique within one type_config
known_property_names.clear();
+
for (const auto& property_config : type_config.properties()) {
std::string_view property_name(property_config.property_name());
ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
@@ -149,10 +165,55 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
schema_type, ".", property_name, "'"));
}
+ if (property_schema_type == schema_type) {
+        // The schema refers to itself. This also causes an infinite loop.
+ //
+ // TODO(b/171996137): When clients can opt out of indexing document
+ // properties, then we don't need to do this if the document property
+ // isn't indexed. We only care about infinite loops while we're trying
+ // to assign section ids for indexing.
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Infinite loop detected in type configs. '",
+ schema_type, "' references itself."));
+ }
+
// Need to make sure we eventually see/validate this schema_type
if (known_schema_types.count(property_schema_type) == 0) {
unknown_schema_types.insert(property_schema_type);
}
+
+      // Start tracking the parent schemas that reference this nested schema
+ // for infinite loop detection.
+ //
+ // TODO(b/171996137): When clients can opt out of indexing document
+ // properties, then we don't need to do this if the document property
+ // isn't indexed. We only care about infinite loops while we're trying
+ // to assign section ids for indexing.
+ std::unordered_set<std::string_view> parent_schemas;
+ parent_schemas.insert(schema_type);
+
+ for (const auto& parent : parent_schemas) {
+ // Check for any indirect parents
+ auto indirect_parents_iter = schema_references.find(parent);
+ if (indirect_parents_iter == schema_references.end()) {
+ continue;
+ }
+
+ // Our "parent" schema has parents as well. They're our indirect
+ // parents now.
+ for (const std::string_view& indirect_parent :
+ indirect_parents_iter->second) {
+ if (indirect_parent == property_schema_type) {
+ // We're our own indirect parent! Infinite loop found.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Infinite loop detected in type configs. '",
+ property_schema_type, "' references itself."));
+ }
+ parent_schemas.insert(indirect_parent);
+ }
+ }
+
+ schema_references.insert({property_schema_type, parent_schemas});
}
ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
@@ -166,7 +227,7 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
}
}
- // An Document property claimed to be of a schema_type that we never
+ // A Document property claimed to be of a schema_type that we never
// saw/validated
if (!unknown_schema_types.empty()) {
return absl_ports::UnknownError(
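
The parent-set bookkeeping above amounts to incremental cycle detection over the type-reference graph. A simplified standalone sketch of the same idea, with hypothetical names and a worklist instead of mutating the set being iterated (this is not the SchemaUtil code):

    #include <string>
    #include <unordered_map>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    // Records the edge parent -> child and returns true if that edge closes a
    // reference cycle (i.e. `child` would become its own ancestor).
    bool AddReferenceDetectCycle(
        std::unordered_map<std::string, std::unordered_set<std::string>>* refs,
        const std::string& parent, const std::string& child) {
      if (parent == child) return true;  // direct self-reference
      std::unordered_set<std::string> parents = {parent};
      std::vector<std::string> worklist = {parent};
      while (!worklist.empty()) {
        std::string current = std::move(worklist.back());
        worklist.pop_back();
        auto it = refs->find(current);
        if (it == refs->end()) continue;
        for (const std::string& indirect : it->second) {
          if (indirect == child) return true;  // child is its own ancestor
          if (parents.insert(indirect).second) worklist.push_back(indirect);
        }
      }
      (*refs)[child].insert(parents.begin(), parents.end());
      return false;
    }

Calling this for A -> B and then B -> A reports the cycle on the second call, matching the two-degree case that Validate() now rejects.
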
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index ccb2eea..7b989a8 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -81,10 +81,17 @@ class SchemaUtil {
// SchemaTypeConfigProto.schema_type
// 10. Property names can only be alphanumeric.
// 11. Any STRING data types have a valid string_indexing_config
+ // 12. A SchemaTypeConfigProto cannot have a property whose schema_type is
+ // itself, thus creating an infinite loop.
+ // 13. Two SchemaTypeConfigProtos cannot have properties that reference each
+ // other's schema_type, thus creating an infinite loop.
+ //
+ // TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once
+ // document properties can be opted out of indexing.
//
// Returns:
// ALREADY_EXISTS for case 1 and 2
- // INVALID_ARGUMENT for 3-11
+ // INVALID_ARGUMENT for 3-13
// OK otherwise
static libtextclassifier3::Status Validate(const SchemaProto& schema);
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index ed3bde7..61a861c 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -29,6 +29,7 @@ namespace lib {
namespace {
using ::testing::Eq;
+using ::testing::HasSubstr;
// Properties/fields in a schema type
constexpr char kEmailType[] = "EmailMessage";
@@ -84,7 +85,7 @@ class SchemaUtilTest : public ::testing::Test {
}
};
-TEST_F(SchemaUtilTest, Valid_Empty) {
+TEST_F(SchemaUtilTest, EmptySchemaProtoIsValid) {
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
@@ -98,7 +99,7 @@ TEST_F(SchemaUtilTest, Valid_Nested) {
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Valid_ClearedPropertyConfigs) {
+TEST_F(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
// No property fields is technically ok, but probably not realistic.
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -107,7 +108,7 @@ TEST_F(SchemaUtilTest, Valid_ClearedPropertyConfigs) {
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedSchemaType) {
+TEST_F(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
type->clear_schema_type();
@@ -116,7 +117,7 @@ TEST_F(SchemaUtilTest, Invalid_ClearedSchemaType) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_EmptySchemaType) {
+TEST_F(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
type->set_schema_type("");
@@ -133,7 +134,7 @@ TEST_F(SchemaUtilTest, AnySchemaTypeOk) {
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedPropertyName) {
+TEST_F(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -146,7 +147,7 @@ TEST_F(SchemaUtilTest, Invalid_ClearedPropertyName) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_EmptyPropertyName) {
+TEST_F(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -184,7 +185,7 @@ TEST_F(SchemaUtilTest, AlphanumericPropertyNameOk) {
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Invalid_DuplicatePropertyName) {
+TEST_F(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -202,7 +203,7 @@ TEST_F(SchemaUtilTest, Invalid_DuplicatePropertyName) {
StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedDataType) {
+TEST_F(SchemaUtilTest, ClearedDataTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -215,7 +216,7 @@ TEST_F(SchemaUtilTest, Invalid_ClearedDataType) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_UnknownDataType) {
+TEST_F(SchemaUtilTest, UnknownDataTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -228,7 +229,7 @@ TEST_F(SchemaUtilTest, Invalid_UnknownDataType) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedCardinality) {
+TEST_F(SchemaUtilTest, ClearedCardinalityIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -241,7 +242,7 @@ TEST_F(SchemaUtilTest, Invalid_ClearedCardinality) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_UnknownCardinality) {
+TEST_F(SchemaUtilTest, UnknownCardinalityIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -254,7 +255,7 @@ TEST_F(SchemaUtilTest, Invalid_UnknownCardinality) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedPropertySchemaType) {
+TEST_F(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -282,7 +283,7 @@ TEST_F(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_NoMatchingSchemaType) {
+TEST_F(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -293,7 +294,8 @@ TEST_F(SchemaUtilTest, Invalid_NoMatchingSchemaType) {
property->set_schema_type("NewSchemaType");
ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
- StatusIs(libtextclassifier3::StatusCode::UNKNOWN));
+ StatusIs(libtextclassifier3::StatusCode::UNKNOWN,
+ HasSubstr("Undefined 'schema_type'")));
}
TEST_F(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
@@ -618,6 +620,153 @@ TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
+TEST_F(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
+ SchemaProto schema;
+
+ // Create a parent schema
+ auto type = schema.add_types();
+ type->set_schema_type("ParentSchema");
+
+ // Create multiple references to the same child schema
+ auto property = type->add_properties();
+ property->set_property_name("ChildProperty1");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("ChildSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ property = type->add_properties();
+ property->set_property_name("ChildProperty2");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("ChildSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Create a child schema
+ type = schema.add_types();
+ type->set_schema_type("ChildSchema");
+
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST_F(SchemaUtilTest, InvalidSelfReference) {
+ SchemaProto schema;
+
+ // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
+ auto type = schema.add_types();
+ type->set_schema_type("OwnSchema");
+
+ // Reference the type's own schema, creating the self-reference cycle
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("OwnSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
+TEST_F(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
+ SchemaProto schema;
+
+ // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
+ auto type = schema.add_types();
+ type->set_schema_type("OwnSchema");
+
+ // Reference the type's own schema, creating the self-reference cycle
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("OwnSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ property = type->add_properties();
+ property->set_property_name("SomeString");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
+TEST_F(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
+ SchemaProto schema;
+
+ // Create the parent schema
+ auto type = schema.add_types();
+ type->set_schema_type("A");
+
+ // Reference schema B, so far so good
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("B");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Create the child schema
+ type = schema.add_types();
+ type->set_schema_type("B");
+
+ // Reference schema A, causing an infinite loop of references.
+ property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("A");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Two degrees of referencing: A -> B -> A
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
+TEST_F(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
+ SchemaProto schema;
+
+ // Create the parent schema
+ auto type = schema.add_types();
+ type->set_schema_type("A");
+
+ // Reference schema B, so far so good
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("B");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Create the child schema
+ type = schema.add_types();
+ type->set_schema_type("B");
+
+ // Reference schema C, so far so good
+ property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("C");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Create the child schema
+ type = schema.add_types();
+ type->set_schema_type("C");
+
+ // Reference schema A, no good
+ property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("A");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Three degrees of referencing: A -> B -> C -> A
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 73aa947..0285cef 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -48,39 +48,6 @@ namespace {
using TypeSectionMap =
std::unordered_map<std::string, const std::vector<SectionMetadata>>;
-// This state helps detect infinite loops (e.g. two type configs referencing
-// each other) when assigning sections. The combination of 'number of section
-// assigned' and 'current schema name' represents a unique state in the
-// section-assign process. If the same state is seen the second time, that means
-// an infinite loop.
-struct SectionAssigningState {
- size_t num_sections_assigned;
- std::string current_schema_name;
-
- SectionAssigningState(size_t num_sections_assigned_in,
- std::string&& current_schema_name_in)
- : num_sections_assigned(num_sections_assigned_in),
- current_schema_name(std::move(current_schema_name_in)) {}
-};
-
-// Provides a hash value of this struct so that it can be stored in a hash
-// set.
-struct SectionAssigningStateHasher {
- size_t operator()(const SectionAssigningState& state) const {
- size_t str_hash = std::hash<std::string>()(state.current_schema_name);
- size_t int_hash = std::hash<size_t>()(state.num_sections_assigned);
- // Combine the two hashes by taking the upper 16-bits of the string hash and
- // the lower 16-bits of the int hash.
- return (str_hash & 0xFFFF0000) | (int_hash & 0x0000FFFF);
- }
-};
-
-bool operator==(const SectionAssigningState& lhs,
- const SectionAssigningState& rhs) {
- return lhs.num_sections_assigned == rhs.num_sections_assigned &&
- lhs.current_schema_name == rhs.current_schema_name;
-}
-
// Helper function to concatenate a path and a property name
std::string ConcatenatePath(const std::string& path,
const std::string& next_property_name) {
@@ -90,28 +57,14 @@ std::string ConcatenatePath(const std::string& path,
return absl_ports::StrCat(path, kPropertySeparator, next_property_name);
}
-// Helper function to recursively identify sections from a type config and add
-// them to a section metadata list
libtextclassifier3::Status AssignSections(
- const SchemaTypeConfigProto& type_config,
+ const SchemaTypeConfigProto& current_type_config,
const std::string& current_section_path,
const SchemaUtil::TypeConfigMap& type_config_map,
- std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>*
- visited_states,
std::vector<SectionMetadata>* metadata_list) {
- if (!visited_states
- ->emplace(metadata_list->size(),
- std::string(type_config.schema_type()))
- .second) {
- // Failed to insert, the same state has been seen before, there's an
- // infinite loop in type configs
- return absl_ports::InvalidArgumentError(
- "Infinite loop detected in type configs");
- }
-
// Sorts properties by name's alphabetical order so that order doesn't affect
// section assigning.
- auto sorted_properties = type_config.properties();
+ auto sorted_properties = current_type_config.properties();
std::sort(sorted_properties.pointer_begin(), sorted_properties.pointer_end(),
[](const PropertyConfigProto* p1, const PropertyConfigProto* p2) {
return p1->property_name() < p2->property_name();
@@ -137,7 +90,7 @@ libtextclassifier3::Status AssignSections(
AssignSections(nested_type_config,
ConcatenatePath(current_section_path,
property_config.property_name()),
- type_config_map, visited_states, metadata_list));
+ type_config_map, metadata_list));
}
}
@@ -162,6 +115,7 @@ libtextclassifier3::Status AssignSections(
"allowed: %d",
kMaxSectionId - kMinSectionId + 1));
}
+
// Creates section metadata from property config
metadata_list->emplace_back(
new_section_id,
@@ -182,17 +136,14 @@ BuildSectionMetadataCache(const SchemaUtil::TypeConfigMap& type_config_map,
std::vector<std::vector<SectionMetadata>> section_metadata_cache(
schema_type_mapper.num_keys());
- std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>
- visited_states;
for (const auto& name_and_type : type_config_map) {
// Assigns sections for each type config
- visited_states.clear();
const std::string& type_config_name = name_and_type.first;
const SchemaTypeConfigProto& type_config = name_and_type.second;
std::vector<SectionMetadata> metadata_list;
- ICING_RETURN_IF_ERROR(
- AssignSections(type_config, /*current_section_path*/ "",
- type_config_map, &visited_states, &metadata_list));
+ ICING_RETURN_IF_ERROR(AssignSections(type_config,
+ /*current_section_path*/ "",
+ type_config_map, &metadata_list));
// Insert the section metadata list at the index of the type's SchemaTypeId
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 1a4d324..2d995df 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -163,67 +163,6 @@ TEST_F(SectionManagerTest, CreationWithNullPointerShouldFail) {
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(SectionManagerTest, CreationWithSchemaInfiniteLoopShouldFail) {
- // Creates 2 type configs that reference each other
- SchemaTypeConfigProto type_config1;
- type_config1.set_schema_type("type1");
- auto property1 = type_config1.add_properties();
- property1->set_property_name("property1");
- property1->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property1->set_schema_type("type2"); // Here we reference type2
- property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- SchemaTypeConfigProto type_config2;
- type_config2.set_schema_type("type2");
- auto property2 = type_config2.add_properties();
- property2->set_property_name("property2");
- property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- // Here we reference type1, which references type2 causing the infinite loop
- property2->set_schema_type("type1");
- property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property2->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type1", type_config1);
- type_config_map.emplace("type2", type_config2);
-
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop detected")));
-}
-
-TEST_F(SectionManagerTest, CreationWithSchemaSelfReferenceShouldFail) {
- // Creates a type config that has a section and references to self.
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- auto property1 = type_config.add_properties();
- property1->set_property_name("property1");
- property1->set_data_type(PropertyConfigProto::DataType::STRING);
- property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- auto property2 = type_config.add_properties();
- property2->set_property_name("property2");
- property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property2->mutable_document_indexing_config()->set_index_nested_properties(
- true);
- // Here we're referencing our own type, causing an infinite loop
- property2->set_schema_type("type");
- property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
-
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
- HasSubstr("Too many properties")));
-}
-
TEST_F(SectionManagerTest, CreationWithTooManyPropertiesShouldFail) {
SchemaTypeConfigProto type_config;
type_config.set_schema_type("type");
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
index 42ec09a..0739532 100644
--- a/icing/scoring/scorer.cc
+++ b/icing/scoring/scorer.cc
@@ -144,6 +144,9 @@ libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Scorer::Create(
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
return std::make_unique<UsageScorer>(document_store, rank_by,
default_score);
+ case ScoringSpecProto::RankingStrategy::
+ RELEVANCE_SCORE_NONFUNCTIONAL_PLACEHOLDER:
+ [[fallthrough]];
case ScoringSpecProto::RankingStrategy::NONE:
return std::make_unique<NoScorer>(default_score);
}
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index 31d41fc..225b498 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -58,6 +58,44 @@ MATCHER_P2(EqualsDocHitInfo, document_id, section_ids, "") {
actual.hit_section_ids_mask() == section_mask;
}
+// Used to match a DocHitInfo and its per-section term frequencies
+MATCHER_P2(EqualsDocHitInfoWithTermFrequency, document_id,
+ section_ids_to_term_frequencies_map, "") {
+ const DocHitInfo& actual = arg;
+ SectionIdMask section_mask = kSectionIdMaskNone;
+
+ bool term_frequency_as_expected = true;
+ std::vector<Hit::Score> expected_tfs;
+ std::vector<Hit::Score> actual_tfs;
+ for (auto itr = section_ids_to_term_frequencies_map.begin();
+ itr != section_ids_to_term_frequencies_map.end(); itr++) {
+ SectionId section_id = itr->first;
+ section_mask |= 1U << section_id;
+ expected_tfs.push_back(itr->second);
+ actual_tfs.push_back(actual.max_hit_score(section_id));
+ if (actual.max_hit_score(section_id) != itr->second) {
+ term_frequency_as_expected = false;
+ }
+ }
+ std::string actual_term_frequencies = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(actual_tfs, ",", absl_ports::NumberFormatter()),
+ "]");
+ std::string expected_term_frequencies = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected_tfs, ",", absl_ports::NumberFormatter()),
+ "]");
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(actual is {document_id=%d, section_mask=%d, term_frequencies=%s}, but "
+ "expected was "
+ "{document_id=%d, section_mask=%d, term_frequencies=%s}.)",
+ actual.document_id(), actual.hit_section_ids_mask(),
+ actual_term_frequencies.c_str(), document_id, section_mask,
+ expected_term_frequencies.c_str());
+ return actual.document_id() == document_id &&
+ actual.hit_section_ids_mask() == section_mask &&
+ term_frequency_as_expected;
+}
+
// Used to match a ScoredDocumentHit
MATCHER_P(EqualsScoredDocumentHit, expected_scored_document_hit, "") {
if (arg.document_id() != expected_scored_document_hit.document_id() ||
diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc
index 0e2eb2a..eba1dfa 100644
--- a/icing/testing/hit-test-utils.cc
+++ b/icing/testing/hit-test-utils.cc
@@ -42,8 +42,8 @@ std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
if (num_hits < 1) {
return hits;
}
- hits.push_back(
- Hit(/*section_id=*/1, /*document_id=*/start_docid, Hit::kMaxHitScore));
+ hits.push_back(Hit(/*section_id=*/1, /*document_id=*/start_docid,
+ Hit::kDefaultHitScore));
while (hits.size() < num_hits) {
hits.push_back(CreateHit(hits.back(), desired_byte_length));
}
diff --git a/icing/testing/icu-i18n-test-utils.cc b/icing/testing/icu-i18n-test-utils.cc
index 09878db..50dc26c 100644
--- a/icing/testing/icu-i18n-test-utils.cc
+++ b/icing/testing/icu-i18n-test-utils.cc
@@ -29,7 +29,7 @@ std::string UCharToString(UChar32 uchar) {
uint8_t utf8_buffer[4]; // U8_APPEND writes 0 to 4 bytes
int utf8_index = 0;
- UBool has_error = FALSE;
+ UBool has_error = false;
// utf8_index is advanced to the end of the contents if successful
U8_APPEND(utf8_buffer, utf8_index, sizeof(utf8_buffer), uchar, has_error);
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
index 4d86913..fb1fc4b 100644
--- a/icing/util/document-validator.cc
+++ b/icing/util/document-validator.cc
@@ -96,7 +96,8 @@ libtextclassifier3::Status DocumentValidator::Validate(
if (property_iter == parsed_property_configs.property_config_map.end()) {
return absl_ports::NotFoundError(absl_ports::StrCat(
"Property config '", property.name(), "' not found for key: (",
- document.namespace_(), ", ", document.uri(), ")."));
+ document.namespace_(), ", ", document.uri(),
+ ") of type: ", document.schema(), "."));
}
const PropertyConfigProto& property_config = *property_iter->second;
diff --git a/icing/util/document-validator.h b/icing/util/document-validator.h
index 34a3217..036d1fa 100644
--- a/icing/util/document-validator.h
+++ b/icing/util/document-validator.h
@@ -56,8 +56,6 @@ class DocumentValidator {
// In addition, all nested DocumentProto will also be validated towards the
// requirements above.
//
- // DocumentProto.custom_properties are not validated.
- //
// Returns:
// OK on success
// FAILED_PRECONDITION if no schema is set yet
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index 6067162..ad5a93e 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -195,18 +195,6 @@ TEST_F(DocumentValidatorTest, ValidateNonexistentPropertyNotFound) {
HasSubstr("'WrongPropertyName' not found")));
}
-TEST_F(DocumentValidatorTest, ValidateAllCustomPropertyOk) {
- DocumentProto email =
- SimpleEmailBuilder()
- // A nonexistent property, would've triggered a NotFound message
- .AddCustomStringProperty("WrongPropertyName", kDefaultString)
- // 'subject' property should've been a string according to the schema
- .AddCustomBooleanProperty(kPropertySubject, false, true)
- .Build();
-
- EXPECT_THAT(document_validator_->Validate(email), IsOk());
-}
-
TEST_F(DocumentValidatorTest, ValidateExactlyOneRequiredValueOk) {
// Required property should have exactly 1 value
DocumentProto email =
diff --git a/icing/util/i18n-utils.cc b/icing/util/i18n-utils.cc
index d6754d5..cd0a227 100644
--- a/icing/util/i18n-utils.cc
+++ b/icing/util/i18n-utils.cc
@@ -156,7 +156,7 @@ void AppendUchar32ToUtf8(std::string* utf8_string, UChar32 uchar) {
uint8_t utf8_buffer[4]; // U8_APPEND writes 0 to 4 bytes
int utf8_index = 0;
- UBool has_error = FALSE;
+ UBool has_error = false;
// utf8_index is advanced to the end of the contents if successful
U8_APPEND(utf8_buffer, utf8_index, sizeof(utf8_buffer), uchar, has_error);
diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto
index ff215bd..ecbd5ce 100644
--- a/proto/icing/proto/document.proto
+++ b/proto/icing/proto/document.proto
@@ -16,8 +16,8 @@ syntax = "proto2";
package icing.lib;
-import "icing/proto/status.proto";
import "icing/proto/logging.proto";
+import "icing/proto/status.proto";
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
@@ -50,11 +50,6 @@ message DocumentProto {
// already defined in the schema for this Document's schema_type.
repeated PropertyProto properties = 5;
- // OPTIONAL: Properties that will not be validated against the schema,
- // indexed, or be searchable. The properties will be stored in the Documents,
- // but never looked at by Icing.
- repeated PropertyProto custom_properties = 6;
-
// OPTIONAL: Score of the document which could be used during search result
// ranking. Negative values will lead to validation errors. The default is the
// lowest score 0.
@@ -69,6 +64,8 @@ message DocumentProto {
// TODO(cassiewang): Benchmark if fixed64 or some other proto type is better
// in terms of space/time efficiency. Both for ttl_ms and timestamp fields
optional int64 ttl_ms = 8 [default = 0];
+
+ reserved 6;
}
// Holds a property field of the Document.
diff --git a/proto/icing/proto/scoring.proto b/proto/icing/proto/scoring.proto
index 3a99b09..bfa7aec 100644
--- a/proto/icing/proto/scoring.proto
+++ b/proto/icing/proto/scoring.proto
@@ -18,7 +18,6 @@ package icing.lib;
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// Encapsulates the configurations on how Icing should score and rank the search
@@ -64,6 +63,12 @@ message ScoringSpecProto {
// Ranked by last used timestamp with usage type 3. The timestamps are
// compared in seconds.
USAGE_TYPE3_LAST_USED_TIMESTAMP = 8;
+
+ // Placeholder for ranking by relevance score, currently computed as BM25F
+ // score.
+ // TODO(b/173156803): once the implementation is ready, rename to
+ // RELEVANCE_SCORE.
+ RELEVANCE_SCORE_NONFUNCTIONAL_PLACEHOLDER = 9;
}
}
optional RankingStrategy.Code rank_by = 1;
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index abbfc32..6cb590f 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -64,7 +64,7 @@ message SearchSpecProto {
// Client-supplied specifications on what to include/how to format the search
// results.
-// Next tag: 4
+// Next tag: 5
message ResultSpecProto {
// The results will be returned in pages, and num_per_page specifies the
// number of documents in one page.
@@ -96,6 +96,22 @@ message ResultSpecProto {
optional int32 max_window_bytes = 3;
}
optional SnippetSpecProto snippet_spec = 3;
+
+ // How to specify a subset of properties to retrieve. If no type property mask
+ // has been specified for a schema type, then *all* properties of that schema
+ // type will be retrieved.
+ // Next tag: 3
+ message TypePropertyMask {
+ // The schema type to which these property masks should apply.
+ optional string schema_type = 1;
+
+ // The property masks specifying the properties to be retrieved. Property
+ // masks must be composed only of property names and property separators
+ // (the '.' character). For example, "subject", "recipients.name". Specifying
+ // no property masks will result in *no* properties being retrieved.
+ repeated string paths = 2;
+ }
+ repeated TypePropertyMask type_property_masks = 4;
}
// The representation of a single match within a DocumentProto property.