diff options
author | Tim Barron <tjbarron@google.com> | 2020-12-29 00:23:52 +0000 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2021-01-05 19:03:14 +0000 |
commit | 59c2caa38fd8dca3760dad751f4f8e5de8be25f5 (patch) | |
tree | 28b5f15e8bbfb73eb7470bef37f554ea4f4847a7 /icing | |
parent | 282a5708af10879b12a09a59ad5bbfa253b1e92a (diff) | |
download | icing-59c2caa38fd8dca3760dad751f4f8e5de8be25f5.tar.gz |
Update Icing from upstream.
Change-Id: Iff50aebffb83529b0454e7c3a6dc6864e7a85f4a
Diffstat (limited to 'icing')
48 files changed, 2492 insertions, 864 deletions
diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc index 4a76c01..6a596f5 100644 --- a/icing/file/filesystem.cc +++ b/icing/file/filesystem.cc @@ -464,6 +464,20 @@ bool Filesystem::Write(const char* filename, const void* data, return success; } +bool Filesystem::CopyFile(const char* src, const char* dst) const { + ScopedFd src_fd(OpenForRead(src)); + ScopedFd dst_fd(OpenForWrite(dst)); + if (!src_fd.is_valid() || !dst_fd.is_valid()) { + return false; + } + uint64_t size = GetFileSize(*src_fd); + std::unique_ptr<uint8_t[]> buf = std::make_unique<uint8_t[]>(size); + if (!Read(*src_fd, buf.get(), size)) { + return false; + } + return Write(*dst_fd, buf.get(), size); +} + bool Filesystem::PWrite(int fd, off_t offset, const void* data, size_t data_size) const { size_t write_len = data_size; diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h index b85f3a0..d3c7787 100644 --- a/icing/file/filesystem.h +++ b/icing/file/filesystem.h @@ -83,6 +83,9 @@ class Filesystem { // success or if the directory did not yet exist. virtual bool DeleteDirectoryRecursively(const char* dir_name) const; + // Copies the src file to the dst file. + virtual bool CopyFile(const char* src, const char* dst) const; + // Returns true if a file exists. False if the file doesn't exist. // If there is an error getting stat on the file, it logs the error and // // asserts. 
diff --git a/icing/file/mock-filesystem.h b/icing/file/mock-filesystem.h index b89295e..88475cd 100644 --- a/icing/file/mock-filesystem.h +++ b/icing/file/mock-filesystem.h @@ -225,6 +225,8 @@ class MockFilesystem : public Filesystem { MOCK_METHOD(bool, DeleteDirectoryRecursively, (const char* dir_name), (const)); + MOCK_METHOD(bool, CopyFile, (const char* src, const char* dst), (const)); + MOCK_METHOD(bool, FileExists, (const char* file_name), (const)); MOCK_METHOD(bool, DirectoryExists, (const char* dir_name), (const)); diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc index 1cb8620..5a9327e 100644 --- a/icing/icing-search-engine-with-icu-file_test.cc +++ b/icing/icing-search-engine-with-icu-file_test.cc @@ -27,6 +27,7 @@ #include "icing/proto/search.pb.h" #include "icing/proto/status.pb.h" #include "icing/proto/term.pb.h" +#include "icing/testing/common-matchers.h" #include "icing/testing/tmp-directory.h" namespace icing { @@ -114,7 +115,8 @@ TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) { // The token is a random number so we don't verify it. 
expected_search_result_proto.set_next_page_token( search_result_proto.next_page_token()); - EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } } // namespace diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index c40bac9..d915d65 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -749,6 +749,11 @@ ReportUsageResultProto IcingSearchEngine::ReportUsage( StatusProto* result_status = result_proto.mutable_status(); absl_ports::unique_lock l(&mutex_); + if (!initialized_) { + result_status->set_code(StatusProto::FAILED_PRECONDITION); + result_status->set_message("IcingSearchEngine has not been initialized!"); + return result_proto; + } libtextclassifier3::Status status = document_store_->ReportUsage(usage_report); @@ -761,6 +766,11 @@ GetAllNamespacesResultProto IcingSearchEngine::GetAllNamespaces() { StatusProto* result_status = result_proto.mutable_status(); absl_ports::shared_lock l(&mutex_); + if (!initialized_) { + result_status->set_code(StatusProto::FAILED_PRECONDITION); + result_status->set_message("IcingSearchEngine has not been initialized!"); + return result_proto; + } std::vector<std::string> namespaces = document_store_->GetAllNamespaces(); @@ -786,6 +796,10 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space, return result_proto; } + NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats(); + delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SINGLE); + + std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
libtextclassifier3::Status status = document_store_->Delete(name_space, uri); @@ -798,6 +812,8 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space, } result_status->set_code(StatusProto::OK); + delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds()); + delete_stats->set_num_documents_deleted(1); return result_proto; } @@ -814,18 +830,24 @@ DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace( return delete_result; } + NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats(); + delete_stats->set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE); + + std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. - libtextclassifier3::Status status = + DocumentStore::DeleteByGroupResult doc_store_result = document_store_->DeleteByNamespace(name_space); - if (!status.ok()) { - ICING_LOG(ERROR) << status.error_message() + if (!doc_store_result.status.ok()) { + ICING_LOG(ERROR) << doc_store_result.status.error_message() << "Failed to delete Namespace: " << name_space; - TransformStatus(status, result_status); + TransformStatus(doc_store_result.status, result_status); return delete_result; } result_status->set_code(StatusProto::OK); + delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds()); + delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted); return delete_result; } @@ -842,27 +864,33 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType( return delete_result; } + NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats(); + delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE); + + std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
- libtextclassifier3::Status status = + DocumentStore::DeleteByGroupResult doc_store_result = document_store_->DeleteBySchemaType(schema_type); - if (!status.ok()) { - ICING_LOG(ERROR) << status.error_message() + if (!doc_store_result.status.ok()) { + ICING_LOG(ERROR) << doc_store_result.status.error_message() << "Failed to delete SchemaType: " << schema_type; - TransformStatus(status, result_status); + TransformStatus(doc_store_result.status, result_status); return delete_result; } result_status->set_code(StatusProto::OK); + delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds()); + delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted); return delete_result; } -DeleteResultProto IcingSearchEngine::DeleteByQuery( +DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( const SearchSpecProto& search_spec) { ICING_VLOG(1) << "Deleting documents for query " << search_spec.query() << " from doc store"; - DeleteResultProto result_proto; + DeleteByQueryResultProto result_proto; StatusProto* result_status = result_proto.mutable_status(); absl_ports::unique_lock l(&mutex_); @@ -872,6 +900,10 @@ DeleteResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } + NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats(); + delete_stats->set_delete_type(NativeDeleteStats::DeleteType::QUERY); + + std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); libtextclassifier3::Status status = ValidateSearchSpec(search_spec, performance_configuration_); if (!status.ok()) { @@ -898,13 +930,12 @@ DeleteResultProto IcingSearchEngine::DeleteByQuery( QueryProcessor::QueryResults query_results = std::move(query_results_or).ValueOrDie(); - ICING_LOG(ERROR) << "Deleting the docs that matched the query."; - bool found_results = false; + ICING_VLOG(2) << "Deleting the docs that matched the query."; + int num_deleted = 0; while (query_results.root_iterator->Advance().ok()) { - ICING_LOG(ERROR) - << "Deleting doc " - << 
query_results.root_iterator->doc_hit_info().document_id(); - found_results = true; + ICING_VLOG(3) << "Deleting doc " + << query_results.root_iterator->doc_hit_info().document_id(); + ++num_deleted; status = document_store_->Delete( query_results.root_iterator->doc_hit_info().document_id()); if (!status.ok()) { @@ -912,13 +943,15 @@ DeleteResultProto IcingSearchEngine::DeleteByQuery( return result_proto; } } - if (found_results) { + if (num_deleted > 0) { result_proto.mutable_status()->set_code(StatusProto::OK); } else { result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); result_proto.mutable_status()->set_message( "No documents matched the query to delete by!"); } + delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds()); + delete_stats->set_num_documents_deleted(num_deleted); return result_proto; } @@ -1141,6 +1174,9 @@ SearchResultProto IcingSearchEngine::Search( return result_proto; } + NativeQueryStats* query_stats = result_proto.mutable_query_stats(); + std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer(); + libtextclassifier3::Status status = ValidateResultSpec(result_spec); if (!status.ok()) { TransformStatus(status, result_status); @@ -1152,6 +1188,15 @@ SearchResultProto IcingSearchEngine::Search( return result_proto; } + query_stats->set_num_namespaces_filtered( + search_spec.namespace_filters_size()); + query_stats->set_num_schema_types_filtered( + search_spec.schema_type_filters_size()); + query_stats->set_ranking_strategy(scoring_spec.rank_by()); + query_stats->set_is_first_page(true); + query_stats->set_requested_page_size(result_spec.num_per_page()); + + std::unique_ptr<Timer> component_timer = clock_->GetNewTimer(); // Gets unordered results from query processor auto query_processor_or = QueryProcessor::Create( index_.get(), language_segmenter_.get(), normalizer_.get(), @@ -1170,7 +1215,16 @@ SearchResultProto IcingSearchEngine::Search( } QueryProcessor::QueryResults query_results = 
std::move(query_results_or).ValueOrDie(); + query_stats->set_parse_query_latency_ms( + component_timer->GetElapsedMilliseconds()); + int term_count = 0; + for (const auto& section_and_terms : query_results.query_terms) { + term_count += section_and_terms.second.size(); + } + query_stats->set_num_terms(term_count); + + component_timer = clock_->GetNewTimer(); // Scores but does not rank the results. libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> scoring_processor_or = @@ -1184,6 +1238,9 @@ SearchResultProto IcingSearchEngine::Search( std::vector<ScoredDocumentHit> result_document_hits = scoring_processor->Score(std::move(query_results.root_iterator), performance_configuration_.num_to_score); + query_stats->set_scoring_latency_ms( + component_timer->GetElapsedMilliseconds()); + query_stats->set_num_documents_scored(result_document_hits.size()); // Returns early for empty result if (result_document_hits.empty()) { @@ -1191,6 +1248,7 @@ SearchResultProto IcingSearchEngine::Search( return result_proto; } + component_timer = clock_->GetNewTimer(); // Ranks and paginates results libtextclassifier3::StatusOr<PageResultState> page_result_state_or = result_state_manager_.RankAndPaginate(ResultState( @@ -1202,7 +1260,10 @@ SearchResultProto IcingSearchEngine::Search( } PageResultState page_result_state = std::move(page_result_state_or).ValueOrDie(); + query_stats->set_ranking_latency_ms( + component_timer->GetElapsedMilliseconds()); + component_timer = clock_->GetNewTimer(); // Retrieves the document protos and snippets if requested auto result_retriever_or = ResultRetriever::Create(document_store_.get(), schema_store_.get(), @@ -1236,6 +1297,14 @@ SearchResultProto IcingSearchEngine::Search( if (page_result_state.next_page_token != kInvalidNextPageToken) { result_proto.set_next_page_token(page_result_state.next_page_token); } + query_stats->set_document_retrieval_latency_ms( + component_timer->GetElapsedMilliseconds()); + 
query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds()); + query_stats->set_num_results_returned_current_page( + result_proto.results_size()); + query_stats->set_num_results_snippeted( + std::min(result_proto.results_size(), + result_spec.snippet_spec().num_to_snippet())); return result_proto; } @@ -1252,6 +1321,10 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { return result_proto; } + NativeQueryStats* query_stats = result_proto.mutable_query_stats(); + query_stats->set_is_first_page(false); + + std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer(); libtextclassifier3::StatusOr<PageResultState> page_result_state_or = result_state_manager_.GetNextPage(next_page_token); @@ -1268,6 +1341,7 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { PageResultState page_result_state = std::move(page_result_state_or).ValueOrDie(); + query_stats->set_requested_page_size(page_result_state.requested_page_size); // Retrieves the document protos. auto result_retriever_or = @@ -1299,6 +1373,21 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) { if (page_result_state.next_page_token != kInvalidNextPageToken) { result_proto.set_next_page_token(page_result_state.next_page_token); } + + // The only thing that we're doing is document retrieval. So document + // retrieval latency and overall latency are the same and can use the same + // timer. 
+ query_stats->set_document_retrieval_latency_ms( + overall_timer->GetElapsedMilliseconds()); + query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds()); + query_stats->set_num_results_returned_current_page( + result_proto.results_size()); + int num_left_to_snippet = + std::max(page_result_state.snippet_context.snippet_spec.num_to_snippet() - + page_result_state.num_previously_returned, + 0); + query_stats->set_num_results_snippeted( + std::min(result_proto.results_size(), num_left_to_snippet)); return result_proto; } diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index 70a9c07..b2bb4f1 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -287,7 +287,7 @@ class IcingSearchEngine { // NOT_FOUND if the query doesn't match any documents // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet // INTERNAL_ERROR on IO error - DeleteResultProto DeleteByQuery(const SearchSpecProto& search_spec) + DeleteByQueryResultProto DeleteByQuery(const SearchSpecProto& search_spec) ICING_LOCKS_EXCLUDED(mutex_); // Retrieves, scores, ranks, and returns the results according to the specs. @@ -404,15 +404,18 @@ class IcingSearchEngine { bool initialized_ ICING_GUARDED_BY(mutex_) = false; // Abstraction for accessing time values. - std::unique_ptr<Clock> clock_; + const std::unique_ptr<const Clock> clock_; // Provides key thresholds that affects the running time and memory of major // components in Icing search engine. - PerformanceConfiguration performance_configuration_; - - // Used to manage pagination state of query results. A lock is not needed here - // because ResultStateManager has its own reader-writer lock. - ResultStateManager result_state_manager_; + const PerformanceConfiguration performance_configuration_; + + // Used to manage pagination state of query results. 
Even though + // ResultStateManager has its own reader-writer lock, mutex_ must still be + // acquired first in order to adhere to the global lock ordering: + // 1. mutex_ + // 2. result_state_manager_.lock_ + ResultStateManager result_state_manager_ ICING_GUARDED_BY(mutex_); // Used to provide reader and writer locks absl_ports::shared_mutex mutex_; diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc index a6d96e0..9d33a82 100644 --- a/icing/icing-search-engine_benchmark.cc +++ b/icing/icing-search-engine_benchmark.cc @@ -117,7 +117,7 @@ std::vector<std::string> CreateNamespaces(int num_namespaces) { // Creates a vector containing num_words randomly-generated words for use by // documents. template <typename Rand> -std::vector<std::string> CreateLanguage(int num_words, Rand* r) { +std::vector<std::string> CreateLanguages(int num_words, Rand* r) { std::vector<std::string> language; std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev); while (--num_words >= 0) { @@ -175,6 +175,165 @@ class DestructibleDirectory { std::string dir_; }; +std::vector<DocumentProto> GenerateRandomDocuments( + EvenDistributionTypeSelector* type_selector, int num_docs) { + std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces); + EvenDistributionNamespaceSelector namespace_selector(namespaces); + + std::default_random_engine random; + std::vector<std::string> language = CreateLanguages(kLanguageSize, &random); + UniformDistributionLanguageTokenGenerator<std::default_random_engine> + token_generator(language, &random); + + DocumentGenerator< + EvenDistributionNamespaceSelector, EvenDistributionTypeSelector, + UniformDistributionLanguageTokenGenerator<std::default_random_engine>> + generator(&namespace_selector, type_selector, &token_generator, + kAvgDocumentSize * kContentSizePct); + + std::vector<DocumentProto> random_docs; + random_docs.reserve(num_docs); + for (int i = 0; i < num_docs; i++) { + 
random_docs.push_back(generator.generateDoc()); + } + return random_docs; +} + +void BM_IndexLatency(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + std::default_random_engine random; + int num_types = kAvgNumNamespaces * kAvgNumTypes; + ExactStringPropertyGenerator property_generator; + SchemaGenerator<ExactStringPropertyGenerator> schema_generator( + /*num_properties=*/state.range(1), &property_generator); + SchemaProto schema = schema_generator.GenerateSchema(num_types); + EvenDistributionTypeSelector type_selector(schema); + + // Create the index. + IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + int num_docs = state.range(0); + const std::vector<DocumentProto> random_docs = + GenerateRandomDocuments(&type_selector, num_docs); + Timer timer; + for (const DocumentProto& doc : random_docs) { + ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk()); + } + int64_t time_taken_ns = timer.GetElapsedNanoseconds(); + int64_t time_per_doc_ns = time_taken_ns / num_docs; + std::cout << "Number of indexed documents:\t" << num_docs + << "\t\tNumber of indexed sections:\t" << state.range(1) + << "\t\tTime taken (ms):\t" << time_taken_ns / 1000000 + << "\t\tTime taken per doc (us):\t" << time_per_doc_ns / 1000 + << std::endl; +} +BENCHMARK(BM_IndexLatency) + // Arguments: num_indexed_documents, num_sections + ->ArgPair(1, 1) + ->ArgPair(2, 1) + ->ArgPair(8, 1) + ->ArgPair(32, 1) + ->ArgPair(128, 1) + ->ArgPair(1 << 10, 1) + ->ArgPair(1 << 13, 1) + ->ArgPair(1 << 15, 1) + ->ArgPair(1 << 17, 1) + ->ArgPair(1, 5) + 
->ArgPair(2, 5) + ->ArgPair(8, 5) + ->ArgPair(32, 5) + ->ArgPair(128, 5) + ->ArgPair(1 << 10, 5) + ->ArgPair(1 << 13, 5) + ->ArgPair(1 << 15, 5) + ->ArgPair(1 << 17, 5) + ->ArgPair(1, 10) + ->ArgPair(2, 10) + ->ArgPair(8, 10) + ->ArgPair(32, 10) + ->ArgPair(128, 10) + ->ArgPair(1 << 10, 10) + ->ArgPair(1 << 13, 10) + ->ArgPair(1 << 15, 10) + ->ArgPair(1 << 17, 10); + +void BM_IndexThroughput(benchmark::State& state) { + // Initialize the filesystem + std::string test_dir = GetTestTempDir() + "/icing/benchmark"; + Filesystem filesystem; + DestructibleDirectory ddir(filesystem, test_dir); + + // Create the schema. + std::default_random_engine random; + int num_types = kAvgNumNamespaces * kAvgNumTypes; + ExactStringPropertyGenerator property_generator; + SchemaGenerator<ExactStringPropertyGenerator> schema_generator( + /*num_properties=*/state.range(1), &property_generator); + SchemaProto schema = schema_generator.GenerateSchema(num_types); + EvenDistributionTypeSelector type_selector(schema); + + // Create the index. 
+ IcingSearchEngineOptions options; + options.set_base_dir(test_dir); + options.set_index_merge_size(kIcingFullIndexSize); + std::unique_ptr<IcingSearchEngine> icing = + std::make_unique<IcingSearchEngine>(options); + + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); + + int num_docs = state.range(0); + const std::vector<DocumentProto> random_docs = + GenerateRandomDocuments(&type_selector, num_docs); + for (auto s : state) { + for (const DocumentProto& doc : random_docs) { + ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk()); + } + } + state.SetItemsProcessed(state.iterations() * num_docs); +} +BENCHMARK(BM_IndexThroughput) + // Arguments: num_indexed_documents, num_sections + ->ArgPair(1, 1) + ->ArgPair(2, 1) + ->ArgPair(8, 1) + ->ArgPair(32, 1) + ->ArgPair(128, 1) + ->ArgPair(1 << 10, 1) + ->ArgPair(1 << 13, 1) + ->ArgPair(1 << 15, 1) + ->ArgPair(1 << 17, 1) + ->ArgPair(1, 5) + ->ArgPair(2, 5) + ->ArgPair(8, 5) + ->ArgPair(32, 5) + ->ArgPair(128, 5) + ->ArgPair(1 << 10, 5) + ->ArgPair(1 << 13, 5) + ->ArgPair(1 << 15, 5) + ->ArgPair(1 << 17, 5) + ->ArgPair(1, 10) + ->ArgPair(2, 10) + ->ArgPair(8, 10) + ->ArgPair(32, 10) + ->ArgPair(128, 10) + ->ArgPair(1 << 10, 10) + ->ArgPair(1 << 13, 10) + ->ArgPair(1 << 15, 10) + ->ArgPair(1 << 17, 10); + void BM_MutlipleIndices(benchmark::State& state) { // Initialize the filesystem std::string test_dir = GetTestTempDir() + "/icing/benchmark"; @@ -202,11 +361,8 @@ void BM_MutlipleIndices(benchmark::State& state) { options.set_index_merge_size(kIcingFullIndexSize / num_indices); auto icing = std::make_unique<IcingSearchEngine>(options); - InitializeResultProto init_result = icing->Initialize(); - ASSERT_THAT(init_result.status().code(), Eq(StatusProto::OK)); - - SetSchemaResultProto schema_result = icing->SetSchema(schema); - ASSERT_THAT(schema_result.status().code(), Eq(StatusProto::OK)); + ASSERT_THAT(icing->Initialize().status(), ProtoIsOk()); + 
ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk()); icings.push_back(std::move(icing)); } @@ -214,7 +370,7 @@ void BM_MutlipleIndices(benchmark::State& state) { std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces); EvenDistributionNamespaceSelector namespace_selector(namespaces); - std::vector<std::string> language = CreateLanguage(kLanguageSize, &random); + std::vector<std::string> language = CreateLanguages(kLanguageSize, &random); UniformDistributionLanguageTokenGenerator<std::default_random_engine> token_generator(language, &random); @@ -231,8 +387,7 @@ void BM_MutlipleIndices(benchmark::State& state) { ASSERT_THAT(put_result.status().code(), Eq(StatusProto::UNKNOWN)); continue; } - put_result = icings.at(i % icings.size())->Put(doc); - ASSERT_THAT(put_result.status().code(), Eq(StatusProto::OK)); + ASSERT_THAT(icings.at(i % icings.size())->Put(doc).status(), ProtoIsOk()); } // QUERY! @@ -255,13 +410,13 @@ void BM_MutlipleIndices(benchmark::State& state) { continue; } result = icings.at(0)->Search(search_spec, scoring_spec, result_spec); - ASSERT_THAT(result.status().code(), Eq(StatusProto::OK)); + ASSERT_THAT(result.status(), ProtoIsOk()); while (!result.results().empty()) { num_results += result.results_size(); if (!icings.empty()) { result = icings.at(0)->GetNextPage(result.next_page_token()); } - ASSERT_THAT(result.status().code(), Eq(StatusProto::OK)); + ASSERT_THAT(result.status(), ProtoIsOk()); } } diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index 8d69d78..f4249f3 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -397,22 +397,26 @@ TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + 
SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // The query token is also truncated to length of 1, so "me"->"m" matches "m" search_spec.set_query("me"); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // The query token is still truncated to length of 1, so "massage"->"m" // matches "m" search_spec.set_query("massage"); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -445,9 +449,11 @@ TEST_F(IcingSearchEngineTest, SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, FailToCreateDocStore) { @@ -801,9 +807,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) { SearchResultProto empty_result; empty_result.mutable_status()->set_code(StatusProto::OK); - 
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(empty_result)); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result)); SchemaProto schema_with_indexed_property = CreateMessageSchema(); // Index restoration should be triggered here because new schema requires more @@ -815,9 +822,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) { @@ -1093,9 +1101,11 @@ TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) { SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) { @@ -1127,7 +1137,8 @@ TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) { // The token is a random number so we don't verify it. 
expected_search_result_proto.set_next_page_token( search_result_proto.next_page_token()); - EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) { @@ -1143,8 +1154,10 @@ TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) { SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec), - EqualsProto(expected_search_result_proto)); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) { @@ -1163,8 +1176,10 @@ TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) { StatusProto::INVALID_ARGUMENT); expected_search_result_proto.mutable_status()->set_message( "ResultSpecProto.num_per_page cannot be negative."); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec), - EqualsProto(expected_search_result_proto)); + SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), result_spec); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) { @@ -1205,17 +1220,19 @@ TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = CreateMessageDocument("namespace", "uri"); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + 
SearchResultProto actual_results = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); search_spec.set_query("foo"); SearchResultProto empty_result; empty_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(empty_result)); + actual_results = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result)); } } @@ -1236,7 +1253,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) { icing.Search(search_spec, GetDefaultScoringSpec(), ResultSpecProto::default_instance()); - EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { @@ -1276,7 +1294,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Second page, 2 results expected_search_result_proto.clear_results(); @@ -1284,8 +1303,9 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { document3; *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2; - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + 
EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Third page, 1 result expected_search_result_proto.clear_results(); @@ -1294,13 +1314,15 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) { // Because there are no more results, we should not return the next page // token. expected_search_result_proto.clear_next_page_token(); - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // No more results expected_search_result_proto.clear_results(); - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { @@ -1343,7 +1365,8 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Second page, 2 results expected_search_result_proto.clear_results(); @@ -1351,8 +1374,9 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { document3; *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2; - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + 
EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Third page, 1 result expected_search_result_proto.clear_results(); @@ -1361,13 +1385,15 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) { // Because there are no more results, we should not return the next page // token. expected_search_result_proto.clear_next_page_token(); - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // No more results expected_search_result_proto.clear_results(); - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { @@ -1479,7 +1505,8 @@ TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) { uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Now document1 is still to be fetched. 
// Invalidates token @@ -1488,8 +1515,9 @@ TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) { // Tries to fetch the second page, no result since it's invalidated expected_search_result_proto.clear_results(); expected_search_result_proto.clear_next_page_token(); - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -1521,7 +1549,8 @@ TEST_F(IcingSearchEngineTest, uint64_t next_page_token = search_result_proto.next_page_token(); // Since the token is a random number, we don't need to verify expected_search_result_proto.set_next_page_token(next_page_token); - EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto)); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Now document1 is still to be fetched. 
OptimizeResultProto optimize_result_proto; @@ -1533,8 +1562,9 @@ TEST_F(IcingSearchEngineTest, // invalidated during Optimize() expected_search_result_proto.clear_results(); expected_search_result_proto.clear_next_page_token(); - EXPECT_THAT(icing.GetNextPage(next_page_token), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.GetNextPage(next_page_token); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) { @@ -1855,7 +1885,13 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) { .AddStringProperty("subject", "message body2") .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); @@ -1873,7 +1909,14 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) { // Delete the first type. The first doc should be irretrievable. The // second should still be present. 
- EXPECT_THAT(icing.DeleteBySchemaType("message").status(), ProtoIsOk()); + DeleteBySchemaTypeResultProto result_proto = + icing.DeleteBySchemaType("message"); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + NativeDeleteStats exp_stats; + exp_stats.set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE); + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(1); + EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); expected_get_result_proto.mutable_status()->set_message( @@ -1896,9 +1939,11 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) { SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec.set_query("message"); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) { @@ -1965,9 +2010,11 @@ TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteByNamespace) { @@ -1993,7 +2040,12 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) { 
.SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); @@ -2016,7 +2068,14 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) { // Delete namespace1. Document1 and document2 should be irretrievable. // Document3 should still be present. - EXPECT_THAT(icing.DeleteByNamespace("namespace1").status(), ProtoIsOk()); + DeleteByNamespaceResultProto result_proto = + icing.DeleteByNamespace("namespace1"); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + NativeDeleteStats exp_stats; + exp_stats.set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE); + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(2); + EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); expected_get_result_proto.mutable_status()->set_message( @@ -2046,9 +2105,11 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) { SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); search_spec.set_query("message"); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, 
DeleteNamespaceByQuery) { @@ -2110,9 +2171,11 @@ TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteByQuery) { @@ -2131,7 +2194,12 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { .SetCreationTimestampMs(kDefaultCreationTimestampMs) .Build(); - IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); @@ -2152,7 +2220,13 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { SearchSpecProto search_spec; search_spec.set_query("body1"); search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); - EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk()); + DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + NativeDeleteStats exp_stats; + exp_stats.set_delete_type(NativeDeleteStats::DeleteType::QUERY); + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(1); + EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats)); 
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND); expected_get_result_proto.mutable_status()->set_message( @@ -2175,9 +2249,11 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) { @@ -2242,9 +2318,11 @@ TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) { document2; *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document1; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) { @@ -2304,16 +2382,20 @@ TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) { ASSERT_THAT(icing.Optimize().status(), ProtoIsOk()); // Validates that Search() works right after Optimize() - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + 
EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // Destroys IcingSearchEngine to make sure nothing is cached. IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) { @@ -2364,9 +2446,11 @@ TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document1; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -2422,9 +2506,11 @@ TEST_F(IcingSearchEngineTest, // Searching old content returns nothing because original file directory is // missing - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); search_spec.set_query("n"); @@ -2432,9 +2518,10 @@ TEST_F(IcingSearchEngineTest, new_document; // Searching new content returns the new document - 
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { @@ -2490,9 +2577,11 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); // Searching old content returns nothing because original files are missing - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); search_spec.set_query("n"); @@ -2500,9 +2589,10 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) { new_document; // Searching new content returns the new document - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) { @@ -2551,9 +2641,11 @@ TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) { EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); // Check that the document is returned as part of search results - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - 
ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) { @@ -2600,9 +2692,11 @@ TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) { EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); // Check that the document is not returned as part of search results - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { @@ -2637,9 +2731,11 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); // The message isn't indexed, so we get nothing - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // With just the schema type filter, we can search for the message search_spec.Clear(); @@ -2648,9 +2744,10 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = message_document; - 
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Since SchemaTypeIds are assigned based on order in the SchemaProto, this // will force a change in the DocumentStore's cached SchemaTypeIds @@ -2679,9 +2776,10 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) { search_spec.add_schema_type_filters("message"); // We can still search for the message document - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) { @@ -2708,9 +2806,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) { ProtoIsOk()); EXPECT_THAT(icing.Get("namespace", "uri"), EqualsProto(expected_get_result_proto)); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str())); @@ -2724,9 +2824,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) { EqualsProto(expected_get_result_proto)); // Checks that the index 
is still ok so we can search over it - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Checks that Schema is still since it'll be needed to validate the document EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), @@ -2757,9 +2859,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) { ProtoIsOk()); EXPECT_THAT(icing.Get("namespace", "uri"), EqualsProto(expected_get_result_proto)); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Change the header's magic value @@ -2777,9 +2881,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) { EqualsProto(expected_get_result_proto)); // Checks that the index is still ok so we can search over it - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Checks that Schema is still since it'll be needed to validate the document EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), @@ -2810,9 
+2916,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) { ProtoIsOk()); EXPECT_THAT(icing.Get("namespace", "uri"), EqualsProto(expected_get_result_proto)); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Change the header's checksum value @@ -2831,9 +2939,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) { EqualsProto(expected_get_result_proto)); // Checks that the index is still ok so we can search over it - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); // Checks that Schema is still since it'll be needed to validate the document EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), @@ -2945,9 +3055,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) { SearchResultProto expected_search_result_proto; expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + 
EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to { @@ -3025,9 +3137,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document2_with_additional_property; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) { @@ -3090,9 +3204,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) { *expected_search_result_proto.mutable_results()->Add()->mutable_document() = document1; - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) { @@ -3112,9 +3228,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) { EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + 
EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Pretend we lost the entire index @@ -3125,9 +3243,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) { EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); // Check that our index is ok by searching over the restored index - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) { @@ -3147,9 +3267,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) { EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), ProtoIsOk()); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to // Pretend index is corrupted @@ -3162,9 +3284,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) { EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); // Check that our index is ok by searching over the restored index - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + 
icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) { @@ -3222,9 +3346,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) { ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) { @@ -3280,9 +3405,10 @@ TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) { // order. 
ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) { @@ -3335,9 +3461,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) { ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); scoring_spec.set_rank_by( ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) { @@ -3405,9 +3532,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) { ScoringSpecProto scoring_spec; scoring_spec.set_rank_by( ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -3462,9 +3590,10 @@ TEST_F(IcingSearchEngineTest, ScoringSpecProto scoring_spec; scoring_spec.set_rank_by( 
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) { @@ -3531,9 +3660,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) { ScoringSpecProto scoring_spec; scoring_spec.set_rank_by( ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -3588,9 +3718,10 @@ TEST_F(IcingSearchEngineTest, ScoringSpecProto scoring_spec; scoring_spec.set_rank_by( ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) { @@ -3656,9 +3787,10 @@ TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) { ScoringSpecProto scoring_spec; scoring_spec.set_rank_by( ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP); - 
EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) { @@ -3717,9 +3849,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) { ScoringSpecProto scoring_spec = GetDefaultScoringSpec(); scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); scoring_spec.set_order_by(ScoringSpecProto::Order::ASC); - EXPECT_THAT(icing.Search(search_spec, scoring_spec, - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = icing.Search( + search_spec, scoring_spec, ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } TEST_F(IcingSearchEngineTest, @@ -3797,9 +3930,11 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) { expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); *expected_search_result_proto.mutable_results()->Add()->mutable_document() = CreateMessageDocument("namespace", "uri"); - ASSERT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(expected_search_result_proto)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, + EqualsSearchResultIgnoreStats(expected_search_result_proto)); } // This should shut down IcingSearchEngine and persist anything it needs to ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str())); @@ -3821,9 +3956,10 @@ TEST_F(IcingSearchEngineTest, 
SetSchemaCanDetectPreviousSchemaWasLost) { // Can't search for it SearchResultProto empty_result; empty_result.mutable_status()->set_code(StatusProto::OK); - EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), - ResultSpecProto::default_instance()), - EqualsProto(empty_result)); + SearchResultProto search_result_proto = + icing.Search(search_spec, GetDefaultScoringSpec(), + ResultSpecProto::default_instance()); + EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStats(empty_result)); } TEST_F(IcingSearchEngineTest, PersistToDisk) { @@ -5481,6 +5617,101 @@ TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) { EqualsProto(projected_document_one)); } +TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) { + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(5); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + + // Creates and inserts 5 documents + DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); + DocumentProto document2 = CreateMessageDocument("namespace", "uri2"); + DocumentProto document3 = CreateMessageDocument("namespace", "uri3"); + DocumentProto document4 = CreateMessageDocument("namespace", "uri4"); + DocumentProto document5 = CreateMessageDocument("namespace", "uri5"); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk()); + + SearchSpecProto search_spec; + search_spec.set_term_match_type(TermMatchType::PREFIX); + search_spec.add_namespace_filters("namespace"); + 
search_spec.add_schema_type_filters(document1.schema()); + search_spec.set_query("message"); + + ResultSpecProto result_spec; + result_spec.set_num_per_page(2); + result_spec.mutable_snippet_spec()->set_max_window_bytes(64); + result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); + result_spec.mutable_snippet_spec()->set_num_to_snippet(3); + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + + // Searches and gets the first page, 2 results with 2 snippets + SearchResultProto search_result = + icing.Search(search_spec, scoring_spec, result_spec); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(2)); + ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken)); + + // Check the stats + NativeQueryStats exp_stats; + exp_stats.set_num_terms(1); + exp_stats.set_num_namespaces_filtered(1); + exp_stats.set_num_schema_types_filtered(1); + exp_stats.set_ranking_strategy( + ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); + exp_stats.set_is_first_page(true); + exp_stats.set_requested_page_size(2); + exp_stats.set_num_results_returned_current_page(2); + exp_stats.set_num_documents_scored(5); + exp_stats.set_num_results_snippeted(2); + exp_stats.set_latency_ms(5); + exp_stats.set_parse_query_latency_ms(5); + exp_stats.set_scoring_latency_ms(5); + exp_stats.set_ranking_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); + + // Second page, 2 result with 1 snippet + search_result = icing.GetNextPage(search_result.next_page_token()); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(2)); + ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken)); + + exp_stats = NativeQueryStats(); + exp_stats.set_is_first_page(false); + exp_stats.set_requested_page_size(2); + 
exp_stats.set_num_results_returned_current_page(2); + exp_stats.set_num_results_snippeted(1); + exp_stats.set_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); + + // Third page, 1 result with 0 snippets + search_result = icing.GetNextPage(search_result.next_page_token()); + ASSERT_THAT(search_result.status(), ProtoIsOk()); + ASSERT_THAT(search_result.results(), SizeIs(1)); + ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken)); + + exp_stats = NativeQueryStats(); + exp_stats.set_is_first_page(false); + exp_stats.set_requested_page_size(2); + exp_stats.set_num_results_returned_current_page(1); + exp_stats.set_num_results_snippeted(0); + exp_stats.set_latency_ms(5); + exp_stats.set_document_retrieval_latency_ms(5); + EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats)); +} + } // namespace } // namespace lib } // namespace icing diff --git a/icing/index/hit/doc-hit-info.cc b/icing/index/hit/doc-hit-info.cc index 80dbbde..8e418c8 100644 --- a/icing/index/hit/doc-hit-info.cc +++ b/icing/index/hit/doc-hit-info.cc @@ -34,26 +34,28 @@ bool DocHitInfo::operator<(const DocHitInfo& other) const { } // Doesn't matter which way we compare this array, as long as // DocHitInfo is unequal when it is unequal. 
- return memcmp(max_hit_score_, other.max_hit_score_, sizeof(max_hit_score_)) < - 0; + return memcmp(hit_term_frequency_, other.hit_term_frequency_, + sizeof(hit_term_frequency_)) < 0; } -void DocHitInfo::UpdateSection(SectionId section_id, Hit::Score hit_score) { +void DocHitInfo::UpdateSection(SectionId section_id, + Hit::TermFrequency hit_term_frequency) { SectionIdMask section_id_mask = (1u << section_id); - if (hit_section_ids_mask() & section_id_mask) { - max_hit_score_[section_id] = - std::max(max_hit_score_[section_id], hit_score); - } else { - max_hit_score_[section_id] = hit_score; - hit_section_ids_mask_ |= section_id_mask; + if ((hit_section_ids_mask() & section_id_mask)) { + // If the sectionId is already embedded in the hit_section_ids_mask, + // then the term frequencies should always match. So there is no + // need to update anything. + return; } + hit_term_frequency_[section_id] = hit_term_frequency; + hit_section_ids_mask_ |= section_id_mask; } void DocHitInfo::MergeSectionsFrom(const DocHitInfo& other) { SectionIdMask other_mask = other.hit_section_ids_mask(); while (other_mask) { SectionId section_id = __builtin_ctz(other_mask); - UpdateSection(section_id, other.max_hit_score(section_id)); + UpdateSection(section_id, other.hit_term_frequency(section_id)); other_mask &= ~(1u << section_id); } } diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h index 32ba97e..8171960 100644 --- a/icing/index/hit/doc-hit-info.h +++ b/icing/index/hit/doc-hit-info.h @@ -26,17 +26,18 @@ namespace icing { namespace lib { // DocHitInfo provides a collapsed view of all hits for a specific term and doc. -// Hits contain a document_id, section_id and a hit score. The information in -// multiple hits is collapse into a DocHitInfo by providing a SectionIdMask of -// all sections that contained a hit for this term as well as the highest hit -// score of any hit for each section. +// Hits contain a document_id, section_id and a term frequency. 
The +// information in multiple hits is collapse into a DocHitInfo by providing a +// SectionIdMask of all sections that contained a hit for this term as well as +// the highest term frequency of any hit for each section. class DocHitInfo { public: explicit DocHitInfo(DocumentId document_id_in = kInvalidDocumentId, SectionIdMask hit_section_ids_mask = kSectionIdMaskNone) : document_id_(document_id_in), hit_section_ids_mask_(hit_section_ids_mask) { - memset(max_hit_score_, Hit::kDefaultHitScore, sizeof(max_hit_score_)); + memset(hit_term_frequency_, Hit::kDefaultTermFrequency, + sizeof(hit_term_frequency_)); } DocumentId document_id() const { return document_id_; } @@ -49,8 +50,8 @@ class DocHitInfo { hit_section_ids_mask_ = section_id_mask; } - Hit::Score max_hit_score(SectionId section_id) const { - return max_hit_score_[section_id]; + Hit::TermFrequency hit_term_frequency(SectionId section_id) const { + return hit_term_frequency_[section_id]; } bool operator<(const DocHitInfo& other) const; @@ -58,12 +59,14 @@ class DocHitInfo { return (*this < other) == (other < *this); } - // Updates the hit_section_ids_mask and max_hit_score for the section, if - // necessary. - void UpdateSection(SectionId section_id, Hit::Score hit_score); + // Updates the hit_section_ids_mask and hit_term_frequency for the + // section, if necessary. + void UpdateSection(SectionId section_id, + Hit::TermFrequency hit_term_frequency); - // Merges the sections of other into this. The hit_section_ids_masks are or'd - // and the max hit score for each section between the two is set. + // Merges the sections of other into this. The hit_section_ids_masks are or'd; + // if this.hit_term_frequency_[sectionId] has already been defined, + // other.hit_term_frequency_[sectionId] value is ignored. // // This does not affect the DocumentId of this or other. 
If callers care about // only merging sections for DocHitInfos with the same DocumentId, callers @@ -73,14 +76,15 @@ class DocHitInfo { private: DocumentId document_id_; SectionIdMask hit_section_ids_mask_; - Hit::Score max_hit_score_[kMaxSectionId + 1]; + Hit::TermFrequency hit_term_frequency_[kMaxSectionId + 1]; } __attribute__((packed)); static_assert(sizeof(DocHitInfo) == 22, ""); // TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions. static_assert(icing_is_packed_pod<DocHitInfo>::value, "go/icing-ubsan"); -static_assert(sizeof(Hit::Score) == 1, - "Change how max_hit_score_ is initialized if changing the type " - "of Hit::Score"); +static_assert( + sizeof(Hit::TermFrequency) == 1, + "Change how hit_term_frequency_ is initialized if changing the type " + "of Hit::TermFrequency"); } // namespace lib } // namespace icing diff --git a/icing/index/hit/doc-hit-info_test.cc b/icing/index/hit/doc-hit-info_test.cc index 1e1880f..15c0de9 100644 --- a/icing/index/hit/doc-hit-info_test.cc +++ b/icing/index/hit/doc-hit-info_test.cc @@ -31,50 +31,43 @@ using ::testing::Ne; constexpr DocumentId kSomeDocumentId = 12; constexpr DocumentId kSomeOtherDocumentId = 54; -TEST(DocHitInfoTest, InitialMaxHitScores) { +TEST(DocHitInfoTest, InitialMaxHitTermFrequencies) { DocHitInfo info(kSomeDocumentId); for (SectionId i = 0; i <= kMaxSectionId; ++i) { - EXPECT_THAT(info.max_hit_score(i), Eq(Hit::kDefaultHitScore)); + EXPECT_THAT(info.hit_term_frequency(i), Eq(Hit::kDefaultTermFrequency)); } } -TEST(DocHitInfoTest, UpdateHitScores) { +TEST(DocHitInfoTest, UpdateHitTermFrequenciesForTheFirstTime) { DocHitInfo info(kSomeDocumentId); - ASSERT_THAT(info.max_hit_score(3), Eq(Hit::kDefaultHitScore)); + ASSERT_THAT(info.hit_term_frequency(3), Eq(Hit::kDefaultTermFrequency)); - // Updating a section for the first time, should change its max hit score, - // even though the hit score (16) may be lower than the current value returned - // by info.max_hit_score(3) 
(kDefaultHitScore) + // Updating a section for the first time, should change its hit + // term_frequency info.UpdateSection(3, 16); - EXPECT_THAT(info.max_hit_score(3), Eq(16)); + EXPECT_THAT(info.hit_term_frequency(3), Eq(16)); +} - // Updating a section with a hit score lower than the previously set one - // should not update max hit score. +TEST(DocHitInfoTest, UpdateSectionLowerHitTermFrequencyHasNoEffect) { + DocHitInfo info(kSomeDocumentId); + info.UpdateSection(3, 16); + ASSERT_THAT(info.hit_term_frequency(3), Eq(16)); + + // Updating a section with a term frequency lower than the previously set + // one should have no effect. info.UpdateSection(3, 15); - EXPECT_THAT(info.max_hit_score(3), Eq(16)); + EXPECT_THAT(info.hit_term_frequency(3), Eq(16)); +} - // Updating a section with a hit score higher than the previously set one - // should update the max hit score. - info.UpdateSection(3, 17); - EXPECT_THAT(info.max_hit_score(3), Eq(17)); - - // Updating a section with kDefaultHitScore should *never* set the - // max_hit_score to kDefaultHitScore (unless it already was kDefaultHitScore) - // because kDefaultHitScore is the lowest possible valid hit score. - info.UpdateSection(3, Hit::kDefaultHitScore); - EXPECT_THAT(info.max_hit_score(3), Eq(17)); - - // Updating a section with kMaxHitScore should *always* set the max hit - // score to kMaxHitScore (regardless of what value kMaxHitScore is - // defined with). - info.UpdateSection(3, Hit::kMaxHitScore); - EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore)); - - // Updating a section that has had kMaxHitScore explicitly set, should - // *never* change the max hit score (regardless of what value kMaxHitScore - // is defined with). 
+TEST(DocHitInfoTest, UpdateSectionHigherHitTermFrequencyHasNoEffect) { + DocHitInfo info(kSomeDocumentId); info.UpdateSection(3, 16); - EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore)); + ASSERT_THAT(info.hit_term_frequency(3), Eq(16)); + + // Updating a section with a term frequency higher than the previously set + // one should have no effect. + info.UpdateSection(3, 17); + EXPECT_THAT(info.hit_term_frequency(3), Eq(16)); } TEST(DocHitInfoTest, UpdateSectionIdMask) { @@ -99,7 +92,7 @@ TEST(DocHitInfoTest, MergeSectionsFromDifferentDocumentId) { DocHitInfo info2(kSomeOtherDocumentId); info2.UpdateSection(7, 12); info1.MergeSectionsFrom(info2); - EXPECT_THAT(info1.max_hit_score(7), Eq(12)); + EXPECT_THAT(info1.hit_term_frequency(7), Eq(12)); EXPECT_THAT(info1.document_id(), Eq(kSomeDocumentId)); } @@ -110,7 +103,7 @@ TEST(DocHitInfoTest, MergeSectionsFromKeepsOldSection) { info1.UpdateSection(3, 16); DocHitInfo info2(kSomeDocumentId); info1.MergeSectionsFrom(info2); - EXPECT_THAT(info1.max_hit_score(3), Eq(16)); + EXPECT_THAT(info1.hit_term_frequency(3), Eq(16)); } TEST(DocHitInfoTest, MergeSectionsFromAddsNewSection) { @@ -120,29 +113,29 @@ TEST(DocHitInfoTest, MergeSectionsFromAddsNewSection) { DocHitInfo info2(kSomeDocumentId); info2.UpdateSection(7, 12); info1.MergeSectionsFrom(info2); - EXPECT_THAT(info1.max_hit_score(7), Eq(12)); + EXPECT_THAT(info1.hit_term_frequency(7), Eq(12)); } -TEST(DocHitInfoTest, MergeSectionsFromSetsHigherHitScore) { - // Merging should override the value of a section in info1 if the same section - // is present in info2 with a higher hit score. +TEST(DocHitInfoTest, MergeSectionsFromHigherHitTermFrequencyHasNoEffect) { + // Merging should not override the value of a section in info1 if the same + // section is present in info2. 
DocHitInfo info1(kSomeDocumentId); info1.UpdateSection(2, 77); DocHitInfo info2(kSomeDocumentId); info2.UpdateSection(2, 89); info1.MergeSectionsFrom(info2); - EXPECT_THAT(info1.max_hit_score(2), Eq(89)); + EXPECT_THAT(info1.hit_term_frequency(2), Eq(77)); } -TEST(DocHitInfoTest, MergeSectionsFromDoesNotSetLowerHitScore) { +TEST(DocHitInfoTest, MergeSectionsFromLowerHitScoreHasNoEffect) { // Merging should not override the hit score of a section in info1 if the same - // section is present in info2 but with a lower hit score. + // section is present in info2. DocHitInfo info1(kSomeDocumentId); info1.UpdateSection(5, 108); DocHitInfo info2(kSomeDocumentId); info2.UpdateSection(5, 13); info1.MergeSectionsFrom(info2); - EXPECT_THAT(info1.max_hit_score(5), Eq(108)); + EXPECT_THAT(info1.hit_term_frequency(5), Eq(108)); } TEST(DocHitInfoTest, Comparison) { @@ -156,7 +149,7 @@ TEST(DocHitInfoTest, Comparison) { DocHitInfo high_section_id_info(kDocumentId); high_section_id_info.UpdateSection(1, 12); - high_section_id_info.UpdateSection(6, Hit::kDefaultHitScore); + high_section_id_info.UpdateSection(6, Hit::kDefaultTermFrequency); std::vector<DocHitInfo> infos{info, high_document_id_info, high_section_id_info}; @@ -166,10 +159,10 @@ TEST(DocHitInfoTest, Comparison) { // There are no requirements for how DocHitInfos with the same DocumentIds and // hit masks will compare, but they must not be equal. 
- DocHitInfo different_hit_score_info(kDocumentId); - different_hit_score_info.UpdateSection(1, 76); - EXPECT_THAT(info < different_hit_score_info, - Ne(different_hit_score_info < info)); + DocHitInfo different_term_frequency_info(kDocumentId); + different_term_frequency_info.UpdateSection(1, 76); + EXPECT_THAT(info < different_term_frequency_info, + Ne(different_term_frequency_info < info)); } } // namespace lib diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc index d089dd5..2a5a0d9 100644 --- a/icing/index/hit/hit.cc +++ b/icing/index/hit/hit.cc @@ -30,8 +30,9 @@ enum FlagOffset { // This hit represents a prefix of a longer term. If exact matches are // required, then this hit should be ignored. kPrefixHit = 1, - // Whether or not the hit has a hit score other than kDefaultHitScore. - kHasScore = 2, + // Whether or not the hit has a term_frequency other than + // kDefaultTermFrequency. + kHasTermFrequency = 2, kNumFlags = 3, }; static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <= @@ -51,9 +52,10 @@ inline DocumentId InvertDocumentId(DocumentId document_id) { } // namespace -Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score, - bool is_in_prefix_section, bool is_prefix_hit) - : score_(score) { +Hit::Hit(SectionId section_id, DocumentId document_id, + Hit::TermFrequency term_frequency, bool is_in_prefix_section, + bool is_prefix_hit) + : term_frequency_(term_frequency) { // Values are stored so that when sorted, they appear in document_id // descending, section_id ascending, order. Also, all else being // equal, non-prefix hits sort before prefix hits. 
So inverted @@ -64,7 +66,8 @@ Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score, kSectionIdBits + kNumFlags, kDocumentIdBits, &temp_value); bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value); - bit_util::BitfieldSet(score != kDefaultHitScore, kHasScore, 1, &temp_value); + bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency, + kHasTermFrequency, 1, &temp_value); bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &temp_value); bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, 1, &temp_value); value_ = temp_value; @@ -81,8 +84,8 @@ SectionId Hit::section_id() const { return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits); } -bool Hit::has_score() const { - return bit_util::BitfieldGet(value(), kHasScore, 1); +bool Hit::has_term_frequency() const { + return bit_util::BitfieldGet(value(), kHasTermFrequency, 1); } bool Hit::is_prefix_hit() const { diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h index 53553f0..525a5e5 100644 --- a/icing/index/hit/hit.h +++ b/icing/index/hit/hit.h @@ -31,18 +31,17 @@ namespace lib { // - a SectionId // referring to the document and section that the hit corresponds to, as well as // metadata about the hit: -// - whether the Hit has a Score other than the default value +// - whether the Hit has a TermFrequency other than the default value // - whether the Hit does not appear exactly in the document, but instead // represents a term that is a prefix of a term in the document // - whether the Hit came from a section that has prefix expansion enabled -// and a score for the hit. Ranging from [0,255] a higher score indicates a -// higher quality hit. +// and a term frequency for the hit. // The hit is the most basic unit of the index and, when grouped together by // term, can be used to encode what terms appear in what documents. 
class Hit { public: // The datatype used to encode Hit information: the document_id, section_id - // and the has_score, prefix hit and in prefix section flags. + // and the has_term_frequency, prefix hit and in prefix section flags. using Value = uint32_t; // WARNING: Changing this value will invalidate any pre-existing posting lists @@ -53,28 +52,27 @@ class Hit { // the max in a descending sort. static constexpr Value kMaxDocumentIdSortValue = 0; - // A score reflecting the "quality" of this hit. The higher the score, the - // higher quality the hit. - // The score is being repurposed for term frequency. - // TODO(b/173156700): refactor Score to TermFrequency. - using Score = uint8_t; - // Max Score is 255. - static constexpr Score kMaxHitScore = std::numeric_limits<Score>::max(); - // Default value of term frequency is 1. - static constexpr Score kDefaultHitScore = 1; + // The Term Frequency of a Hit. + using TermFrequency = uint8_t; + // Max TermFrequency is 255. + static constexpr TermFrequency kMaxTermFrequency = + std::numeric_limits<TermFrequency>::max(); + static constexpr TermFrequency kDefaultTermFrequency = 1; - explicit Hit(Value value = kInvalidValue, Score score = kDefaultHitScore) - : value_(value), score_(score) {} - Hit(SectionId section_id, DocumentId document_id, Score score, - bool is_in_prefix_section = false, bool is_prefix_hit = false); + explicit Hit(Value value = kInvalidValue, + TermFrequency term_frequency = kDefaultTermFrequency) + : value_(value), term_frequency_(term_frequency) {} + Hit(SectionId section_id, DocumentId document_id, + TermFrequency term_frequency, bool is_in_prefix_section = false, + bool is_prefix_hit = false); bool is_valid() const { return value() != kInvalidValue; } Value value() const { return value_; } DocumentId document_id() const; SectionId section_id() const; - // Whether or not the hit contains a non-default score. 
- bool has_score() const; - Score score() const { return score_; } + // Whether or not the hit contains a valid term frequency. + bool has_term_frequency() const; + TermFrequency term_frequency() const { return term_frequency_; } bool is_prefix_hit() const; bool is_in_prefix_section() const; @@ -86,10 +84,10 @@ class Hit { }; private: - // Value and score must be in this order. + // Value and TermFrequency must be in this order. // Value bits layout: 5 unused + 20 document_id + 4 section id + 3 flags. Value value_; - Score score_; + TermFrequency term_frequency_; } __attribute__((packed)); static_assert(sizeof(Hit) == 5, ""); // TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions. diff --git a/icing/index/hit/hit_test.cc b/icing/index/hit/hit_test.cc index 8c883d1..d47ca37 100644 --- a/icing/index/hit/hit_test.cc +++ b/icing/index/hit/hit_test.cc @@ -33,46 +33,46 @@ using ::testing::Not; static constexpr DocumentId kSomeDocumentId = 24; static constexpr SectionId kSomeSectionid = 5; -static constexpr Hit::Score kSomeHitScore = 57; +static constexpr Hit::TermFrequency kSomeTermFrequency = 57; -TEST(HitTest, HasScoreFlag) { - Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore); - EXPECT_THAT(h1.has_score(), IsFalse()); - EXPECT_THAT(h1.score(), Eq(Hit::kDefaultHitScore)); +TEST(HitTest, HasTermFrequencyFlag) { + Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency); + EXPECT_THAT(h1.has_term_frequency(), IsFalse()); + EXPECT_THAT(h1.term_frequency(), Eq(Hit::kDefaultTermFrequency)); - Hit h2(kSomeSectionid, kSomeDocumentId, kSomeHitScore); - EXPECT_THAT(h2.has_score(), IsTrue()); - EXPECT_THAT(h2.score(), Eq(kSomeHitScore)); + Hit h2(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency); + EXPECT_THAT(h2.has_term_frequency(), IsTrue()); + EXPECT_THAT(h2.term_frequency(), Eq(kSomeTermFrequency)); } TEST(HitTest, IsPrefixHitFlag) { - Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore); + Hit h1(kSomeSectionid, 
kSomeDocumentId, Hit::kDefaultTermFrequency); EXPECT_THAT(h1.is_prefix_hit(), IsFalse()); - Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore, + Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); EXPECT_THAT(h2.is_prefix_hit(), IsFalse()); - Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore, + Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true); EXPECT_THAT(h3.is_prefix_hit(), IsTrue()); } TEST(HitTest, IsInPrefixSectionFlag) { - Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore); + Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency); EXPECT_THAT(h1.is_in_prefix_section(), IsFalse()); - Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore, + Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); EXPECT_THAT(h2.is_in_prefix_section(), IsFalse()); - Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore, + Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); EXPECT_THAT(h3.is_in_prefix_section(), IsTrue()); } TEST(HitTest, Accessors) { - Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore); + Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency); EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId)); EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid)); } @@ -88,48 +88,53 @@ TEST(HitTest, Valid) { Hit explicit_valid(kSomeValue); EXPECT_THAT(explicit_valid.is_valid(), IsTrue()); - Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId, kSomeHitScore); + Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId, + kSomeTermFrequency); EXPECT_THAT(maximum_document_id_hit.is_valid(), IsTrue()); - Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId, kSomeHitScore); + Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId, + 
kSomeTermFrequency); EXPECT_THAT(maximum_section_id_hit.is_valid(), IsTrue()); - Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeHitScore); + Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeTermFrequency); EXPECT_THAT(minimum_document_id_hit.is_valid(), IsTrue()); - Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeHitScore); + Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeTermFrequency); EXPECT_THAT(minimum_section_id_hit.is_valid(), IsTrue()); } TEST(HitTest, Comparison) { - Hit hit(1, 243, Hit::kDefaultHitScore); + Hit hit(1, 243, Hit::kDefaultTermFrequency); // DocumentIds are sorted in ascending order. So a hit with a lower // document_id should be considered greater than one with a higher // document_id. - Hit higher_document_id_hit(1, 2409, Hit::kDefaultHitScore); - Hit higher_section_id_hit(15, 243, Hit::kDefaultHitScore); - // Whether or not a hit score was set is considered, but the score itself is - // not. - Hit hitscore_hit(1, 243, 12); - Hit prefix_hit(1, 243, Hit::kDefaultHitScore, + Hit higher_document_id_hit(1, 2409, Hit::kDefaultTermFrequency); + Hit higher_section_id_hit(15, 243, Hit::kDefaultTermFrequency); + // Whether or not a term frequency was set is considered, but the term + // frequency itself is not. 
+ Hit term_frequency_hit(1, 243, 12); + Hit prefix_hit(1, 243, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true); - Hit hit_in_prefix_section(1, 243, Hit::kDefaultHitScore, + Hit hit_in_prefix_section(1, 243, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false); - std::vector<Hit> hits{ - hit, higher_document_id_hit, higher_section_id_hit, hitscore_hit, - prefix_hit, hit_in_prefix_section}; + std::vector<Hit> hits{hit, + higher_document_id_hit, + higher_section_id_hit, + term_frequency_hit, + prefix_hit, + hit_in_prefix_section}; std::sort(hits.begin(), hits.end()); - EXPECT_THAT(hits, - ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section, - prefix_hit, hitscore_hit, higher_section_id_hit)); - - Hit higher_hitscore_hit(1, 243, 108); - // Hit score value is not considered when comparing hits. - EXPECT_THAT(hitscore_hit, Not(Lt(higher_hitscore_hit))); - EXPECT_THAT(higher_hitscore_hit, Not(Lt(hitscore_hit))); + EXPECT_THAT( + hits, ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section, + prefix_hit, term_frequency_hit, higher_section_id_hit)); + + Hit higher_term_frequency_hit(1, 243, 108); + // The term frequency value is not considered when comparing hits. 
+ EXPECT_THAT(term_frequency_hit, Not(Lt(higher_term_frequency_hit))); + EXPECT_THAT(higher_term_frequency_hit, Not(Lt(term_frequency_hit))); } } // namespace diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index 3168dad..bdd9575 100644 --- a/icing/index/index-processor_test.cc +++ b/icing/index/index-processor_test.cc @@ -294,7 +294,8 @@ TEST_F(IndexProcessorTest, OneDoc) { index_->GetIterator("hello", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); std::vector<DocHitInfo> hits = GetHits(std::move(itr)); - std::unordered_map<SectionId, Hit::Score> expectedMap{{kExactSectionId, 1}}; + std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{ + {kExactSectionId, 1}}; EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency( kDocumentId0, expectedMap))); @@ -316,7 +317,7 @@ TEST_F(IndexProcessorTest, MultipleDocs) { EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); std::string coffeeRepeatedString = "coffee"; - for (int i = 0; i < Hit::kMaxHitScore + 1; i++) { + for (int i = 0; i < Hit::kMaxTermFrequency + 1; i++) { coffeeRepeatedString += " coffee"; } @@ -335,9 +336,10 @@ TEST_F(IndexProcessorTest, MultipleDocs) { index_->GetIterator("world", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); std::vector<DocHitInfo> hits = GetHits(std::move(itr)); - std::unordered_map<SectionId, Hit::Score> expectedMap1{ + std::unordered_map<SectionId, Hit::TermFrequency> expectedMap1{ {kPrefixedSectionId, 2}}; - std::unordered_map<SectionId, Hit::Score> expectedMap2{{kExactSectionId, 1}}; + std::unordered_map<SectionId, Hit::TermFrequency> expectedMap2{ + {kExactSectionId, 1}}; EXPECT_THAT( hits, ElementsAre( EqualsDocHitInfoWithTermFrequency(kDocumentId1, expectedMap1), @@ -347,7 +349,7 @@ TEST_F(IndexProcessorTest, MultipleDocs) { itr, index_->GetIterator("world", 1U << kPrefixedSectionId, TermMatchType::EXACT_ONLY)); hits = GetHits(std::move(itr)); - std::unordered_map<SectionId, Hit::Score> expectedMap{ + 
std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{ {kPrefixedSectionId, 2}}; EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency( kDocumentId1, expectedMap))); @@ -356,7 +358,7 @@ TEST_F(IndexProcessorTest, MultipleDocs) { index_->GetIterator("coffee", kSectionIdMaskAll, TermMatchType::EXACT_ONLY)); hits = GetHits(std::move(itr)); - expectedMap = {{kExactSectionId, Hit::kMaxHitScore}}; + expectedMap = {{kExactSectionId, Hit::kMaxTermFrequency}}; EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency( kDocumentId1, expectedMap))); } diff --git a/icing/index/index.cc b/icing/index/index.cc index f0c8bbd..bd41b51 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -287,7 +287,7 @@ libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) { tvi = tvi_or.ValueOrDie(); if (seen_tokens_.find(tvi) != seen_tokens_.end()) { ICING_VLOG(1) << "Updating term frequency for term " << term; - if (seen_tokens_[tvi] != Hit::kMaxHitScore) { + if (seen_tokens_[tvi] != Hit::kMaxTermFrequency) { ++seen_tokens_[tvi]; } return libtextclassifier3::Status::OK; @@ -310,7 +310,7 @@ libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) { libtextclassifier3::Status Index::Editor::IndexAllBufferedTerms() { for (auto itr = seen_tokens_.begin(); itr != seen_tokens_.end(); itr++) { - Hit hit(section_id_, document_id_, /*score=*/itr->second, + Hit hit(section_id_, document_id_, /*term_frequency=*/itr->second, term_match_type_ == TermMatchType::PREFIX); ICING_ASSIGN_OR_RETURN( uint32_t term_id, term_id_codec_->EncodeTvi(itr->first, TviType::LITE)); diff --git a/icing/index/index.h b/icing/index/index.h index 32f2b17..a4ea719 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -206,7 +206,7 @@ class Index { // The Editor is able to store previously seen terms as TermIds. This is // is more efficient than a client doing this externally because TermIds are // not exposed to clients. 
- std::unordered_map<uint32_t, Hit::Score> seen_tokens_; + std::unordered_map<uint32_t, Hit::TermFrequency> seen_tokens_; const TermIdCodec* term_id_codec_; LiteIndex* lite_index_; DocumentId document_id_; diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index ea4bcaf..e0379b8 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -365,7 +365,7 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, last_document_id = document_id; } if (hits_out != nullptr) { - hits_out->back().UpdateSection(hit.section_id(), hit.score()); + hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency()); } } return count; @@ -448,7 +448,7 @@ uint32_t LiteIndex::Seek(uint32_t term_id) { // Binary search for our term_id. Make sure we get the first // element. Using kBeginSortValue ensures this for the hit value. TermIdHitPair term_id_hit_pair( - term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultHitScore)); + term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency)); const TermIdHitPair::Value* array = hit_buffer_.array_cast<TermIdHitPair::Value>(); diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h index 191f766..61ec502 100644 --- a/icing/index/lite/term-id-hit-pair.h +++ b/icing/index/lite/term-id-hit-pair.h @@ -29,39 +29,42 @@ namespace lib { class TermIdHitPair { public: - // Layout bits: 24 termid + 32 hit value + 8 hit score. + // Layout bits: 24 termid + 32 hit value + 8 hit term frequency. 
using Value = uint64_t; static constexpr int kTermIdBits = 24; static constexpr int kHitValueBits = sizeof(Hit::Value) * 8; - static constexpr int kHitScoreBits = sizeof(Hit::Score) * 8; + static constexpr int kHitTermFrequencyBits = sizeof(Hit::TermFrequency) * 8; static const Value kInvalidValue; explicit TermIdHitPair(Value v = kInvalidValue) : value_(v) {} TermIdHitPair(uint32_t term_id, const Hit& hit) { - static_assert( - kTermIdBits + kHitValueBits + kHitScoreBits <= sizeof(Value) * 8, - "TermIdHitPairTooBig"); + static_assert(kTermIdBits + kHitValueBits + kHitTermFrequencyBits <= + sizeof(Value) * 8, + "TermIdHitPairTooBig"); value_ = 0; // Term id goes into the most significant bits because it takes // precedent in sorts. - bit_util::BitfieldSet(term_id, kHitValueBits + kHitScoreBits, kTermIdBits, + bit_util::BitfieldSet(term_id, kHitValueBits + kHitTermFrequencyBits, + kTermIdBits, &value_); + bit_util::BitfieldSet(hit.value(), kHitTermFrequencyBits, kHitValueBits, + &value_); + bit_util::BitfieldSet(hit.term_frequency(), 0, kHitTermFrequencyBits, &value_); - bit_util::BitfieldSet(hit.value(), kHitScoreBits, kHitValueBits, &value_); - bit_util::BitfieldSet(hit.score(), 0, kHitScoreBits, &value_); } uint32_t term_id() const { - return bit_util::BitfieldGet(value_, kHitValueBits + kHitScoreBits, + return bit_util::BitfieldGet(value_, kHitValueBits + kHitTermFrequencyBits, kTermIdBits); } Hit hit() const { - return Hit(bit_util::BitfieldGet(value_, kHitScoreBits, kHitValueBits), - bit_util::BitfieldGet(value_, 0, kHitScoreBits)); + return Hit( + bit_util::BitfieldGet(value_, kHitTermFrequencyBits, kHitValueBits), + bit_util::BitfieldGet(value_, 0, kHitTermFrequencyBits)); } Value value() const { return value_; } diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc index a60764d..5553c1e 100644 --- a/icing/index/main/doc-hit-info-iterator-term-main.cc +++ 
b/icing/index/main/doc-hit-info-iterator-term-main.cc @@ -114,7 +114,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() { hit.document_id() != cached_doc_hit_infos_.back().document_id()) { cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id())); } - cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), hit.score()); + cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), + hit.term_frequency()); } return libtextclassifier3::Status::OK; } @@ -162,7 +163,8 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() { hit.document_id() != cached_doc_hit_infos_.back().document_id()) { cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id())); } - cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), hit.score()); + cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), + hit.term_frequency()); } return libtextclassifier3::Status::OK; } diff --git a/icing/index/main/flash-index-storage_test.cc b/icing/index/main/flash-index-storage_test.cc index cf899b3..7e15524 100644 --- a/icing/index/main/flash-index-storage_test.cc +++ b/icing/index/main/flash-index-storage_test.cc @@ -160,10 +160,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemory) { EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits1 = { - Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19), - Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100), - Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)}; + Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19), + Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100), + Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)}; for (const Hit& hit : hits1) { ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit)); } @@ -183,10 +183,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemory) { 
EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits2 = { - Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19), - Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100), - Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)}; + Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19), + Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100), + Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)}; for (const Hit& hit : hits2) { ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit)); } @@ -217,10 +217,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemory) { EXPECT_THAT(posting_list_holder3.posting_list.GetHits(), IsOkAndHolds(IsEmpty())); std::vector<Hit> hits3 = { - Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62), - Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45), - Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12), - Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)}; + Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62), + Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45), + Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12), + Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)}; for (const Hit& hit : hits3) { ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit)); } @@ -256,10 +256,10 @@ TEST_F(FlashIndexStorageTest, FreeListNotInMemory) { EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits1 = { - Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19), - Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100), - Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)}; + Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/6, /*document_id=*/2, 
/*term_frequency=*/19), + Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100), + Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)}; for (const Hit& hit : hits1) { ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit)); } @@ -279,10 +279,10 @@ TEST_F(FlashIndexStorageTest, FreeListNotInMemory) { EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits2 = { - Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19), - Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100), - Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)}; + Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19), + Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100), + Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)}; for (const Hit& hit : hits2) { ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit)); } @@ -313,10 +313,10 @@ TEST_F(FlashIndexStorageTest, FreeListNotInMemory) { EXPECT_THAT(posting_list_holder3.posting_list.GetHits(), IsOkAndHolds(IsEmpty())); std::vector<Hit> hits3 = { - Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62), - Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45), - Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12), - Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)}; + Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62), + Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45), + Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12), + Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)}; for (const Hit& hit : hits3) { ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit)); } @@ -354,10 +354,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) { EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits1 = { - 
Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19), - Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100), - Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)}; + Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19), + Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100), + Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)}; for (const Hit& hit : hits1) { ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit)); } @@ -377,10 +377,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) { EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits2 = { - Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19), - Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100), - Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)}; + Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19), + Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100), + Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)}; for (const Hit& hit : hits2) { ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit)); } @@ -425,10 +425,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) { EXPECT_THAT(posting_list_holder3.posting_list.GetHits(), IsOkAndHolds(IsEmpty())); std::vector<Hit> hits3 = { - Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62), - Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45), - Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12), - Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)}; + Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62), + Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45), + Hit(/*section_id=*/11, /*document_id=*/18, 
/*term_frequency=*/12), + Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)}; for (const Hit& hit : hits3) { ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit)); } @@ -466,10 +466,10 @@ TEST_F(FlashIndexStorageTest, DifferentSizedPostingLists) { EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits1 = { - Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19), - Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100), - Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)}; + Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19), + Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100), + Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)}; for (const Hit& hit : hits1) { ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit)); } @@ -492,10 +492,10 @@ TEST_F(FlashIndexStorageTest, DifferentSizedPostingLists) { EXPECT_THAT(flash_index_storage.empty(), IsFalse()); std::vector<Hit> hits2 = { - Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12), - Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19), - Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100), - Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)}; + Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12), + Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19), + Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100), + Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)}; for (const Hit& hit : hits2) { ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit)); } diff --git a/icing/index/main/index-block.cc b/icing/index/main/index-block.cc index 652dbc6..4590d06 100644 --- a/icing/index/main/index-block.cc +++ b/icing/index/main/index-block.cc @@ -51,7 +51,7 @@ libtextclassifier3::Status ValidatePostingListBytes(uint32_t 
posting_list_bytes, uint32_t IndexBlock::ApproximateFullPostingListHitsForBlock( uint32_t block_size, int posting_list_index_bits) { - // Assume 50% compressed and most don't have scores. + // Assume 50% compressed and most don't have term frequencies. uint32_t bytes_per_hit = sizeof(Hit::Value) / 2; return (block_size - sizeof(BlockHeader)) / ((1u << posting_list_index_bits) * bytes_per_hit); diff --git a/icing/index/main/index-block_test.cc b/icing/index/main/index-block_test.cc index 493055f..322918d 100644 --- a/icing/index/main/index-block_test.cc +++ b/icing/index/main/index-block_test.cc @@ -105,11 +105,11 @@ TEST(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) { ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); std::vector<Hit> test_hits{ - Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore), - Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore), - Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99), - Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17), - Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore), + Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99), + Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17), + Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency), }; PostingListIndex allocated_index; { @@ -152,18 +152,18 @@ TEST(IndexBlockTest, IndexBlockMultiplePostingLists) { ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize)); std::vector<Hit> hits_in_posting_list1{ - Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore), - Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore), - Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99), - Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17), - Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore), 
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99), + Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17), + Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency), }; std::vector<Hit> hits_in_posting_list2{ - Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88), - Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultHitScore), - Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2), - Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12), - Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultHitScore), + Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88), + Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2), + Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12), + Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency), }; PostingListIndex allocated_index_1; PostingListIndex allocated_index_2; @@ -242,11 +242,11 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) { // Add hits to the first posting list. 
std::vector<Hit> hits_in_posting_list1{ - Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore), - Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore), - Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99), - Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17), - Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore), + Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99), + Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17), + Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency), }; ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1, block.AllocatePostingList()); @@ -261,11 +261,11 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) { // Add hits to the second posting list. std::vector<Hit> hits_in_posting_list2{ - Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88), - Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultHitScore), - Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2), - Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12), - Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultHitScore), + Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88), + Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2), + Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12), + Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency), }; ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2, block.AllocatePostingList()); @@ -288,9 +288,9 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) { EXPECT_TRUE(block.has_free_posting_lists()); std::vector<Hit> hits_in_posting_list3{ - Hit(/*section_id=*/12, /*document_id=*/0, /*score=*/88), - Hit(/*section_id=*/17, 
/*document_id=*/1, Hit::kDefaultHitScore), - Hit(/*section_id=*/0, /*document_id=*/2, /*score=*/2), + Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88), + Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency), + Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2), }; ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3, block.AllocatePostingList()); diff --git a/icing/index/main/main-index-merger.cc b/icing/index/main/main-index-merger.cc index 500774d..f49dc74 100644 --- a/icing/index/main/main-index-merger.cc +++ b/icing/index/main/main-index-merger.cc @@ -62,14 +62,15 @@ class HitSelector { (*hits)[pos++] = best_exact_hit_; const Hit& prefix_hit = best_prefix_hit_.hit(); // The prefix hit has term frequency equal to the sum of the term frequencies, capped at - // kMaxHitScore. - Hit::Score final_score = - std::min(static_cast<int>(Hit::kMaxHitScore), - prefix_hit.score() + best_exact_hit_.hit().score()); + // kMaxTermFrequency. + Hit::TermFrequency final_term_frequency = std::min( + static_cast<int>(Hit::kMaxTermFrequency), + prefix_hit.term_frequency() + best_exact_hit_.hit().term_frequency()); best_prefix_hit_ = TermIdHitPair( best_prefix_hit_.term_id(), - Hit(prefix_hit.section_id(), prefix_hit.document_id(), final_score, - prefix_hit.is_in_prefix_section(), prefix_hit.is_prefix_hit())); + Hit(prefix_hit.section_id(), prefix_hit.document_id(), + final_term_frequency, prefix_hit.is_in_prefix_section(), + prefix_hit.is_prefix_hit())); (*hits)[pos++] = best_prefix_hit_; // Ensure sorted. if (best_prefix_hit_.hit() < best_exact_hit_.hit()) { @@ -97,15 +98,15 @@ class HitSelector { } else { const Hit& hit = term_id_hit_pair.hit(); // Create a new prefix hit with term_frequency as the sum of the term - // frequencies. The term frequency is capped at kMaxHitScore.
- Hit::Score final_score = - std::min(static_cast<int>(Hit::kMaxHitScore), - hit.score() + best_prefix_hit_.hit().score()); - best_prefix_hit_ = - TermIdHitPair(term_id_hit_pair.term_id(), - Hit(hit.section_id(), hit.document_id(), final_score, - best_prefix_hit_.hit().is_in_prefix_section(), - best_prefix_hit_.hit().is_prefix_hit())); + // frequencies. The term frequency is capped at kMaxTermFrequency. + Hit::TermFrequency final_term_frequency = std::min( + static_cast<int>(Hit::kMaxTermFrequency), + hit.term_frequency() + best_prefix_hit_.hit().term_frequency()); + best_prefix_hit_ = TermIdHitPair( + term_id_hit_pair.term_id(), + Hit(hit.section_id(), hit.document_id(), final_term_frequency, + best_prefix_hit_.hit().is_in_prefix_section(), + best_prefix_hit_.hit().is_prefix_hit())); } } @@ -116,14 +117,14 @@ class HitSelector { const Hit& hit = term_id_hit_pair.hit(); // Create a new exact hit with term_frequency as the sum of the term // frequencies. The term frequency is capped at kMaxTermFrequency. - Hit::Score final_score = - std::min(static_cast<int>(Hit::kMaxHitScore), - hit.score() + best_exact_hit_.hit().score()); - best_exact_hit_ = - TermIdHitPair(term_id_hit_pair.term_id(), - Hit(hit.section_id(), hit.document_id(), final_score, - best_exact_hit_.hit().is_in_prefix_section(), - best_exact_hit_.hit().is_prefix_hit())); + Hit::TermFrequency final_term_frequency = std::min( + static_cast<int>(Hit::kMaxTermFrequency), + hit.term_frequency() + best_exact_hit_.hit().term_frequency()); + best_exact_hit_ = TermIdHitPair( + term_id_hit_pair.term_id(), + Hit(hit.section_id(), hit.document_id(), final_term_frequency, + best_exact_hit_.hit().is_in_prefix_section(), + best_exact_hit_.hit().is_prefix_hit())); } } @@ -192,10 +193,10 @@ class HitComparator { // {"foot", docid0, sectionid0} // {"fool", docid0, sectionid0} // -// When two or more prefix hits are duplicates, merge into one hit with score as -// the sum of the scores.
If there is both an exact and prefix hit for the same -// term, keep the exact hit as it is, update the prefix hit so that its score is -// the sum of the scores. +// When two or more prefix hits are duplicates, merge into one hit with term +// frequency as the sum of the term frequencies. If there is both an exact and +// prefix hit for the same term, keep the exact hit as it is, update the prefix +// hit so that its term frequency is the sum of the term frequencies. void DedupeHits( std::vector<TermIdHitPair>* hits, const TermIdCodec& term_id_codec, const std::unordered_map<uint32_t, int>& main_tvi_to_block_index) { @@ -278,7 +279,7 @@ MainIndexMerger::TranslateAndExpandLiteHits( size_t offset = itr_prefixes->second.first; size_t len = itr_prefixes->second.second; size_t offset_end_exclusive = offset + len; - Hit prefix_hit(hit.section_id(), hit.document_id(), hit.score(), + Hit prefix_hit(hit.section_id(), hit.document_id(), hit.term_frequency(), /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); for (; offset < offset_end_exclusive; ++offset) { // Take the tvi (in the main lexicon) of each prefix term. 
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc index 93f4576..8a2f691 100644 --- a/icing/index/main/main-index-merger_test.cc +++ b/icing/index/main/main-index-merger_test.cc @@ -86,10 +86,10 @@ TEST_F(MainIndexMergerTest, TranslateTermNotAdded) { uint32_t fool_term_id, term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); - Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore, + Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit)); @@ -125,10 +125,10 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) { uint32_t fool_term_id, term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); - Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore, + Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit)); @@ -139,7 +139,7 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) { uint32_t foo_term_id, term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN)); Hit doc1_prefix_hit(/*section_id=*/0, /*document_id=*/1, - Hit::kDefaultHitScore, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); uint32_t foot_main_tvi = 5; @@ -173,7 +173,7 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) { TermIdHitPair(foo_term_id, doc1_prefix_hit))); } -TEST_F(MainIndexMergerTest, 
DedupePrefixAndExactWithDifferentScores) { +TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentTermFrequencies) { // 1. Index one doc in the Lite Index: // - Doc0 {"foot" "foo" is_in_prefix_section=TRUE} ICING_ASSERT_OK_AND_ASSIGN( @@ -188,10 +188,11 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE)); - Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57, + Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit)); - Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit)); @@ -201,9 +202,10 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) { ICING_ASSERT_OK_AND_ASSIGN( uint32_t foo_main_term_id, term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN)); - // The prefix hit for 'foot' should have the same score as the exact hit for - // 'foot'. The final prefix hit has score equal to 58. - Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/58, + // The prefix hit for 'foot' should have the same term frequency as the exact + // hit for 'foot'. The final prefix hit has term frequency equal to 58. + Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, + /*term_frequency=*/58, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); uint32_t foot_main_tvi = 5; @@ -223,7 +225,7 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) { // a. Translate lite term ids to main term ids based on the map // b. Expand 'foot' to have a hit for 'foo' // c. 
Keep both the exact hit for 'foo' and the prefix hit for 'foot', the - // latter with score as the sum of the scores. + // latter with term frequency as the sum of the term frequencies. ICING_ASSERT_OK_AND_ASSIGN( std::vector<TermIdHitPair> expanded_term_id_hit_pairs, MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_, @@ -235,7 +237,7 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) { TermIdHitPair(foo_main_term_id, doc0_prefix_hit))); } -TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) { +TEST_F(MainIndexMergerTest, DedupeWithExactSameTermFrequencies) { // 1. Index one doc in the Lite Index: // - Doc0 {"foot" "foo" is_in_prefix_section=TRUE} ICING_ASSERT_OK_AND_ASSIGN( @@ -250,14 +252,15 @@ TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE)); - Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57, + Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit)); - Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57, + Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit)); - // The prefix hit should take the sum as score - 114. - Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/114, + // The prefix hit should take the sum as term_frequency - 114. + Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, + /*term_frequency=*/114, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); @@ -285,7 +288,7 @@ TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) { // a. Translate lite term ids to main term ids based on the map // b. Expand 'foot' to have a hit for 'foo' // c. 
Keep both the exact hit for 'foo' and the prefix hit for 'foot', the - // latter with score as the sum of the scores. + // latter with term frequency as the sum of the term frequencies. ICING_ASSERT_OK_AND_ASSIGN( std::vector<TermIdHitPair> expanded_term_id_hit_pairs, MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_, @@ -314,10 +317,11 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) { term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE)); Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, - /*score=*/Hit::kMaxHitScore, + /*term_frequency=*/Hit::kMaxTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit)); - Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, fool_doc0_hit)); @@ -327,9 +331,10 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) { ICING_ASSERT_OK_AND_ASSIGN( uint32_t foo_term_id, term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN)); - // The prefix hit should take the sum as score - 256, capped at kMaxHitScore. + // The prefix hit should take the sum as term frequency - 256, capped at + // kMaxTermFrequency. Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, - /*score=*/Hit::kMaxHitScore, + /*term_frequency=*/Hit::kMaxTermFrequency, /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); uint32_t foot_main_tvi = 5; @@ -356,8 +361,8 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) { // 3. TranslateAndExpand should; // a. Translate lite term ids to main term ids based on the map // b. Expand 'foot' and 'fool' to have hits for 'foo' - // c. Merge the prefix hits from 'foot' and 'fool', taking the sum as hit - // score. + // c. Merge the prefix hits from 'foot' and 'fool', taking the sum as + // term frequency. 
ICING_ASSERT_OK_AND_ASSIGN( std::vector<TermIdHitPair> expanded_term_id_hit_pairs, MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_, diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc index ff1c47a..636f631 100644 --- a/icing/index/main/main-index.cc +++ b/icing/index/main/main-index.cc @@ -579,7 +579,8 @@ libtextclassifier3::Status MainIndex::AddPrefixBackfillHits( } // A backfill hit is a prefix hit in a prefix section. - const Hit backfill_hit(hit.section_id(), hit.document_id(), hit.score(), + const Hit backfill_hit(hit.section_id(), hit.document_id(), + hit.term_frequency(), /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true); if (backfill_hit == last_added_hit) { diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc index 0f87b09..74139be 100644 --- a/icing/index/main/main-index_test.cc +++ b/icing/index/main/main-index_test.cc @@ -145,7 +145,7 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsValidAccessor) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); @@ -182,7 +182,7 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForExactReturnsValidAccessor) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); @@ -219,18 +219,18 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit 
doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit)); ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit)); - Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore, + Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit)); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit)); - Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore, + Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit)); ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit)); @@ -292,18 +292,18 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit)); ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit)); - Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore, + Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit)); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit)); - Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore, + Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency, 
/*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit)); ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit)); @@ -345,14 +345,14 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t fall_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultHitScore, + Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc3_hit)); ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc3_hit)); ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc3_hit)); ICING_ASSERT_OK(lite_index_->AddHit(fall_term_id, doc3_hit)); - Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultHitScore, + Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc4_hit)); ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc4_hit)); @@ -404,15 +404,15 @@ TEST_F(MainIndexTest, ExactRetrievedInPrefixSearch) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); - Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore, + Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit)); - Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore, + Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc2_hit)); @@ 
-453,15 +453,15 @@ TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); - Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore, + Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit)); - Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore, + Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc2_hit)); @@ -500,17 +500,17 @@ TEST_F(MainIndexTest, SearchChainedPostingLists) { for (DocumentId document_id = 0; document_id < 2048; ++document_id) { Hit doc_hit0(/*section_id=*/0, /*document_id=*/document_id, - Hit::kDefaultHitScore, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0)); Hit doc_hit1(/*section_id=*/1, /*document_id=*/document_id, - Hit::kDefaultHitScore, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1)); Hit doc_hit2(/*section_id=*/2, /*document_id=*/document_id, - Hit::kDefaultHitScore, + Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2)); } @@ -543,7 +543,7 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore, + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, 
Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true); ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit)); @@ -570,7 +570,7 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) { ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); - Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore, + Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false); ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit)); diff --git a/icing/index/main/posting-list-accessor_test.cc b/icing/index/main/posting-list-accessor_test.cc index 85f6d4a..a539fe4 100644 --- a/icing/index/main/posting-list-accessor_test.cc +++ b/icing/index/main/posting-list-accessor_test.cc @@ -82,7 +82,7 @@ TEST(PostingListAccessorStorageTest, PreexistingPLKeepOnSameBlock) { ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor, PostingListAccessor::Create(&flash_index_storage)); // Add a single hit. This will fit in a min-sized posting list. 
- Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore); + Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency); ICING_ASSERT_OK(pl_accessor.PrependHit(hit1)); PostingListAccessor::FinalizeResult result1 = PostingListAccessor::Finalize(std::move(pl_accessor)); @@ -324,14 +324,14 @@ TEST(PostingListAccessorStorageTest, HitsNotDecreasingReturnsInvalidArgument) { FlashIndexStorage::Create(file_name, &filesystem)); ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor, PostingListAccessor::Create(&flash_index_storage)); - Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultHitScore); + Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency); ICING_ASSERT_OK(pl_accessor.PrependHit(hit1)); - Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultHitScore); + Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency); EXPECT_THAT(pl_accessor.PrependHit(hit2), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); - Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore); + Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency); EXPECT_THAT(pl_accessor.PrependHit(hit3), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } @@ -364,7 +364,7 @@ TEST(PostingListAccessorStorageTest, PreexistingPostingListNoHitsAdded) { FlashIndexStorage::Create(file_name, &filesystem)); ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor, PostingListAccessor::Create(&flash_index_storage)); - Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultHitScore); + Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency); ICING_ASSERT_OK(pl_accessor.PrependHit(hit1)); PostingListAccessor::FinalizeResult result1 = PostingListAccessor::Finalize(std::move(pl_accessor)); diff --git a/icing/index/main/posting-list-used.cc b/icing/index/main/posting-list-used.cc index a439c45..62e73e5 100644 --- a/icing/index/main/posting-list-used.cc +++ 
b/icing/index/main/posting-list-used.cc @@ -30,8 +30,8 @@ namespace lib { namespace { -uint32_t GetScoreByteSize(const Hit &hit) { - return hit.has_score() ? sizeof(Hit::Score) : 0; +uint32_t GetTermFrequencyByteSize(const Hit &hit) { + return hit.has_term_frequency() ? sizeof(Hit::TermFrequency) : 0; } } // namespace @@ -153,21 +153,21 @@ libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull( uint64_t delta = cur.value() - hit.value(); uint8_t delta_buf[VarInt::kMaxEncodedLen64]; size_t delta_len = VarInt::Encode(delta, delta_buf); - uint32_t cur_score_bytes = GetScoreByteSize(cur); + uint32_t cur_term_frequency_bytes = GetTermFrequencyByteSize(cur); uint32_t pad_end = GetPadEnd(posting_list_utils::kSpecialHitsSize); - if (pad_end >= - posting_list_utils::kSpecialHitsSize + delta_len + cur_score_bytes) { - // Pad area has enough space for delta and score of existing hit - // (cur). Write delta at pad_end - delta_len - cur_score_bytes. + if (pad_end >= posting_list_utils::kSpecialHitsSize + delta_len + + cur_term_frequency_bytes) { + // Pad area has enough space for delta and term_frequency of existing hit + // (cur). Write delta at pad_end - delta_len - cur_term_frequency_bytes. uint8_t *delta_offset = - posting_list_buffer_ + pad_end - delta_len - cur_score_bytes; + posting_list_buffer_ + pad_end - delta_len - cur_term_frequency_bytes; memcpy(delta_offset, delta_buf, delta_len); - // Now copy score. - Hit::Score score = cur.score(); - uint8_t *score_offset = delta_offset + delta_len; - memcpy(score_offset, &score, cur_score_bytes); + // Now copy term_frequency. + Hit::TermFrequency term_frequency = cur.term_frequency(); + uint8_t *term_frequency_offset = delta_offset + delta_len; + memcpy(term_frequency_offset, &term_frequency, cur_term_frequency_bytes); // Now first hit is the new hit, at special position 1. Safe to ignore the // return value because 1 < kNumSpecialHits. 
@@ -224,12 +224,12 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull( uint64_t delta = cur_value - hit.value(); uint8_t delta_buf[VarInt::kMaxEncodedLen64]; size_t delta_len = VarInt::Encode(delta, delta_buf); - uint32_t hit_score_bytes = GetScoreByteSize(hit); + uint32_t hit_term_frequency_bytes = GetTermFrequencyByteSize(hit); // offset now points to one past the end of the first hit. offset += sizeof(Hit::Value); if (posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value) + delta_len + - hit_score_bytes <= + hit_term_frequency_bytes <= offset) { // Enough space for delta in compressed area. @@ -237,15 +237,15 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull( offset -= delta_len; memcpy(posting_list_buffer_ + offset, delta_buf, delta_len); - // Prepend new hit with (possibly) its score. We know that there is room - // for 'hit' because of the if statement above, so calling ValueOrDie is - // safe. + // Prepend new hit with (possibly) its term_frequency. We know that there is + // room for 'hit' because of the if statement above, so calling ValueOrDie + // is safe. offset = PrependHitUncompressed(hit, offset).ValueOrDie(); // offset is guaranteed to be valid here. So it's safe to ignore the return // value. The if above will guarantee that offset >= kSpecialHitSize and < // size_in_bytes_ because the if ensures that there is enough room between // offset and kSpecialHitSize to fit the delta of the previous hit, any - // score and the uncompressed hit. + // term_frequency and the uncompressed hit. set_start_byte_offset(offset); } else if (posting_list_utils::kSpecialHitsSize + delta_len <= offset) { // Only have space for delta. The new hit must be put in special @@ -273,14 +273,11 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull( // move first hit to special position 1 and put new hit in // special position 0. Hit cur(cur_value); - if (cur.has_score()) { - // offset is < kSpecialHitsSize + delta_len. 
delta_len is at most 5 bytes. - // Therefore, offset must be less than kSpecialHitSize + 5. Since posting - // list size must be divisible by sizeof(Hit) (5), it is guaranteed that - // offset < size_in_bytes, so it is safe to call ValueOrDie here. - cur = Hit(cur_value, ReadScore(offset).ValueOrDie()); - offset += sizeof(Hit::Score); - } + // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes. + // Therefore, offset must be less than kSpecialHitSize + 5. Since posting + // list size must be divisible by sizeof(Hit) (5), it is guaranteed that + // offset < size_in_bytes, so it is safe to ignore the return value here. + ConsumeTermFrequencyIfPresent(&cur, &offset); // Safe to ignore the return value of PadToEnd because offset must be less // than size_in_bytes_. Otherwise, this function already would have returned // FAILED_PRECONDITION. @@ -437,18 +434,17 @@ libtextclassifier3::Status PostingListUsed::GetHitsInternal( val += delta; } Hit hit(val); - if (hit.has_score()) { - auto score_or = ReadScore(offset); - if (!score_or.ok()) { - // This posting list has been corrupted somehow. The first hit of the - // posting list claims to have a score, but there's no more room in the - // posting list for that score to exist. Return an empty vector and zero - // to indicate no hits retrieved. + libtextclassifier3::Status status = + ConsumeTermFrequencyIfPresent(&hit, &offset); + if (!status.ok()) { + // This posting list has been corrupted somehow. The first hit of the + // posting list claims to have a term frequency, but there's no more room + // in the posting list for that term frequency to exist. Return an empty + // vector and zero to indicate no hits retrieved. 
+ if (out != nullptr) { out->clear(); - return absl_ports::InternalError("Posting list has been corrupted!"); } - hit = Hit(val, score_or.ValueOrDie()); - offset += sizeof(Hit::Score); + return absl_ports::InternalError("Posting list has been corrupted!"); } if (out != nullptr) { out->push_back(hit); @@ -475,21 +471,21 @@ libtextclassifier3::Status PostingListUsed::GetHitsInternal( offset -= sizeof(Hit::Value); memcpy(posting_list_buffer_ + offset, &val, sizeof(Hit::Value)); } else { - // val won't fit in compressed area. Also see if there is a score. + // val won't fit in compressed area. Also see if there is a + // term_frequency. Hit hit(val); - if (hit.has_score()) { - auto score_or = ReadScore(offset); - if (!score_or.ok()) { - // This posting list has been corrupted somehow. The first hit of - // the posting list claims to have a score, but there's no more room - // in the posting list for that score to exist. Return an empty - // vector and zero to indicate no hits retrieved. Do not pop - // anything. + libtextclassifier3::Status status = + ConsumeTermFrequencyIfPresent(&hit, &offset); + if (!status.ok()) { + // This posting list has been corrupted somehow. The first hit of + // the posting list claims to have a term frequency, but there's no + // more room in the posting list for that term frequency to exist. + // Return an empty vector and zero to indicate no hits retrieved. Do + // not pop anything. + if (out != nullptr) { out->clear(); - return absl_ports::InternalError( - "Posting list has been corrupted!"); } - hit = Hit(val, score_or.ValueOrDie()); + return absl_ports::InternalError("Posting list has been corrupted!"); } // Okay to ignore the return value here because 1 < kNumSpecialHits. 
mutable_this->set_special_hit(1, hit); @@ -640,7 +636,7 @@ bool PostingListUsed::set_start_byte_offset(uint32_t offset) { libtextclassifier3::StatusOr<uint32_t> PostingListUsed::PrependHitUncompressed( const Hit &hit, uint32_t offset) { - if (hit.has_score()) { + if (hit.has_term_frequency()) { if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit)) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( "Not enough room to prepend Hit at offset %d.", offset)); @@ -659,16 +655,23 @@ libtextclassifier3::StatusOr<uint32_t> PostingListUsed::PrependHitUncompressed( return offset; } -libtextclassifier3::StatusOr<Hit::Score> PostingListUsed::ReadScore( - uint32_t offset) const { - if (offset + sizeof(Hit::Score) > size_in_bytes_) { +libtextclassifier3::Status PostingListUsed::ConsumeTermFrequencyIfPresent( + Hit *hit, uint32_t *offset) const { + if (!hit->has_term_frequency()) { + // No term frequency to consume. Everything is fine. + return libtextclassifier3::Status::OK; + } + if (*offset + sizeof(Hit::TermFrequency) > size_in_bytes_) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( "offset %d must not point past the end of the posting list of size %d.", - offset, size_in_bytes_)); + *offset, size_in_bytes_)); } - Hit::Score score; - memcpy(&score, posting_list_buffer_ + offset, sizeof(Hit::Score)); - return score; + Hit::TermFrequency term_frequency; + memcpy(&term_frequency, posting_list_buffer_ + *offset, + sizeof(Hit::TermFrequency)); + *hit = Hit(hit->value(), term_frequency); + *offset += sizeof(Hit::TermFrequency); + return libtextclassifier3::Status::OK; } } // namespace lib diff --git a/icing/index/main/posting-list-used.h b/icing/index/main/posting-list-used.h index 8bc9c8d..1b2e24e 100644 --- a/icing/index/main/posting-list-used.h +++ b/icing/index/main/posting-list-used.h @@ -155,12 +155,12 @@ class PostingListUsed { // starts somewhere between [kSpecialHitsSize, kSpecialHitsSize + sizeof(Hit) // - 1] and 
ends at size_in_bytes - 1. // - // Hit scores are stored after the hit value, compressed or + // Hit term frequencies are stored after the hit value, compressed or // uncompressed. For the first two special hits, we always have a - // space for the score. For hits in the compressed area, we only have - // the score following the hit value of hit.has_score() is true. This - // allows good compression in the common case where hits don't have a - // specific score. + // space for the term frequency. For hits in the compressed area, we only have + // the term frequency following the hit value of hit.has_term_frequency() is + // true. This allows good compression in the common case where hits don't have + // a valid term frequency. // // EXAMPLE // Posting list storage. Posting list size: 20 bytes @@ -175,7 +175,8 @@ class PostingListUsed { // | 16 |Hit::kInvalidVal| 0x000 | 0x07FFF998 | // +-------------+----------------+-----------------+----------------------+ // - // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4, Score=125) + // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4, + // TermFrequency=125) // (Hit 0x07FFF998 - Hit 0x07FFF684 = 788) // +--bytes 0-4--+----- 5-9 ------+-- 10-12 --+-- 13-16 --+- 17 -+-- 18-19 --+ // | 13 |Hit::kInvalidVal| 0x000 | 0x07FFF684| 125 | 788 | @@ -187,9 +188,9 @@ class PostingListUsed { // | 9 |Hit::kInvVal| 0x00 |0x07FFF4D2| 434 | 125 | 788 | // +-------------+------------+--------+----------+---------+------+---------+ // - // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6, Score = 87) - // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 196) - // ALMOST FULL! + // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6, + // TermFrequency = 87) + // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 196) ALMOST FULL! 
// +--bytes 0-4-+---- 5-9 ----+- 10-12 -+- 13-14 -+- 15-16 -+- 17 -+- 18-19 -+ // |Hit::kInvVal|0x07FFF40E,87| 0x000 | 196 | 434 | 125 | 788 | // +-------------+------------+---------+---------+---------+------+---------+ @@ -302,13 +303,17 @@ class PostingListUsed { libtextclassifier3::StatusOr<uint32_t> PrependHitUncompressed( const Hit &hit, uint32_t offset); - // Reads the score located at offset and returns it. Callers are responsible - // for ensuring that the bytes starting at offset actually represent a score. + // If hit has a term frequency, consumes the term frequency at offset, updates + // hit to include the term frequency and updates offset to reflect that the + // term frequency has been consumed. // // RETURNS: - // - The score located at offset, if successful - // - INVALID_ARGUMENT if offset + sizeof(Hit::Score) >= size_in_bytes_ - libtextclassifier3::StatusOr<Hit::Score> ReadScore(uint32_t offset) const; + // - OK, if successful + // - INVALID_ARGUMENT if hit has a term frequency and offset + + // sizeof(Hit::TermFrequency) >= + // size_in_bytes_ + libtextclassifier3::Status ConsumeTermFrequencyIfPresent( + Hit *hit, uint32_t *offset) const; // A byte array of size size_in_bytes_ containing encoded hits for this // posting list. diff --git a/icing/index/main/posting-list-used_test.cc b/icing/index/main/posting-list-used_test.cc index f6b5898..044d0c1 100644 --- a/icing/index/main/posting-list-used_test.cc +++ b/icing/index/main/posting-list-used_test.cc @@ -73,37 +73,37 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) { static_cast<void *>(hits_buf.get()), kHitsSize)); // Make used. 
- Hit hit0(/*section_id=*/0, 0, /*score=*/56); + Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56); pl_used.PrependHit(hit0); // Size = sizeof(uncompressed hit0) int expected_size = sizeof(Hit); EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0))); - Hit hit1(/*section_id=*/0, 1, Hit::kDefaultHitScore); + Hit hit1(/*section_id=*/0, 1, Hit::kDefaultTermFrequency); pl_used.PrependHit(hit1); // Size = sizeof(uncompressed hit1) - // + sizeof(hit0-hit1) + sizeof(hit0::score) - expected_size += 2 + sizeof(Hit::Score); + // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency) + expected_size += 2 + sizeof(Hit::TermFrequency); EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0))); - Hit hit2(/*section_id=*/0, 2, /*score=*/56); + Hit hit2(/*section_id=*/0, 2, /*term_frequency=*/56); pl_used.PrependHit(hit2); // Size = sizeof(uncompressed hit2) // + sizeof(hit1-hit2) - // + sizeof(hit0-hit1) + sizeof(hit0::score) + // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency) expected_size += 2; EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0))); - Hit hit3(/*section_id=*/0, 3, Hit::kDefaultHitScore); + Hit hit3(/*section_id=*/0, 3, Hit::kDefaultTermFrequency); pl_used.PrependHit(hit3); // Size = sizeof(uncompressed hit3) - // + sizeof(hit2-hit3) + sizeof(hit2::score) + // + sizeof(hit2-hit3) + sizeof(hit2::term_frequency) // + sizeof(hit1-hit2) - // + sizeof(hit0-hit1) + sizeof(hit0::score) - expected_size += 2 + sizeof(Hit::Score); + // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency) + expected_size += 2 + sizeof(Hit::TermFrequency); EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0))); @@ -122,7 +122,7 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) { // Adding hit0: EMPTY 
-> NOT_FULL // Adding hit1: NOT_FULL -> NOT_FULL // Adding hit2: NOT_FULL -> NOT_FULL - Hit hit0(/*section_id=*/0, 0, Hit::kDefaultHitScore); + Hit hit0(/*section_id=*/0, 0, Hit::kDefaultTermFrequency); Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2); Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2); ICING_EXPECT_OK(pl_used.PrependHit(hit0)); @@ -189,7 +189,8 @@ TEST(PostingListTest, PostingListUsedMinSize) { EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty())); // Add a hit, PL should shift to ALMOST_FULL state - Hit hit0(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false, + Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/0, + /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true); ICING_EXPECT_OK(pl_used.PrependHit(hit0)); // Size = sizeof(uncompressed hit0) @@ -197,9 +198,10 @@ TEST(PostingListTest, PostingListUsedMinSize) { EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size)); EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0))); - // Add the smallest hit possible - no score and a delta of 1. PL should shift - // to FULL state. - Hit hit1(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/true, + // Add the smallest hit possible - no term_frequency and a delta of 1. PL + // should shift to FULL state. + Hit hit1(/*section_id=*/0, 0, /*term_frequency=*/0, + /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false); ICING_EXPECT_OK(pl_used.PrependHit(hit1)); // Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0) @@ -208,7 +210,8 @@ TEST(PostingListTest, PostingListUsedMinSize) { EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0))); // Try to add the smallest hit possible. 
Should fail - Hit hit2(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false, + Hit hit2(/*section_id=*/0, 0, /*term_frequency=*/0, + /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false); EXPECT_THAT(pl_used.PrependHit(hit2), StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); @@ -227,7 +230,7 @@ TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) { static_cast<void *>(hits_buf.get()), size)); std::vector<HitElt> hits_in; - hits_in.emplace_back(Hit(1, 0, Hit::kDefaultHitScore)); + hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( @@ -268,7 +271,7 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) { static_cast<void *>(hits_buf.get()), size)); std::vector<HitElt> hits_in; - hits_in.emplace_back(Hit(1, 0, Hit::kDefaultHitScore)); + hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency)); hits_in.emplace_back( CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1)); hits_in.emplace_back( @@ -332,7 +335,7 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) { // 14-11 Hit #11 // 10 <unused> // 9-5 kSpecialHit - // 4-0 Offset=22 + // 4-0 Offset=11 // ---------------------- byte_size += 11; @@ -423,9 +426,9 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) { TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) { static constexpr int kNumHits = 128; static constexpr int kDeltaSize = 1; - static constexpr int kScoreSize = 1; + static constexpr int kTermFrequencySize = 1; static constexpr size_t kHitsSize = - ((kNumHits * (kDeltaSize + kScoreSize)) / 5) * 5; + ((kNumHits * (kDeltaSize + kTermFrequencySize)) / 5) * 5; std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize); @@ -654,5 +657,56 @@ TEST(PostingListTest, MoveToPostingListTooSmall) { IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend()))); } +TEST(PostingListTest, PopHitsWithScores) 
{ + int size = 2 * posting_list_utils::min_posting_list_size(); + std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size); + ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used, + PostingListUsed::CreateFromUnitializedRegion( + static_cast<void *>(hits_buf1.get()), size)); + + // This posting list is 20-bytes. Create four hits that will have deltas of + // two bytes each and all of whom will have a non-default score. This posting + // list will be almost_full. + // + // ---------------------- + // 19 score(Hit #0) + // 18-17 delta(Hit #0) + // 16 score(Hit #1) + // 15-14 delta(Hit #1) + // 13 score(Hit #2) + // 12-11 delta(Hit #2) + // 10 <unused> + // 9-5 Hit #3 + // 4-0 kInvalidHitVal + // ---------------------- + Hit hit0(/*section_id=*/0, /*document_id=*/0, /*score=*/5); + Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2); + Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2); + Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/2); + ICING_ASSERT_OK(pl_used.PrependHit(hit0)); + ICING_ASSERT_OK(pl_used.PrependHit(hit1)); + ICING_ASSERT_OK(pl_used.PrependHit(hit2)); + ICING_ASSERT_OK(pl_used.PrependHit(hit3)); + + ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> hits_out, pl_used.GetHits()); + EXPECT_THAT(hits_out, ElementsAre(hit3, hit2, hit1, hit0)); + + // Now, pop the last hit. The posting list should contain the first three + // hits. 
+ // + // ---------------------- + // 19 score(Hit #0) + // 18-17 delta(Hit #0) + // 16 score(Hit #1) + // 15-14 delta(Hit #1) + // 13-10 <unused> + // 9-5 Hit #2 + // 4-0 kInvalidHitVal + // ---------------------- + ICING_ASSERT_OK(pl_used.PopFrontHits(1)); + ICING_ASSERT_OK_AND_ASSIGN(hits_out, pl_used.GetHits()); + EXPECT_THAT(hits_out, ElementsAre(hit2, hit1, hit0)); +} + } // namespace lib } // namespace icing diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc index 1e20340..a18a183 100644 --- a/icing/jni/icing-search-engine-jni.cc +++ b/icing/jni/icing-search-engine-jni.cc @@ -31,6 +31,11 @@ #include "icing/util/status-macros.h" namespace { + +// JNI string constants +// Matches field name of IcingSearchEngine#nativePointer. +const char kNativePointerField[] = "nativePointer"; + bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes, google::protobuf::MessageLite* protobuf) { int bytes_size = env->GetArrayLength(bytes); @@ -58,8 +63,11 @@ jbyteArray SerializeProtoToJniByteArray( return ret; } -icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer( - jlong native_pointer) { +icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env, + jobject object) { + jclass cls = env->GetObjectClass(object); + jfieldID field_id = env->GetFieldID(cls, kNativePointerField, "J"); + jlong native_pointer = env->GetLongField(object, field_id); return reinterpret_cast<icing::lib::IcingSearchEngine*>(native_pointer); } @@ -99,17 +107,17 @@ Java_com_google_android_icing_IcingSearchEngine_nativeCreate( JNIEXPORT void JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeDestroy( - JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); delete icing; } JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeInitialize( 
- JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::InitializeResultProto initialize_result_proto = icing->Initialize(); @@ -119,10 +127,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeInitialize( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema( - JNIEnv* env, jclass clazz, jlong native_pointer, jbyteArray schema_bytes, + JNIEnv* env, jclass clazz, jobject object, jbyteArray schema_bytes, jboolean ignore_errors_and_delete_documents) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::SchemaProto schema_proto; if (!ParseProtoFromJniByteArray(env, schema_bytes, &schema_proto)) { @@ -138,9 +146,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema( - JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::GetSchemaResultProto get_schema_result_proto = icing->GetSchema(); @@ -149,9 +157,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType( - JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) { + JNIEnv* env, jclass clazz, jobject object, jstring schema_type) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); const char* native_schema_type = env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr); @@ -163,10 +171,9 @@ 
Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativePut( - JNIEnv* env, jclass clazz, jlong native_pointer, - jbyteArray document_bytes) { + JNIEnv* env, jclass clazz, jobject object, jbyteArray document_bytes) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::DocumentProto document_proto; if (!ParseProtoFromJniByteArray(env, document_bytes, &document_proto)) { @@ -182,10 +189,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativePut( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGet( - JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space, + JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); const char* native_name_space = env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); @@ -198,10 +205,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGet( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeReportUsage( - JNIEnv* env, jclass clazz, jlong native_pointer, - jbyteArray usage_report_bytes) { + JNIEnv* env, jclass clazz, jobject object, jbyteArray usage_report_bytes) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::UsageReport usage_report; if (!ParseProtoFromJniByteArray(env, usage_report_bytes, &usage_report)) { @@ -217,9 +223,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeReportUsage( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces( - JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* 
icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::GetAllNamespacesResultProto get_all_namespaces_result_proto = icing->GetAllNamespaces(); @@ -229,9 +235,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGetNextPage( - JNIEnv* env, jclass clazz, jlong native_pointer, jlong next_page_token) { + JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::SearchResultProto next_page_result_proto = icing->GetNextPage(next_page_token); @@ -241,9 +247,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetNextPage( JNIEXPORT void JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeInvalidateNextPageToken( - JNIEnv* env, jclass clazz, jlong native_pointer, jlong next_page_token) { + JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing->InvalidateNextPageToken(next_page_token); @@ -252,11 +258,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeInvalidateNextPageToken( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeSearch( - JNIEnv* env, jclass clazz, jlong native_pointer, - jbyteArray search_spec_bytes, jbyteArray scoring_spec_bytes, - jbyteArray result_spec_bytes) { + JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes, + jbyteArray scoring_spec_bytes, jbyteArray result_spec_bytes) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::SearchSpecProto search_spec_proto; if (!ParseProtoFromJniByteArray(env, search_spec_bytes, 
&search_spec_proto)) { @@ -285,10 +290,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSearch( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeDelete( - JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space, + JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); const char* native_name_space = env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); @@ -301,9 +306,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDelete( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace( - JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space) { + JNIEnv* env, jclass clazz, jobject object, jstring name_space) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); const char* native_name_space = env->GetStringUTFChars(name_space, /*isCopy=*/nullptr); @@ -315,9 +320,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType( - JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) { + JNIEnv* env, jclass clazz, jobject object, jstring schema_type) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); const char* native_schema_type = env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr); @@ -329,17 +334,16 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery( - JNIEnv* env, jclass clazz, jlong native_pointer, - jbyteArray search_spec_bytes) { + JNIEnv* env, jclass clazz, jobject object, 
jbyteArray search_spec_bytes) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::SearchSpecProto search_spec_proto; if (!ParseProtoFromJniByteArray(env, search_spec_bytes, &search_spec_proto)) { ICING_LOG(ERROR) << "Failed to parse SearchSpecProto in nativeSearch"; return nullptr; } - icing::lib::DeleteResultProto delete_result_proto = + icing::lib::DeleteByQueryResultProto delete_result_proto = icing->DeleteByQuery(search_spec_proto); return SerializeProtoToJniByteArray(env, delete_result_proto); @@ -347,9 +351,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk( - JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::PersistToDiskResultProto persist_to_disk_result_proto = icing->PersistToDisk(); @@ -359,9 +363,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeOptimize( - JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::OptimizeResultProto optimize_result_proto = icing->Optimize(); @@ -370,9 +374,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeOptimize( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo( - JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, 
object); icing::lib::GetOptimizeInfoResultProto get_optimize_info_result_proto = icing->GetOptimizeInfo(); @@ -382,9 +386,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo( JNIEXPORT jbyteArray JNICALL Java_com_google_android_icing_IcingSearchEngine_nativeReset( - JNIEnv* env, jclass clazz, jlong native_pointer) { + JNIEnv* env, jclass clazz, jobject object) { icing::lib::IcingSearchEngine* icing = - GetIcingSearchEnginePointer(native_pointer); + GetIcingSearchEnginePointer(env, object); icing::lib::ResetResultProto reset_result_proto = icing->Reset(); diff --git a/icing/result/page-result-state.h b/icing/result/page-result-state.h index 85f1dd7..5932b56 100644 --- a/icing/result/page-result-state.h +++ b/icing/result/page-result-state.h @@ -31,12 +31,13 @@ struct PageResultState { uint64_t next_page_token_in, SnippetContext snippet_context_in, std::unordered_map<std::string, ProjectionTree> tree_map, - int num_previously_returned_in) + int num_previously_returned_in, int num_per_page_in) : scored_document_hits(std::move(scored_document_hits_in)), next_page_token(next_page_token_in), snippet_context(std::move(snippet_context_in)), projection_tree_map(std::move(tree_map)), - num_previously_returned(num_previously_returned_in) {} + num_previously_returned(num_previously_returned_in), + requested_page_size(num_per_page_in) {} // Results of one page std::vector<ScoredDocumentHit> scored_document_hits; @@ -52,6 +53,10 @@ struct PageResultState { // Number of results that have been returned in previous pages. int num_previously_returned; + + // The page size for this query. 
This should always be >= + // scored_document_hits.size(); + int requested_page_size; }; } // namespace lib diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h index 7ace295..a87a8fc 100644 --- a/icing/result/projection-tree.h +++ b/icing/result/projection-tree.h @@ -26,6 +26,8 @@ namespace lib { class ProjectionTree { public: + static constexpr std::string_view kSchemaTypeWildcard = "*"; + struct Node { explicit Node(std::string_view name = "") : name(name) {} diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc index ff6320b..0b8ad4a 100644 --- a/icing/result/result-retriever.cc +++ b/icing/result/result-retriever.cc @@ -31,7 +31,7 @@ namespace lib { namespace { void Project(const std::vector<ProjectionTree::Node>& projection_tree, - proto2::RepeatedPtrField<PropertyProto>* properties) { + google::protobuf::RepeatedPtrField<PropertyProto>* properties) { int num_kept = 0; for (int cur_pos = 0; cur_pos < properties->size(); ++cur_pos) { PropertyProto* prop = properties->Mutable(cur_pos); @@ -97,6 +97,9 @@ ResultRetriever::RetrieveResults( remaining_num_to_snippet = 0; } + auto wildcard_projection_tree_itr = + page_result_state.projection_tree_map.find( + std::string(ProjectionTree::kSchemaTypeWildcard)); for (const auto& scored_document_hit : page_result_state.scored_document_hits) { libtextclassifier3::StatusOr<DocumentProto> document_or = @@ -118,9 +121,14 @@ ResultRetriever::RetrieveResults( // Apply projection auto itr = page_result_state.projection_tree_map.find( document_or.ValueOrDie().schema()); + if (itr != page_result_state.projection_tree_map.end()) { Project(itr->second.root().children, document_or.ValueOrDie().mutable_properties()); + } else if (wildcard_projection_tree_itr != + page_result_state.projection_tree_map.end()) { + Project(wildcard_projection_tree_itr->second.root().children, + document_or.ValueOrDie().mutable_properties()); } SearchResultProto::ResultProto result; diff --git 
a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc index 82e32ee..98cc75a 100644 --- a/icing/result/result-retriever_test.cc +++ b/icing/result/result-retriever_test.cc @@ -30,6 +30,7 @@ #include "icing/proto/term.pb.h" #include "icing/result/projection-tree.h" #include "icing/schema/schema-store.h" +#include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" @@ -91,7 +92,7 @@ class ResultRetrieverTest : public testing::Test { type->set_schema_type("Email"); auto* subj = type->add_properties(); - subj->set_property_name("subject"); + subj->set_property_name("name"); subj->set_data_type(PropertyConfigProto::DataType::STRING); subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL); subj->mutable_string_indexing_config()->set_term_match_type( @@ -136,6 +137,25 @@ class ResultRetrieverTest : public testing::Test { return schema; } + SectionId GetSectionId(const std::string& type, const std::string& property) { + auto type_id_or = schema_store_->GetSchemaTypeId(type); + if (!type_id_or.ok()) { + return kInvalidSectionId; + } + SchemaTypeId type_id = type_id_or.ValueOrDie(); + for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) { + auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id); + if (!metadata_or.ok()) { + break; + } + const SectionMetadata* metadata = metadata_or.ValueOrDie(); + if (metadata->path == property) { + return metadata->id; + } + } + return kInvalidSectionId; + } + const Filesystem filesystem_; const std::string test_dir_; std::unique_ptr<LanguageSegmenter> language_segmenter_; @@ -156,12 +176,20 @@ DocumentProto CreateDocument(int id) { return DocumentBuilder() .SetKey("icing", "Email/" + std::to_string(id)) .SetSchema("Email") - .AddStringProperty("subject", "subject foo " + std::to_string(id)) + .AddStringProperty("name", "subject foo " + std::to_string(id)) 
.AddStringProperty("body", "body bar " + std::to_string(id)) .SetCreationTimestampMs(1574365086666 + id) .Build(); } +SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) { + SectionIdMask mask = 0; + for (SectionId section_id : section_ids) { + mask |= (1u << section_id); + } + return mask; +} + TEST_F(ResultRetrieverTest, CreationWithNullPointerShouldFail) { EXPECT_THAT( ResultRetriever::Create(/*doc_store=*/nullptr, schema_store_.get(), @@ -204,10 +232,13 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store->Put(CreateDocument(/*id=*/3))); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -228,7 +259,8 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/3); EXPECT_THAT( result_retriever->RetrieveResults(page_result_state), IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2), @@ -249,10 +281,13 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) { doc_store->Put(CreateDocument(/*id=*/2))); DocumentId invalid_document_id = -1; + 
std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {invalid_document_id, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {invalid_document_id, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -272,7 +307,8 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/3); EXPECT_THAT( result_retriever->RetrieveResults(page_result_state), IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2)))); @@ -292,10 +328,13 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) { doc_store->Put(CreateDocument(/*id=*/2))); DocumentId invalid_document_id = -1; + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {invalid_document_id, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {invalid_document_id, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( 
std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -310,16 +349,16 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/3); EXPECT_THAT(result_retriever->RetrieveResults(page_result_state), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); DocumentId non_existing_document_id = 4; page_result_state.scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {non_existing_document_id, /*hit_section_id_mask=*/0b00001001, - /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {non_existing_document_id, hit_section_id_mask, /*score=*/0}}; EXPECT_THAT(result_retriever->RetrieveResults(page_result_state), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } @@ -340,9 +379,12 @@ TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store->Put(CreateDocument(/*id=*/2))); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, @@ -358,7 +400,8 @@ TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) { std::move(scored_document_hits), 
/*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); EXPECT_THAT(result_retriever->RetrieveResults(page_result_state), StatusIs(libtextclassifier3::StatusCode::INTERNAL)); } @@ -378,10 +421,13 @@ TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store->Put(CreateDocument(/*id=*/3))); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -395,7 +441,8 @@ TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/3); ICING_ASSERT_OK_AND_ASSIGN( std::vector<SearchResultProto::ResultProto> results, result_retriever->RetrieveResults(page_result_state)); @@ -423,10 +470,13 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store->Put(CreateDocument(/*id=*/3))); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + 
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -439,16 +489,16 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/3); ICING_ASSERT_OK_AND_ASSIGN( std::vector<SearchResultProto::ResultProto> result, result_retriever->RetrieveResults(page_result_state)); EXPECT_THAT(result, SizeIs(3)); EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1))); - EXPECT_THAT( - GetWindow(result[0].document(), result[0].snippet(), "subject", 0), - Eq("subject foo 1")); - EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0), + EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0), + Eq("subject foo 1")); + EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0), Eq("foo")); EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0), Eq("body bar 1")); @@ -456,10 +506,9 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) { Eq("bar")); EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2))); - EXPECT_THAT( - GetWindow(result[1].document(), result[1].snippet(), "subject", 0), - Eq("subject foo 2")); - EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "subject", 0), 
+ EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "name", 0), + Eq("subject foo 2")); + EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "name", 0), Eq("foo")); EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0), Eq("body bar 2")); @@ -467,10 +516,9 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) { Eq("bar")); EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3))); - EXPECT_THAT( - GetWindow(result[2].document(), result[2].snippet(), "subject", 0), - Eq("subject foo 3")); - EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "subject", 0), + EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "name", 0), + Eq("subject foo 3")); + EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "name", 0), Eq("foo")); EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0), Eq("body bar 3")); @@ -496,10 +544,13 @@ TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) { ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec(); snippet_spec.set_num_to_snippet(1); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -511,16 +562,16 @@ TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) { std::move(scored_document_hits), /*next_page_token_in=*/1, 
std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/3); ICING_ASSERT_OK_AND_ASSIGN( std::vector<SearchResultProto::ResultProto> result, result_retriever->RetrieveResults(page_result_state)); EXPECT_THAT(result, SizeIs(3)); EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1))); - EXPECT_THAT( - GetWindow(result[0].document(), result[0].snippet(), "subject", 0), - Eq("subject foo 1")); - EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0), + EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0), + Eq("subject foo 1")); + EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0), Eq("foo")); EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0), Eq("body bar 1")); @@ -551,10 +602,13 @@ TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store->Put(CreateDocument(/*id=*/3))); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -569,7 +623,8 @@ TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), 
std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/3); ICING_ASSERT_OK_AND_ASSIGN( std::vector<SearchResultProto::ResultProto> result, @@ -598,10 +653,13 @@ TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store->Put(CreateDocument(/*id=*/3))); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -616,7 +674,8 @@ TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/3); + /*num_previously_returned_in=*/3, + /*num_per_page_in=*/3); // num_to_snippet = 5, num_previously_returned_in = 3, // We can return 5 - 3 = 2 snippets. 
@@ -644,10 +703,13 @@ TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) { ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3, doc_store->Put(CreateDocument(/*id=*/3))); + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}, + {document_id3, hit_section_id_mask, /*score=*/0}}; ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), @@ -662,7 +724,8 @@ TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) { std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::unordered_map<std::string, ProjectionTree>(), - /*num_previously_returned_in=*/6); + /*num_previously_returned_in=*/6, + /*num_per_page_in=*/3); // num_to_snippet = 5, num_previously_returned_in = 6, // We can't return any snippets for this page. @@ -689,7 +752,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) { .SetKey("namespace", "uri1") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .AddStringProperty( "body", "Oh what a beautiful morning! 
Oh what a beautiful day!") .Build(); @@ -701,7 +764,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) { .SetKey("namespace", "uri2") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .AddStringProperty("body", "Count all the sheep and tell them 'Hello'.") .Build(); @@ -709,13 +772,16 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) { doc_store->Put(document_two)); // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ResultSpecProto::TypePropertyMask type_property_mask; type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("subject"); + type_property_mask.add_paths("name"); std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; type_projection_tree_map.insert( {"Email", ProjectionTree(type_property_mask)}); @@ -727,14 +793,15 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) { PageResultState page_result_state( std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), language_segmenter_.get(), normalizer_.get())); - // 3. Verify that the returned results only contain the 'subject' property. + // 3. 
Verify that the returned results only contain the 'name' property. ICING_ASSERT_OK_AND_ASSIGN( std::vector<SearchResultProto::ResultProto> result, result_retriever->RetrieveResults(page_result_state)); @@ -745,7 +812,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) { .SetKey("namespace", "uri1") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .Build(); EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); @@ -754,7 +821,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) { .SetKey("namespace", "uri2") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .Build(); EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); } @@ -781,7 +848,7 @@ TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) { .AddStringProperty("name", "Meg Ryan") .AddStringProperty("emailAddress", "shopgirl@aol.com") .Build()) - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .AddStringProperty( "body", "Oh what a beautiful morning! Oh what a beautiful day!") .Build(); @@ -800,17 +867,19 @@ TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) { .AddStringProperty("name", "Tom Hanks") .AddStringProperty("emailAddress", "ny152@aol.com") .Build()) - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .AddStringProperty("body", "Count all the sheep and tell them 'Hello'.") .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store->Put(document_two)); - // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ResultSpecProto::TypePropertyMask type_property_mask; type_property_mask.set_schema_type("Email"); @@ -826,7 +895,8 @@ TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) { PageResultState page_result_state( std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, @@ -891,7 +961,7 @@ TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) { .AddStringProperty("name", "Meg Ryan") .AddStringProperty("emailAddress", "shopgirl@aol.com") .Build()) - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .AddStringProperty( "body", "Oh what a beautiful morning! Oh what a beautiful day!") .Build(); @@ -910,17 +980,19 @@ TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) { .AddStringProperty("name", "Tom Hanks") .AddStringProperty("emailAddress", "ny152@aol.com") .Build()) - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .AddStringProperty("body", "Count all the sheep and tell them 'Hello'.") .Build(); ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, doc_store->Put(document_two)); - // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ResultSpecProto::TypePropertyMask type_property_mask; type_property_mask.set_schema_type("Email"); @@ -936,7 +1008,8 @@ TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) { PageResultState page_result_state( std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, @@ -1004,7 +1077,7 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) { .AddStringProperty("name", "Meg Ryan") .AddStringProperty("emailAddress", "shopgirl@aol.com") .Build()) - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .AddStringProperty( "body", "Oh what a beautiful morning! Oh what a beautiful day!") .Build(); @@ -1023,7 +1096,7 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) { .AddStringProperty("name", "Tom Hanks") .AddStringProperty("emailAddress", "ny152@aol.com") .Build()) - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .AddStringProperty("body", "Count all the sheep and tell them 'Hello'.") .Build(); @@ -1031,9 +1104,12 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) { doc_store->Put(document_two)); // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ResultSpecProto::TypePropertyMask type_property_mask; type_property_mask.set_schema_type("Email"); @@ -1050,7 +1126,8 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) { PageResultState page_result_state( std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, @@ -1110,7 +1187,7 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) { .SetKey("namespace", "uri1") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .AddStringProperty( "body", "Oh what a beautiful morning! Oh what a beautiful day!") .Build(); @@ -1122,7 +1199,7 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) { .SetKey("namespace", "uri2") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .AddStringProperty("body", "Count all the sheep and tell them 'Hello'.") .Build(); @@ -1130,9 +1207,12 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) { doc_store->Put(document_two)); // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ResultSpecProto::TypePropertyMask type_property_mask; type_property_mask.set_schema_type("Email"); @@ -1147,7 +1227,8 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) { PageResultState page_result_state( std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, @@ -1189,7 +1270,7 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) { .SetKey("namespace", "uri1") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .AddStringProperty( "body", "Oh what a beautiful morning! Oh what a beautiful day!") .Build(); @@ -1201,7 +1282,7 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) { .SetKey("namespace", "uri2") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .AddStringProperty("body", "Count all the sheep and tell them 'Hello'.") .Build(); @@ -1209,9 +1290,12 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) { doc_store->Put(document_two)); // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ResultSpecProto::TypePropertyMask type_property_mask; type_property_mask.set_schema_type("Email"); @@ -1227,7 +1311,8 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) { PageResultState page_result_state( std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, @@ -1269,7 +1354,7 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) { .SetKey("namespace", "uri1") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .AddStringProperty( "body", "Oh what a beautiful morning! Oh what a beautiful day!") .Build(); @@ -1281,7 +1366,7 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) { .SetKey("namespace", "uri2") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") .AddStringProperty("body", "Count all the sheep and tell them 'Hello'.") .Build(); @@ -1289,13 +1374,16 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) { doc_store->Put(document_two)); // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); std::vector<ScoredDocumentHit> scored_document_hits = { - {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0}, - {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}}; + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; ResultSpecProto::TypePropertyMask type_property_mask; type_property_mask.set_schema_type("Email"); - type_property_mask.add_paths("subject"); + type_property_mask.add_paths("name"); type_property_mask.add_paths("nonExistentProperty"); std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; type_projection_tree_map.insert( @@ -1308,14 +1396,15 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) { PageResultState page_result_state( std::move(scored_document_hits), /*next_page_token_in=*/1, std::move(snippet_context), std::move(type_projection_tree_map), - /*num_previously_returned_in=*/0); + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ResultRetriever> result_retriever, ResultRetriever::Create(doc_store.get(), schema_store_.get(), language_segmenter_.get(), normalizer_.get())); - // 3. Verify that the returned results only contain the 'subject' property. + // 3. Verify that the returned results only contain the 'name' property. 
ICING_ASSERT_OK_AND_ASSIGN( std::vector<SearchResultProto::ResultProto> result, result_retriever->RetrieveResults(page_result_state)); @@ -1326,7 +1415,7 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) { .SetKey("namespace", "uri1") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Hello World!") + .AddStringProperty("name", "Hello World!") .Build(); EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); @@ -1335,8 +1424,498 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) { .SetKey("namespace", "uri2") .SetCreationTimestampMs(1000) .SetSchema("Email") - .AddStringProperty("subject", "Goodnight Moon!") + .AddStringProperty("name", "Goodnight Moon!") + .Build(); + EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverTest, ProjectionMultipleTypesNoWildcards) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(document_two)); + + // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + ResultSpecProto::TypePropertyMask type_property_mask; + type_property_mask.set_schema_type("Email"); + type_property_mask.add_paths("name"); + std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; + type_projection_tree_map.insert( + {"Email", ProjectionTree(type_property_mask)}); + + SnippetContext snippet_context( + /*query_terms_in=*/{}, + ResultSpecProto::SnippetSpecProto::default_instance(), + TermMatchType::EXACT_ONLY); + PageResultState page_result_state( + std::move(scored_document_hits), /*next_page_token_in=*/1, + std::move(snippet_context), std::move(type_projection_tree_map), + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetriever> result_retriever, + ResultRetriever::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 3. Verify that the returned Email results only contain the 'name' + // property and the returned Person results have all of their properties. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::vector<SearchResultProto::ResultProto> result, + result_retriever->RetrieveResults(page_result_state)); + ASSERT_THAT(result, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") .Build(); + EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcard) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(document_two)); + + // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + ResultSpecProto::TypePropertyMask wildcard_type_property_mask; + wildcard_type_property_mask.set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask.add_paths("name"); + std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; + type_projection_tree_map.insert( + {std::string(ProjectionTree::kSchemaTypeWildcard), + ProjectionTree(wildcard_type_property_mask)}); + + SnippetContext snippet_context( + /*query_terms_in=*/{}, + ResultSpecProto::SnippetSpecProto::default_instance(), + TermMatchType::EXACT_ONLY); + PageResultState page_result_state( + std::move(scored_document_hits), /*next_page_token_in=*/1, + std::move(snippet_context), std::move(type_projection_tree_map), + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetriever> result_retriever, + ResultRetriever::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 3. Verify that the returned Email results only contain the 'name' + // property and the returned Person results only contain the 'name' property. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::vector<SearchResultProto::ResultProto> result, + result_retriever->RetrieveResults(page_result_state)); + ASSERT_THAT(result, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .Build(); + EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcardWithOneOverride) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(document_two)); + + // 2. Setup the scored results. 
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + ResultSpecProto::TypePropertyMask email_type_property_mask; + email_type_property_mask.set_schema_type("Email"); + email_type_property_mask.add_paths("body"); + ResultSpecProto::TypePropertyMask wildcard_type_property_mask; + wildcard_type_property_mask.set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask.add_paths("name"); + std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; + type_projection_tree_map.insert( + {"Email", ProjectionTree(email_type_property_mask)}); + type_projection_tree_map.insert( + {std::string(ProjectionTree::kSchemaTypeWildcard), + ProjectionTree(wildcard_type_property_mask)}); + + SnippetContext snippet_context( + /*query_terms_in=*/{}, + ResultSpecProto::SnippetSpecProto::default_instance(), + TermMatchType::EXACT_ONLY); + PageResultState page_result_state( + std::move(scored_document_hits), /*next_page_token_in=*/1, + std::move(snippet_context), std::move(type_projection_tree_map), + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ResultRetriever> result_retriever, + ResultRetriever::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 3. Verify that the returned Email results only contain the 'body' + // property and the returned Person results only contain the 'name' property. 
+ ICING_ASSERT_OK_AND_ASSIGN( + std::vector<SearchResultProto::ResultProto> result, + result_retriever->RetrieveResults(page_result_state)); + ASSERT_THAT(result, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .Build(); + EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverTest, ProjectionSingleTypesWildcardAndOverride) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. 
Body") + .AddStringProperty("emailAddress", "mr.body123@gmail.com") + .Build()) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + ResultSpecProto::TypePropertyMask email_type_property_mask; + email_type_property_mask.set_schema_type("Email"); + email_type_property_mask.add_paths("sender.name"); + ResultSpecProto::TypePropertyMask wildcard_type_property_mask; + wildcard_type_property_mask.set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask.add_paths("name"); + std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; + type_projection_tree_map.insert( + {"Email", ProjectionTree(email_type_property_mask)}); + type_projection_tree_map.insert( + {std::string(ProjectionTree::kSchemaTypeWildcard), + ProjectionTree(wildcard_type_property_mask)}); + + SnippetContext snippet_context( + /*query_terms_in=*/{}, + ResultSpecProto::SnippetSpecProto::default_instance(), + TermMatchType::EXACT_ONLY); + PageResultState page_result_state( + std::move(scored_document_hits), /*next_page_token_in=*/1, + std::move(snippet_context), std::move(type_projection_tree_map), + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); + + ICING_ASSERT_OK_AND_ASSIGN( + 
std::unique_ptr<ResultRetriever> result_retriever, + ResultRetriever::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 3. Verify that the returned Email results only contain the 'sender.name' + // property and the returned Person results only contain the 'name' property. + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<SearchResultProto::ResultProto> result, + result_retriever->RetrieveResults(page_result_state)); + ASSERT_THAT(result, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. Body") + .Build()) + .Build(); + EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .Build(); + EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); +} + +TEST_F(ResultRetrieverTest, + ProjectionSingleTypesWildcardAndOverrideNestedProperty) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + // 1. Add two documents + DocumentProto document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddStringProperty("name", "Hello World!") + .AddStringProperty( + "body", "Oh what a beautiful morning! Oh what a beautiful day!") + .AddDocumentProperty( + "sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. 
Body") + .AddStringProperty("emailAddress", "mr.body123@gmail.com") + .Build()) + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1, + doc_store->Put(document_one)); + + DocumentProto document_two = + DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .AddStringProperty("name", "Joe Fox") + .AddStringProperty("emailAddress", "ny152@aol.com") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2, + doc_store->Put(document_two)); + + // 2. Setup the scored results. + std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"), + GetSectionId("Email", "body")}; + SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids); + std::vector<ScoredDocumentHit> scored_document_hits = { + {document_id1, hit_section_id_mask, /*score=*/0}, + {document_id2, hit_section_id_mask, /*score=*/0}}; + + ResultSpecProto::TypePropertyMask email_type_property_mask; + email_type_property_mask.set_schema_type("Email"); + email_type_property_mask.add_paths("sender.name"); + ResultSpecProto::TypePropertyMask wildcard_type_property_mask; + wildcard_type_property_mask.set_schema_type( + std::string(ProjectionTree::kSchemaTypeWildcard)); + wildcard_type_property_mask.add_paths("sender"); + std::unordered_map<std::string, ProjectionTree> type_projection_tree_map; + type_projection_tree_map.insert( + {"Email", ProjectionTree(email_type_property_mask)}); + type_projection_tree_map.insert( + {std::string(ProjectionTree::kSchemaTypeWildcard), + ProjectionTree(wildcard_type_property_mask)}); + + SnippetContext snippet_context( + /*query_terms_in=*/{}, + ResultSpecProto::SnippetSpecProto::default_instance(), + TermMatchType::EXACT_ONLY); + PageResultState page_result_state( + std::move(scored_document_hits), /*next_page_token_in=*/1, + std::move(snippet_context), std::move(type_projection_tree_map), + /*num_previously_returned_in=*/0, + /*num_per_page_in=*/2); + + ICING_ASSERT_OK_AND_ASSIGN( + 
std::unique_ptr<ResultRetriever> result_retriever, + ResultRetriever::Create(doc_store.get(), schema_store_.get(), + language_segmenter_.get(), normalizer_.get())); + + // 3. Verify that the returned Email results only contain the 'sender.name' + // property and the returned Person results contain no properties. + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<SearchResultProto::ResultProto> result, + result_retriever->RetrieveResults(page_result_state)); + ASSERT_THAT(result, SizeIs(2)); + + DocumentProto projected_document_one = + DocumentBuilder() + .SetKey("namespace", "uri1") + .SetCreationTimestampMs(1000) + .SetSchema("Email") + .AddDocumentProperty("sender", + DocumentBuilder() + .SetKey("namespace", "uri") + .SetSchema("Person") + .AddStringProperty("name", "Mr. Body") + .Build()) + .Build(); + EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one)); + + DocumentProto projected_document_two = DocumentBuilder() + .SetKey("namespace", "uri2") + .SetCreationTimestampMs(1000) + .SetSchema("Person") + .Build(); EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two)); } diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc index 4488409..0f27d9e 100644 --- a/icing/result/result-state-manager.cc +++ b/icing/result/result-state-manager.cc @@ -39,6 +39,7 @@ ResultStateManager::RankAndPaginate(ResultState result_state) { // Gets the number before calling GetNextPage() because num_returned() may // change after returning more results. 
int num_previously_returned = result_state.num_returned(); + int num_per_page = result_state.num_per_page(); std::vector<ScoredDocumentHit> page_result_document_hits = result_state.GetNextPage(); @@ -52,7 +53,7 @@ ResultStateManager::RankAndPaginate(ResultState result_state) { return PageResultState( std::move(page_result_document_hits), kInvalidNextPageToken, std::move(snippet_context_copy), std::move(projection_tree_map_copy), - num_previously_returned); + num_previously_returned, num_per_page); } absl_ports::unique_lock l(&mutex_); @@ -63,7 +64,7 @@ ResultStateManager::RankAndPaginate(ResultState result_state) { return PageResultState(std::move(page_result_document_hits), next_page_token, std::move(snippet_context_copy), std::move(projection_tree_map_copy), - num_previously_returned); + num_previously_returned, num_per_page); } uint64_t ResultStateManager::Add(ResultState result_state) { @@ -88,6 +89,7 @@ libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage( } int num_returned = state_iterator->second.num_returned(); + int num_per_page = state_iterator->second.num_per_page(); std::vector<ScoredDocumentHit> result_of_page = state_iterator->second.GetNextPage(); if (result_of_page.empty()) { @@ -110,9 +112,9 @@ libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage( next_page_token = kInvalidNextPageToken; } - return PageResultState(result_of_page, next_page_token, - std::move(snippet_context_copy), - std::move(projection_tree_map_copy), num_returned); + return PageResultState( + result_of_page, next_page_token, std::move(snippet_context_copy), + std::move(projection_tree_map_copy), num_returned, num_per_page); } void ResultStateManager::InvalidateResultState(uint64_t next_page_token) { diff --git a/icing/result/result-state.h b/icing/result/result-state.h index de36b40..be92b85 100644 --- a/icing/result/result-state.h +++ b/icing/result/result-state.h @@ -15,6 +15,7 @@ #ifndef ICING_RESULT_RESULT_STATE_H_ #define 
ICING_RESULT_RESULT_STATE_H_ +#include <iostream> #include <vector> #include "icing/proto/scoring.pb.h" @@ -60,6 +61,8 @@ class ResultState { return projection_tree_map_; } + int num_per_page() const { return num_per_page_; } + // The number of results that have already been returned. This number is // increased when GetNextPage() is called. int num_returned() const { return num_returned_; } diff --git a/icing/store/corpus-id.h b/icing/store/corpus-id.h new file mode 100644 index 0000000..a8f21ba --- /dev/null +++ b/icing/store/corpus-id.h @@ -0,0 +1,30 @@ +// Copyright (C) 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_STORE_CORPUS_ID_H_ +#define ICING_STORE_CORPUS_ID_H_ + +#include <cstdint> + +namespace icing { +namespace lib { + +// Identifier for corpus, i.e. a <namespace, schema_type> pair, in +// DocumentProto. Generated in DocumentStore. 
+using CorpusId = int32_t; + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_CORPUS_ID_H_ diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index ce41715..6a664a3 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -37,9 +37,11 @@ #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" #include "icing/schema/schema-store.h" +#include "icing/store/corpus-id.h" #include "icing/store/document-associated-score-data.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include "icing/store/enable-bm25f.h" #include "icing/store/key-mapper.h" #include "icing/store/namespace-id.h" #include "icing/util/clock.h" @@ -62,12 +64,14 @@ constexpr char kScoreCacheFilename[] = "score_cache"; constexpr char kFilterCacheFilename[] = "filter_cache"; constexpr char kNamespaceMapperFilename[] = "namespace_mapper"; constexpr char kUsageStoreDirectoryName[] = "usage_store"; +constexpr char kCorpusIdMapperFilename[] = "corpus_mapper"; constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB // 384 KiB for a KeyMapper would allow each internal array to have a max of // 128 KiB for storage. constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB +constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) { DocumentWrapper document_wrapper; @@ -130,6 +134,10 @@ std::string MakeUsageStoreDirectoryName(const std::string& base_dir) { return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName); } +std::string MakeCorpusMapperFilename(const std::string& base_dir) { + return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename); +} + // TODO(adorokhine): This class internally uses an 8-byte fingerprint of the // Key and stores the key/value in a file-backed-trie that adds an ~80 byte // overhead per key. 
As we know that these fingerprints are always 8-bytes in @@ -358,6 +366,13 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() { usage_store_, UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_))); + if (enableBm25f()) { + ICING_ASSIGN_OR_RETURN( + corpus_mapper_, KeyMapper<CorpusId>::Create( + *filesystem_, MakeCorpusMapperFilename(base_dir_), + kCorpusMapperMaxSize)); + } + // Ensure the usage store is the correct size. ICING_RETURN_IF_ERROR( usage_store_->TruncateTo(document_id_mapper_->num_elements())); @@ -377,6 +392,9 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() { ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache()); ICING_RETURN_IF_ERROR(ResetFilterCache()); ICING_RETURN_IF_ERROR(ResetNamespaceMapper()); + if (enableBm25f()) { + ICING_RETURN_IF_ERROR(ResetCorpusMapper()); + } // Creates a new UsageStore instance. Note that we don't reset the data in // usage store here because we're not able to regenerate the usage scores. @@ -518,6 +536,14 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() { namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(), namespace_mapper_->num_keys())); + if (enableBm25f()) { + // Update corpus maps + std::string corpus = + MakeFingerprint(document_wrapper.document().namespace_(), + document_wrapper.document().schema()); + corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys()); + } + int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs( document_wrapper.document().creation_timestamp_ms(), document_wrapper.document().ttl_ms()); @@ -644,6 +670,27 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() { return libtextclassifier3::Status::OK; } +libtextclassifier3::Status DocumentStore::ResetCorpusMapper() { + if (enableBm25f()) { + // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). 
+ corpus_mapper_.reset(); + // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // that can support error logging. + libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete( + *filesystem_, MakeCorpusMapperFilename(base_dir_)); + if (!status.ok()) { + ICING_LOG(ERROR) << status.error_message() + << "Failed to delete old corpus_id mapper"; + return status; + } + ICING_ASSIGN_OR_RETURN( + corpus_mapper_, KeyMapper<CorpusId>::Create( + *filesystem_, MakeCorpusMapperFilename(base_dir_), + kCorpusMapperMaxSize)); + } + return libtextclassifier3::Status::OK; +} + libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const { Crc32 total_checksum; @@ -697,6 +744,10 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const { total_checksum.Append(std::to_string(score_cache_checksum.Get())); total_checksum.Append(std::to_string(filter_cache_checksum.Get())); total_checksum.Append(std::to_string(namespace_mapper_checksum.Get())); + if (enableBm25f()) { + Crc32 corpus_mapper_checksum = corpus_mapper_->ComputeChecksum(); + total_checksum.Append(std::to_string(corpus_mapper_checksum.Get())); + } return total_checksum; } @@ -787,6 +838,12 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put( NamespaceId namespace_id, namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys())); + if (enableBm25f()) { + // Update corpus maps + ICING_RETURN_IF_ERROR(corpus_mapper_->GetOrPut( + MakeFingerprint(name_space, schema), corpus_mapper_->num_keys())); + } + ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id, schema_store_->GetSchemaTypeId(schema)); @@ -1029,6 +1086,14 @@ libtextclassifier3::StatusOr<NamespaceId> DocumentStore::GetNamespaceId( return namespace_mapper_->Get(name_space); } +libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId( + const std::string_view name_space, const std::string_view schema) const { + if (enableBm25f()) { + return 
corpus_mapper_->Get(MakeFingerprint(name_space, schema)); + } + return absl_ports::NotFoundError("corpus_mapper disabled"); +} + libtextclassifier3::StatusOr<DocumentAssociatedScoreData> DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const { auto score_data_or = score_cache_->Get(document_id); @@ -1077,17 +1142,18 @@ libtextclassifier3::Status DocumentStore::ReportUsage( return usage_store_->AddUsageReport(usage_report, document_id); } -libtextclassifier3::Status DocumentStore::DeleteByNamespace( +DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace( std::string_view name_space, bool soft_delete) { + DeleteByGroupResult result; auto namespace_id_or = namespace_mapper_->Get(name_space); if (!namespace_id_or.ok()) { - return absl_ports::Annotate( + result.status = absl_ports::Annotate( namespace_id_or.status(), absl_ports::StrCat("Failed to find namespace: ", name_space)); + return result; } NamespaceId namespace_id = namespace_id_or.ValueOrDie(); - int num_updated_documents = 0; if (soft_delete) { // To delete an entire namespace, we append a tombstone that only contains // the deleted bit and the name of the deleted namespace. @@ -1100,36 +1166,43 @@ libtextclassifier3::Status DocumentStore::DeleteByNamespace( ICING_LOG(ERROR) << status.error_message() << "Failed to delete namespace. 
namespace = " << name_space; - return status; + result.status = std::move(status); + return result; } } - ICING_ASSIGN_OR_RETURN( - num_updated_documents, - BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete)); + auto num_deleted_or = + BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete); + if (!num_deleted_or.ok()) { + result.status = std::move(num_deleted_or).status(); + return result; + } - if (num_updated_documents <= 0) { + result.num_docs_deleted = num_deleted_or.ValueOrDie(); + if (result.num_docs_deleted <= 0) { // Treat the fact that no existing documents had this namespace to be the // same as this namespace not existing at all. - return absl_ports::NotFoundError( + result.status = absl_ports::NotFoundError( absl_ports::StrCat("Namespace '", name_space, "' doesn't exist")); + return result; } - return libtextclassifier3::Status::OK; + return result; } -libtextclassifier3::Status DocumentStore::DeleteBySchemaType( +DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType( std::string_view schema_type, bool soft_delete) { + DeleteByGroupResult result; auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type); if (!schema_type_id_or.ok()) { - return absl_ports::Annotate( + result.status = absl_ports::Annotate( schema_type_id_or.status(), absl_ports::StrCat("Failed to find schema type. schema_type: ", schema_type)); + return result; } SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie(); - int num_updated_documents = 0; if (soft_delete) { // To soft-delete an entire schema type, we append a tombstone that only // contains the deleted bit and the name of the deleted schema type. @@ -1142,20 +1215,26 @@ libtextclassifier3::Status DocumentStore::DeleteBySchemaType( ICING_LOG(ERROR) << status.error_message() << "Failed to delete schema_type. 
schema_type = " << schema_type; - return status; + result.status = std::move(status); + return result; } } - ICING_ASSIGN_OR_RETURN( - num_updated_documents, - BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete)); + auto num_deleted_or = + BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete); + if (!num_deleted_or.ok()) { + result.status = std::move(num_deleted_or).status(); + return result; + } - if (num_updated_documents <= 0) { - return absl_ports::NotFoundError(absl_ports::StrCat( + result.num_docs_deleted = num_deleted_or.ValueOrDie(); + if (result.num_docs_deleted <= 0) { + result.status = absl_ports::NotFoundError(absl_ports::StrCat( "No documents found with schema type '", schema_type, "'")); + return result; } - return libtextclassifier3::Status::OK; + return result; } libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete( @@ -1230,6 +1309,10 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() { ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk()); ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk()); + if (enableBm25f()) { + ICING_RETURN_IF_ERROR(corpus_mapper_->PersistToDisk()); + } + // Update the combined checksum and write to header file. 
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum()); ICING_RETURN_IF_ERROR(UpdateHeader(checksum)); @@ -1251,9 +1334,16 @@ libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const { ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage, namespace_mapper_->GetDiskUsage()); - return document_log_disk_usage + document_key_mapper_disk_usage + - document_id_mapper_disk_usage + score_cache_disk_usage + - filter_cache_disk_usage + namespace_mapper_disk_usage; + int64_t disk_usage = document_log_disk_usage + + document_key_mapper_disk_usage + + document_id_mapper_disk_usage + score_cache_disk_usage + + filter_cache_disk_usage + namespace_mapper_disk_usage; + if (enableBm25f()) { + ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage, + corpus_mapper_->GetDiskUsage()); + disk_usage += corpus_mapper_disk_usage; + } + return disk_usage; } libtextclassifier3::Status DocumentStore::UpdateSchemaStore( diff --git a/icing/store/document-store.h b/icing/store/document-store.h index 114fa13..78590a5 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -30,6 +30,7 @@ #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" #include "icing/schema/schema-store.h" +#include "icing/store/corpus-id.h" #include "icing/store/document-associated-score-data.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" @@ -71,6 +72,15 @@ class DocumentStore { int32_t optimizable_docs = 0; }; + struct DeleteByGroupResult { + // Status representing whether or not the operation succeeded. See the + // comments above the function that returns this result to determine what + // possible statuses could be returned. + libtextclassifier3::Status status; + + int num_docs_deleted = 0; + }; + struct CreateResult { // A successfully initialized document store. 
std::unique_ptr<DocumentStore> document_store; @@ -221,6 +231,15 @@ class DocumentStore { libtextclassifier3::StatusOr<NamespaceId> GetNamespaceId( std::string_view name_space) const; + // Returns the CorpusId associated with the given namespace and schema. + // + // Returns: + // A CorpusId on success + // NOT_FOUND if the key doesn't exist + // INTERNAL_ERROR on IO error + libtextclassifier3::StatusOr<CorpusId> GetCorpusId( + const std::string_view name_space, const std::string_view schema) const; + // Returns the DocumentAssociatedScoreData of the document specified by the // DocumentId. // @@ -284,8 +303,8 @@ class DocumentStore { // OK on success // NOT_FOUND if namespace doesn't exist // INTERNAL_ERROR on IO error - libtextclassifier3::Status DeleteByNamespace(std::string_view name_space, - bool soft_delete = false); + DeleteByGroupResult DeleteByNamespace(std::string_view name_space, + bool soft_delete = false); // Deletes all documents belonging to the given schema type. The documents // will be marked as deleted if 'soft_delete' is true, otherwise they will be @@ -301,8 +320,8 @@ class DocumentStore { // OK on success // NOT_FOUND if schema_type doesn't exist // INTERNAL_ERROR on IO error - libtextclassifier3::Status DeleteBySchemaType(std::string_view schema_type, - bool soft_delete = false); + DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type, + bool soft_delete = false); // Syncs all the data and metadata changes to disk. // @@ -438,6 +457,12 @@ class DocumentStore { // DocumentStore. Namespaces may be removed from the mapper during compaction. std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_; + // Maps a corpus, i.e. a (namespace, schema type) pair, to a densely-assigned + // unique id. A corpus is assigned an + // id when the first document belonging to that corpus is added to the + // DocumentStore. Corpus ids may be removed from the mapper during compaction. 
+ std::unique_ptr<KeyMapper<CorpusId>> corpus_mapper_; + // A storage class that caches all usage scores. Usage scores are not // considered as ground truth. Usage scores are associated with document ids // so they need to be updated when document ids change. @@ -503,6 +528,12 @@ class DocumentStore { // Returns OK or any IO errors. libtextclassifier3::Status ResetNamespaceMapper(); + // Resets the unique_ptr to the corpus_mapper, deletes the underlying file, + // and re-creates a new instance of the corpus_mapper. + // + // Returns OK or any IO errors. + libtextclassifier3::Status ResetCorpusMapper(); + // Checks if the header exists already. This does not create the header file // if it doesn't exist. bool HeaderExists(); diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index 4d8ac10..29bf8bb 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -33,9 +33,12 @@ #include "icing/schema/schema-store.h" #include "icing/store/document-filter-data.h" #include "icing/store/document-id.h" +#include "icing/store/enable-bm25f.h" #include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/platform.h" +#include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/util/crc32.h" @@ -98,6 +101,7 @@ class DocumentStoreTest : public ::testing::Test { } void SetUp() override { + setEnableBm25f(true); filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); filesystem_.CreateDirectoryRecursively(test_dir_.c_str()); filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()); @@ -471,8 +475,10 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) { // DELETE namespace.1. document1 and document 4 should be deleted. document2 // and document3 should still be retrievable. 
- ICING_EXPECT_OK( - doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true)); + DocumentStore::DeleteByGroupResult group_result = + doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()), @@ -513,8 +519,10 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceOk) { // DELETE namespace.1. document1 and document 4 should be deleted. document2 // and document3 should still be retrievable. - ICING_EXPECT_OK( - doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false)); + DocumentStore::DeleteByGroupResult group_result = + doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()), @@ -538,8 +546,10 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) { int64_t ground_truth_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace", - /*soft_delete=*/true), + EXPECT_THAT(doc_store + ->DeleteByNamespace("nonexistent_namespace", + /*soft_delete=*/true) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); int64_t ground_truth_size_after = filesystem_.GetFileSize( @@ -560,8 +570,10 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) { int64_t ground_truth_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - 
EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace", - /*soft_delete=*/false), + EXPECT_THAT(doc_store + ->DeleteByNamespace("nonexistent_namespace", + /*soft_delete=*/false) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); int64_t ground_truth_size_after = filesystem_.GetFileSize( @@ -584,8 +596,10 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) { // At this point, there are no existing documents with the namespace, even // though Icing's derived files know about this namespace. We should still // return NOT_FOUND since nothing existing has this namespace. - EXPECT_THAT(document_store->DeleteByNamespace(test_document1_.namespace_(), - /*soft_delete=*/true), + EXPECT_THAT(document_store + ->DeleteByNamespace(test_document1_.namespace_(), + /*soft_delete=*/true) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } @@ -604,8 +618,10 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNoExistingDocumentsNotFound) { // At this point, there are no existing documents with the namespace, even // though Icing's derived files know about this namespace. We should still // return NOT_FOUND since nothing existing has this namespace. - EXPECT_THAT(document_store->DeleteByNamespace(test_document1_.namespace_(), - /*soft_delete=*/false), + EXPECT_THAT(document_store + ->DeleteByNamespace(test_document1_.namespace_(), + /*soft_delete=*/false) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } @@ -642,7 +658,10 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) { // DELETE namespace.1. document1 and document 4 should be deleted. document2 // and document3 should still be retrievable. 
- ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace.1")); + DocumentStore::DeleteByGroupResult group_result = + doc_store->DeleteByNamespace("namespace.1"); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); ground_truth_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); @@ -741,8 +760,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeOk) { // Delete the "email" type and ensure that it works across both // email_document's namespaces. And that other documents aren't affected. - ICING_EXPECT_OK( - document_store->DeleteBySchemaType("email", /*soft_delete=*/true)); + DocumentStore::DeleteByGroupResult group_result = + document_store->DeleteBySchemaType("email", /*soft_delete=*/true); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); EXPECT_THAT(document_store->Get(email_1_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(document_store->Get(email_2_document_id), @@ -753,8 +774,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeOk) { IsOkAndHolds(EqualsProto(person_document))); // Delete the "message" type and check that other documents aren't affected - ICING_EXPECT_OK( - document_store->DeleteBySchemaType("message", /*soft_delete=*/true)); + group_result = + document_store->DeleteBySchemaType("message", /*soft_delete=*/true); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); EXPECT_THAT(document_store->Get(email_1_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(document_store->Get(email_2_document_id), @@ -824,8 +847,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) { // Delete the "email" type and ensure that it works across both // email_document's namespaces. And that other documents aren't affected. 
- ICING_EXPECT_OK( - document_store->DeleteBySchemaType("email", /*soft_delete=*/false)); + DocumentStore::DeleteByGroupResult group_result = + document_store->DeleteBySchemaType("email", /*soft_delete=*/true); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(2)); EXPECT_THAT(document_store->Get(email_1_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(document_store->Get(email_2_document_id), @@ -836,8 +861,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) { IsOkAndHolds(EqualsProto(person_document))); // Delete the "message" type and check that other documents aren't affected - ICING_EXPECT_OK( - document_store->DeleteBySchemaType("message", /*soft_delete=*/false)); + group_result = + document_store->DeleteBySchemaType("message", /*soft_delete=*/true); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); EXPECT_THAT(document_store->Get(email_1_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); EXPECT_THAT(document_store->Get(email_2_document_id), @@ -861,8 +888,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { int64_t ground_truth_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type", - /*soft_delete=*/true), + EXPECT_THAT(document_store + ->DeleteBySchemaType("nonexistent_type", + /*soft_delete=*/true) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); int64_t ground_truth_size_after = filesystem_.GetFileSize( @@ -884,8 +913,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) { int64_t ground_truth_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); - EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type", - /*soft_delete=*/false), + 
EXPECT_THAT(document_store + ->DeleteBySchemaType("nonexistent_type", + /*soft_delete=*/false) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); int64_t ground_truth_size_after = filesystem_.GetFileSize( @@ -906,8 +937,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) { ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(), test_document1_.uri())); - EXPECT_THAT(document_store->DeleteBySchemaType(test_document1_.schema(), - /*soft_delete=*/true), + EXPECT_THAT(document_store + ->DeleteBySchemaType(test_document1_.schema(), + /*soft_delete=*/true) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } @@ -923,8 +956,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNoExistingDocumentsNotFound) { ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(), test_document1_.uri())); - EXPECT_THAT(document_store->DeleteBySchemaType(test_document1_.schema(), - /*soft_delete=*/false), + EXPECT_THAT(document_store + ->DeleteBySchemaType(test_document1_.schema(), + /*soft_delete=*/false) + .status, StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } @@ -973,7 +1008,10 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) { document_store->Put(message_document)); // Delete "email". "message" documents should still be retrievable. - ICING_EXPECT_OK(document_store->DeleteBySchemaType("email")); + DocumentStore::DeleteByGroupResult group_result = + document_store->DeleteBySchemaType("email"); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); ground_truth_size_before = filesystem_.GetFileSize( absl_ports::StrCat(document_store_dir_, "/document_log").c_str()); @@ -1054,7 +1092,10 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) { document_store->Put(message_document)); // Delete "email". "message" documents should still be retrievable. 
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email")); + DocumentStore::DeleteByGroupResult group_result = + document_store->DeleteBySchemaType("email"); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); EXPECT_THAT(document_store->Get(email_document_id), StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); @@ -1461,6 +1502,12 @@ TEST_F(DocumentStoreTest, GetNamespaceId) { // DocumentStore EXPECT_THAT(doc_store->GetNamespaceId("namespace2"), IsOkAndHolds(Eq(1))); + // DELETE namespace1 - document_namespace1 is deleted. + DocumentStore::DeleteByGroupResult group_result = + doc_store->DeleteByNamespace("namespace1"); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); + // NamespaceMapper doesn't care if the document has been deleted EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0))); } @@ -1497,6 +1544,106 @@ TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) { StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); } +TEST_F(DocumentStoreTest, GetCorpusIdReturnsNotFoundWhenFeatureIsDisabled) { + setEnableBm25f(false); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build(); + + ICING_ASSERT_OK(doc_store->Put(document1)); + ICING_ASSERT_OK(doc_store->Put(document2)); + + EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND, + HasSubstr("corpus_mapper disabled"))); +} + +TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult 
create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build(); + + ICING_ASSERT_OK(doc_store->Put(document1)); + ICING_ASSERT_OK(doc_store->Put(document2)); + + // CorpusId of 0 since it was the first corpus seen by the DocumentStore + EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"), + IsOkAndHolds(Eq(0))); +} + +TEST_F(DocumentStoreTest, GetCorpusId) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + DocumentProto document_corpus1 = + DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build(); + DocumentProto document_corpus2 = + DocumentBuilder().SetKey("namespace2", "2").SetSchema("email").Build(); + + ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus1))); + ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus2))); + + // CorpusId of 0 since it was the first corpus seen by the DocumentStore + EXPECT_THAT(doc_store->GetCorpusId("namespace1", "email"), + IsOkAndHolds(Eq(0))); + + // CorpusId of 1 since it was the second corpus seen by the + // DocumentStore + EXPECT_THAT(doc_store->GetCorpusId("namespace2", "email"), + IsOkAndHolds(Eq(1))); + + // DELETE namespace1 - document_corpus1 is deleted. 
+ DocumentStore::DeleteByGroupResult group_result = + doc_store->DeleteByNamespace("namespace1"); + EXPECT_THAT(group_result.status, IsOk()); + EXPECT_THAT(group_result.num_docs_deleted, Eq(1)); + + // CorpusMapper doesn't care if the document has been deleted + EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0))); +} + +TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + + EXPECT_THAT( + doc_store->GetCorpusId("nonexistent_namespace", "nonexistent_schema"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + + DocumentProto document_corpus = + DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build(); + ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus))); + + EXPECT_THAT(doc_store->GetCorpusId("nonexistent_namespace", "email"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + EXPECT_THAT(doc_store->GetCorpusId("namespace1", "nonexistent_schema"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) { ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, @@ -2996,6 +3143,54 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); } +TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { + // The directory testdata/v0/document_store contains only the scoring_cache + // and the document_store_header (holding the crc for the scoring_cache). If + // the current code is compatible with the format of the v0 scoring_cache, + // then an empty document store should be initialized, but the non-empty + // scoring_cache should be retained. 
+ // Since the current document-asscoiated-score-data is compatible with the + // score_cache in testdata/v0/document_store, the document store should be + // initialized without having to re-generate the derived files. + + // Create dst directory + ASSERT_THAT(filesystem_.CreateDirectory(document_store_dir_.c_str()), true); + + // Get src files + std::string document_store_v0; + if (IsAndroidPlatform() || IsIosPlatform()) { + document_store_v0 = GetTestFilePath( + "icing/testdata/v0/document_store_android_ios_compatible"); + } else { + document_store_v0 = + GetTestFilePath("icing/testdata/v0/document_store"); + } + std::vector<std::string> document_store_files; + Filesystem filesystem; + filesystem.ListDirectory(document_store_v0.c_str(), &document_store_files); + + VLOG(1) << "Copying files " << document_store_v0 << ' ' + << document_store_files.size(); + for (size_t i = 0; i != document_store_files.size(); i++) { + std::string src = + absl_ports::StrCat(document_store_v0, "/", document_store_files[i]); + std::string dst = + absl_ports::StrCat(document_store_dir_, "/", document_store_files[i]); + ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true); + } + + NativeInitializeStats initializeStats; + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get(), &initializeStats)); + std::unique_ptr<DocumentStore> doc_store = + std::move(create_result.document_store); + // Regeneration never happens. 
+ EXPECT_EQ(initializeStats.document_store_recovery_cause(), + NativeInitializeStats::NONE); +} + } // namespace } // namespace lib diff --git a/icing/store/enable-bm25f.h b/icing/store/enable-bm25f.h new file mode 100644 index 0000000..cee94d1 --- /dev/null +++ b/icing/store/enable-bm25f.h @@ -0,0 +1,31 @@ +// Copyright (C) 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_STORE_ENABLE_BM25F_H_ +#define ICING_STORE_ENABLE_BM25F_H_ + +namespace icing { +namespace lib { + +inline bool enable_bm25f_ = false; + +inline bool enableBm25f() { return enable_bm25f_; } + +// Setter for testing purposes. It should never be called in production code. 
+inline void setEnableBm25f(bool enable_bm25f) { enable_bm25f_ = enable_bm25f; } + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_ENABLE_BM25F_H_ diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h index 225b498..a15e64e 100644 --- a/icing/testing/common-matchers.h +++ b/icing/testing/common-matchers.h @@ -23,6 +23,8 @@ #include "icing/absl_ports/str_join.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/legacy/core/icing-string-util.h" +#include "icing/proto/search.proto.h" +#include "icing/proto/search.pb.h" #include "icing/schema/schema-store.h" #include "icing/schema/section.h" #include "icing/util/status-macros.h" @@ -65,15 +67,15 @@ MATCHER_P2(EqualsDocHitInfoWithTermFrequency, document_id, SectionIdMask section_mask = kSectionIdMaskNone; bool term_frequency_as_expected = true; - std::vector<Hit::Score> expected_tfs; - std::vector<Hit::Score> actual_tfs; + std::vector<Hit::TermFrequency> expected_tfs; + std::vector<Hit::TermFrequency> actual_tfs; for (auto itr = section_ids_to_term_frequencies_map.begin(); itr != section_ids_to_term_frequencies_map.end(); itr++) { SectionId section_id = itr->first; section_mask |= 1U << section_id; expected_tfs.push_back(itr->second); - actual_tfs.push_back(actual.max_hit_score(section_id)); - if (actual.max_hit_score(section_id) != itr->second) { + actual_tfs.push_back(actual.hit_term_frequency(section_id)); + if (actual.hit_term_frequency(section_id) != itr->second) { term_frequency_as_expected = false; } } @@ -372,6 +374,18 @@ MATCHER_P2(ProtoStatusIs, status_code, error_matcher, "") { return ExplainMatchResult(error_matcher, arg.message(), result_listener); } +MATCHER_P(EqualsSearchResultIgnoreStats, expected, "") { + SearchResultProto actual_copy = arg; + actual_copy.clear_query_stats(); + actual_copy.clear_debug_info(); + + SearchResultProto expected_copy = expected; + expected_copy.clear_query_stats(); + expected_copy.clear_debug_info(); + return 
ExplainMatchResult(testing::EqualsProto(expected_copy), actual_copy, + result_listener); +} + // TODO(tjbarron) Remove this once icing has switched to depend on TC3 Status #define ICING_STATUS_MACROS_CONCAT_NAME(x, y) \ ICING_STATUS_MACROS_CONCAT_IMPL(x, y) diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc index eba1dfa..7ad8a64 100644 --- a/icing/testing/hit-test-utils.cc +++ b/icing/testing/hit-test-utils.cc @@ -19,17 +19,18 @@ namespace lib { // Returns a hit that has a delta of desired_byte_length from last_hit. Hit CreateHit(Hit last_hit, int desired_byte_length) { - Hit hit = - (last_hit.section_id() == kMinSectionId) - ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score()) - : Hit(last_hit.section_id() - 1, last_hit.document_id(), - last_hit.score()); + Hit hit = (last_hit.section_id() == kMinSectionId) + ? Hit(kMaxSectionId, last_hit.document_id() + 1, + last_hit.term_frequency()) + : Hit(last_hit.section_id() - 1, last_hit.document_id(), + last_hit.term_frequency()); uint8_t buf[5]; while (VarInt::Encode(last_hit.value() - hit.value(), buf) < desired_byte_length) { hit = (hit.section_id() == kMinSectionId) - ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score()) - : Hit(hit.section_id() - 1, hit.document_id(), hit.score()); + ? 
Hit(kMaxSectionId, hit.document_id() + 1, hit.term_frequency()) + : Hit(hit.section_id() - 1, hit.document_id(), + hit.term_frequency()); } return hit; } @@ -43,7 +44,7 @@ std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits, return hits; } hits.push_back(Hit(/*section_id=*/1, /*document_id=*/start_docid, - Hit::kDefaultHitScore)); + Hit::kDefaultTermFrequency)); while (hits.size() < num_hits) { hits.push_back(CreateHit(hits.back(), desired_byte_length)); } diff --git a/icing/testing/platform.h b/icing/testing/platform.h index 7b7f212..ad612d5 100644 --- a/icing/testing/platform.h +++ b/icing/testing/platform.h @@ -36,6 +36,22 @@ inline bool IsReverseJniTokenization() { return false; } +// Whether the running test is an Android test. +inline bool IsAndroidPlatform() { +#if defined(__ANDROID__) + return true; +#endif // defined(__ANDROID__) + return false; +} + +// Whether the running test is an iOS test. +inline bool IsIosPlatform() { +#if defined(__APPLE__) + return true; +#endif // defined(__APPLE__) + return false; +} + } // namespace lib } // namespace icing diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h index 863f43f..78430cc 100644 --- a/icing/testing/schema-generator.h +++ b/icing/testing/schema-generator.h @@ -40,6 +40,7 @@ class ExactStringPropertyGenerator { } }; +// Schema generator with random number of properties template <typename Rand, typename PropertyGenerator> class RandomSchemaGenerator { public: @@ -71,6 +72,37 @@ class RandomSchemaGenerator { PropertyGenerator* prop_generator_; }; +// Schema generator with number of properties specified by the caller +template <typename PropertyGenerator> +class SchemaGenerator { + public: + explicit SchemaGenerator(int num_properties, + PropertyGenerator* prop_generator) + : num_properties_(num_properties), prop_generator_(prop_generator) {} + + SchemaProto GenerateSchema(int num_types) { + SchemaProto schema; + while (--num_types >= 0) { + 
SetType(schema.add_types(), "Type" + std::to_string(num_types), + num_properties_); + } + return schema; + } + + private: + void SetType(SchemaTypeConfigProto* type_config, std::string_view name, + int num_properties) const { + type_config->set_schema_type(name.data(), name.length()); + while (--num_properties >= 0) { + std::string prop_name = "Prop" + std::to_string(num_properties); + (*type_config->add_properties()) = (*prop_generator_)(prop_name); + } + } + + int num_properties_; + PropertyGenerator* prop_generator_; +}; + } // namespace lib } // namespace icing diff --git a/icing/util/clock.h b/icing/util/clock.h index 06f1c9d..2bb7818 100644 --- a/icing/util/clock.h +++ b/icing/util/clock.h @@ -36,17 +36,22 @@ int64_t GetSteadyTimeMilliseconds(); class Timer { public: // Creates and starts the timer. - Timer() : start_timestamp_milliseconds_(GetSteadyTimeMilliseconds()) {} + Timer() : start_timestamp_nanoseconds_(GetSteadyTimeNanoseconds()) {} virtual ~Timer() = default; // Returns the elapsed time from when timer started. virtual int64_t GetElapsedMilliseconds() { - return GetSteadyTimeMilliseconds() - start_timestamp_milliseconds_; + return GetElapsedNanoseconds() / 1000000; + } + + // Returns the elapsed time from when timer started. + virtual int64_t GetElapsedNanoseconds() { + return GetSteadyTimeNanoseconds() - start_timestamp_nanoseconds_; } private: - int64_t start_timestamp_milliseconds_; + int64_t start_timestamp_nanoseconds_; }; // Wrapper around real-time clock functions. This is separated primarily so |