aboutsummaryrefslogtreecommitdiff
path: root/icing
diff options
context:
space:
mode:
authorTim Barron <tjbarron@google.com>2020-12-29 00:23:52 +0000
committerTim Barron <tjbarron@google.com>2021-01-05 19:03:14 +0000
commit59c2caa38fd8dca3760dad751f4f8e5de8be25f5 (patch)
tree28b5f15e8bbfb73eb7470bef37f554ea4f4847a7 /icing
parent282a5708af10879b12a09a59ad5bbfa253b1e92a (diff)
downloadicing-59c2caa38fd8dca3760dad751f4f8e5de8be25f5.tar.gz
Update Icing from upstream.
Change-Id: Iff50aebffb83529b0454e7c3a6dc6864e7a85f4a
Diffstat (limited to 'icing')
-rw-r--r--icing/file/filesystem.cc14
-rw-r--r--icing/file/filesystem.h3
-rw-r--r--icing/file/mock-filesystem.h2
-rw-r--r--icing/icing-search-engine-with-icu-file_test.cc4
-rw-r--r--icing/icing-search-engine.cc123
-rw-r--r--icing/icing-search-engine.h17
-rw-r--r--icing/icing-search-engine_benchmark.cc177
-rw-r--r--icing/icing-search-engine_test.cc601
-rw-r--r--icing/index/hit/doc-hit-info.cc22
-rw-r--r--icing/index/hit/doc-hit-info.h36
-rw-r--r--icing/index/hit/doc-hit-info_test.cc87
-rw-r--r--icing/index/hit/hit.cc19
-rw-r--r--icing/index/hit/hit.h42
-rw-r--r--icing/index/hit/hit_test.cc81
-rw-r--r--icing/index/index-processor_test.cc14
-rw-r--r--icing/index/index.cc4
-rw-r--r--icing/index/index.h2
-rw-r--r--icing/index/lite/lite-index.cc4
-rw-r--r--icing/index/lite/term-id-hit-pair.h25
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.cc6
-rw-r--r--icing/index/main/flash-index-storage_test.cc88
-rw-r--r--icing/index/main/index-block.cc2
-rw-r--r--icing/index/main/index-block_test.cc56
-rw-r--r--icing/index/main/main-index-merger.cc57
-rw-r--r--icing/index/main/main-index-merger_test.cc53
-rw-r--r--icing/index/main/main-index.cc3
-rw-r--r--icing/index/main/main-index_test.cc42
-rw-r--r--icing/index/main/posting-list-accessor_test.cc10
-rw-r--r--icing/index/main/posting-list-used.cc115
-rw-r--r--icing/index/main/posting-list-used.h33
-rw-r--r--icing/index/main/posting-list-used_test.cc96
-rw-r--r--icing/jni/icing-search-engine-jni.cc100
-rw-r--r--icing/result/page-result-state.h9
-rw-r--r--icing/result/projection-tree.h2
-rw-r--r--icing/result/result-retriever.cc10
-rw-r--r--icing/result/result-retriever_test.cc791
-rw-r--r--icing/result/result-state-manager.cc12
-rw-r--r--icing/result/result-state.h3
-rw-r--r--icing/store/corpus-id.h30
-rw-r--r--icing/store/document-store.cc136
-rw-r--r--icing/store/document-store.h39
-rw-r--r--icing/store/document-store_test.cc257
-rw-r--r--icing/store/enable-bm25f.h31
-rw-r--r--icing/testing/common-matchers.h22
-rw-r--r--icing/testing/hit-test-utils.cc17
-rw-r--r--icing/testing/platform.h16
-rw-r--r--icing/testing/schema-generator.h32
-rw-r--r--icing/util/clock.h11
48 files changed, 2492 insertions, 864 deletions
diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc
index 4a76c01..6a596f5 100644
--- a/icing/file/filesystem.cc
+++ b/icing/file/filesystem.cc
@@ -464,6 +464,20 @@ bool Filesystem::Write(const char* filename, const void* data,
return success;
}
+bool Filesystem::CopyFile(const char* src, const char* dst) const {
+ ScopedFd src_fd(OpenForRead(src));
+ ScopedFd dst_fd(OpenForWrite(dst));
+ if (!src_fd.is_valid() || !dst_fd.is_valid()) {
+ return false;
+ }
+ uint64_t size = GetFileSize(*src_fd);
+ std::unique_ptr<uint8_t[]> buf = std::make_unique<uint8_t[]>(size);
+ if (!Read(*src_fd, buf.get(), size)) {
+ return false;
+ }
+ return Write(*dst_fd, buf.get(), size);
+}
+
bool Filesystem::PWrite(int fd, off_t offset, const void* data,
size_t data_size) const {
size_t write_len = data_size;
diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h
index b85f3a0..d3c7787 100644
--- a/icing/file/filesystem.h
+++ b/icing/file/filesystem.h
@@ -83,6 +83,9 @@ class Filesystem {
// success or if the directory did not yet exist.
virtual bool DeleteDirectoryRecursively(const char* dir_name) const;
+ // Copies the src file to the dst file.
+ virtual bool CopyFile(const char* src, const char* dst) const;
+
// Returns true if a file exists. False if the file doesn't exist.
// If there is an error getting stat on the file, it logs the error and //
// asserts.
diff --git a/icing/file/mock-filesystem.h b/icing/file/mock-filesystem.h
index b89295e..88475cd 100644
--- a/icing/file/mock-filesystem.h
+++ b/icing/file/mock-filesystem.h
@@ -225,6 +225,8 @@ class MockFilesystem : public Filesystem {
MOCK_METHOD(bool, DeleteDirectoryRecursively, (const char* dir_name),
(const));
+ MOCK_METHOD(bool, CopyFile, (const char* src, const char* dst), (const));
+
MOCK_METHOD(bool, FileExists, (const char* file_name), (const));
MOCK_METHOD(bool, DirectoryExists, (const char* dir_name), (const));
diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc
index 1cb8620..5a9327e 100644
--- a/icing/icing-search-engine-with-icu-file_test.cc
+++ b/icing/icing-search-engine-with-icu-file_test.cc
@@ -27,6 +27,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
@@ -114,7 +115,8 @@ TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) {
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
} // namespace
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index c40bac9..d915d65 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -749,6 +749,11 @@ ReportUsageResultProto IcingSearchEngine::ReportUsage(
StatusProto* result_status = result_proto.mutable_status();
absl_ports::unique_lock l(&mutex_);
+ if (!initialized_) {
+ result_status->set_code(StatusProto::FAILED_PRECONDITION);
+ result_status->set_message("IcingSearchEngine has not been initialized!");
+ return result_proto;
+ }
libtextclassifier3::Status status =
document_store_->ReportUsage(usage_report);
@@ -761,6 +766,11 @@ GetAllNamespacesResultProto IcingSearchEngine::GetAllNamespaces() {
StatusProto* result_status = result_proto.mutable_status();
absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ result_status->set_code(StatusProto::FAILED_PRECONDITION);
+ result_status->set_message("IcingSearchEngine has not been initialized!");
+ return result_proto;
+ }
std::vector<std::string> namespaces = document_store_->GetAllNamespaces();
@@ -786,6 +796,10 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space,
return result_proto;
}
+ NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SINGLE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = document_store_->Delete(name_space, uri);
@@ -798,6 +812,8 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space,
}
result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(1);
return result_proto;
}
@@ -814,18 +830,24 @@ DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace(
return delete_result;
}
+ NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status =
+ DocumentStore::DeleteByGroupResult doc_store_result =
document_store_->DeleteByNamespace(name_space);
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ if (!doc_store_result.status.ok()) {
+ ICING_LOG(ERROR) << doc_store_result.status.error_message()
<< "Failed to delete Namespace: " << name_space;
- TransformStatus(status, result_status);
+ TransformStatus(doc_store_result.status, result_status);
return delete_result;
}
result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
return delete_result;
}
@@ -842,27 +864,33 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType(
return delete_result;
}
+ NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status =
+ DocumentStore::DeleteByGroupResult doc_store_result =
document_store_->DeleteBySchemaType(schema_type);
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ if (!doc_store_result.status.ok()) {
+ ICING_LOG(ERROR) << doc_store_result.status.error_message()
<< "Failed to delete SchemaType: " << schema_type;
- TransformStatus(status, result_status);
+ TransformStatus(doc_store_result.status, result_status);
return delete_result;
}
result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
return delete_result;
}
-DeleteResultProto IcingSearchEngine::DeleteByQuery(
+DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
const SearchSpecProto& search_spec) {
ICING_VLOG(1) << "Deleting documents for query " << search_spec.query()
<< " from doc store";
- DeleteResultProto result_proto;
+ DeleteByQueryResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
absl_ports::unique_lock l(&mutex_);
@@ -872,6 +900,10 @@ DeleteResultProto IcingSearchEngine::DeleteByQuery(
return result_proto;
}
+ NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
libtextclassifier3::Status status =
ValidateSearchSpec(search_spec, performance_configuration_);
if (!status.ok()) {
@@ -898,13 +930,12 @@ DeleteResultProto IcingSearchEngine::DeleteByQuery(
QueryProcessor::QueryResults query_results =
std::move(query_results_or).ValueOrDie();
- ICING_LOG(ERROR) << "Deleting the docs that matched the query.";
- bool found_results = false;
+ ICING_VLOG(2) << "Deleting the docs that matched the query.";
+ int num_deleted = 0;
while (query_results.root_iterator->Advance().ok()) {
- ICING_LOG(ERROR)
- << "Deleting doc "
- << query_results.root_iterator->doc_hit_info().document_id();
- found_results = true;
+ ICING_VLOG(3) << "Deleting doc "
+ << query_results.root_iterator->doc_hit_info().document_id();
+ ++num_deleted;
status = document_store_->Delete(
query_results.root_iterator->doc_hit_info().document_id());
if (!status.ok()) {
@@ -912,13 +943,15 @@ DeleteResultProto IcingSearchEngine::DeleteByQuery(
return result_proto;
}
}
- if (found_results) {
+ if (num_deleted > 0) {
result_proto.mutable_status()->set_code(StatusProto::OK);
} else {
result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
result_proto.mutable_status()->set_message(
"No documents matched the query to delete by!");
}
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(num_deleted);
return result_proto;
}
@@ -1141,6 +1174,9 @@ SearchResultProto IcingSearchEngine::Search(
return result_proto;
}
+ NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
+
libtextclassifier3::Status status = ValidateResultSpec(result_spec);
if (!status.ok()) {
TransformStatus(status, result_status);
@@ -1152,6 +1188,15 @@ SearchResultProto IcingSearchEngine::Search(
return result_proto;
}
+ query_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ query_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
+ query_stats->set_ranking_strategy(scoring_spec.rank_by());
+ query_stats->set_is_first_page(true);
+ query_stats->set_requested_page_size(result_spec.num_per_page());
+
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Gets unordered results from query processor
auto query_processor_or = QueryProcessor::Create(
index_.get(), language_segmenter_.get(), normalizer_.get(),
@@ -1170,7 +1215,16 @@ SearchResultProto IcingSearchEngine::Search(
}
QueryProcessor::QueryResults query_results =
std::move(query_results_or).ValueOrDie();
+ query_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ int term_count = 0;
+ for (const auto& section_and_terms : query_results.query_terms) {
+ term_count += section_and_terms.second.size();
+ }
+ query_stats->set_num_terms(term_count);
+
+ component_timer = clock_->GetNewTimer();
// Scores but does not rank the results.
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
scoring_processor_or =
@@ -1184,6 +1238,9 @@ SearchResultProto IcingSearchEngine::Search(
std::vector<ScoredDocumentHit> result_document_hits =
scoring_processor->Score(std::move(query_results.root_iterator),
performance_configuration_.num_to_score);
+ query_stats->set_scoring_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ query_stats->set_num_documents_scored(result_document_hits.size());
// Returns early for empty result
if (result_document_hits.empty()) {
@@ -1191,6 +1248,7 @@ SearchResultProto IcingSearchEngine::Search(
return result_proto;
}
+ component_timer = clock_->GetNewTimer();
// Ranks and paginates results
libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
result_state_manager_.RankAndPaginate(ResultState(
@@ -1202,7 +1260,10 @@ SearchResultProto IcingSearchEngine::Search(
}
PageResultState page_result_state =
std::move(page_result_state_or).ValueOrDie();
+ query_stats->set_ranking_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ component_timer = clock_->GetNewTimer();
// Retrieves the document protos and snippets if requested
auto result_retriever_or =
ResultRetriever::Create(document_store_.get(), schema_store_.get(),
@@ -1236,6 +1297,14 @@ SearchResultProto IcingSearchEngine::Search(
if (page_result_state.next_page_token != kInvalidNextPageToken) {
result_proto.set_next_page_token(page_result_state.next_page_token);
}
+ query_stats->set_document_retrieval_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
+ query_stats->set_num_results_returned_current_page(
+ result_proto.results_size());
+ query_stats->set_num_results_snippeted(
+ std::min(result_proto.results_size(),
+ result_spec.snippet_spec().num_to_snippet()));
return result_proto;
}
@@ -1252,6 +1321,10 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
return result_proto;
}
+ NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_is_first_page(false);
+
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
result_state_manager_.GetNextPage(next_page_token);
@@ -1268,6 +1341,7 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
PageResultState page_result_state =
std::move(page_result_state_or).ValueOrDie();
+ query_stats->set_requested_page_size(page_result_state.requested_page_size);
// Retrieves the document protos.
auto result_retriever_or =
@@ -1299,6 +1373,21 @@ SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
if (page_result_state.next_page_token != kInvalidNextPageToken) {
result_proto.set_next_page_token(page_result_state.next_page_token);
}
+
+ // The only thing that we're doing is document retrieval. So document
+ // retrieval latency and overall latency are the same and can use the same
+ // timer.
+ query_stats->set_document_retrieval_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
+ query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
+ query_stats->set_num_results_returned_current_page(
+ result_proto.results_size());
+ int num_left_to_snippet =
+ std::max(page_result_state.snippet_context.snippet_spec.num_to_snippet() -
+ page_result_state.num_previously_returned,
+ 0);
+ query_stats->set_num_results_snippeted(
+ std::min(result_proto.results_size(), num_left_to_snippet));
return result_proto;
}
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 70a9c07..b2bb4f1 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -287,7 +287,7 @@ class IcingSearchEngine {
// NOT_FOUND if the query doesn't match any documents
// FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
// INTERNAL_ERROR on IO error
- DeleteResultProto DeleteByQuery(const SearchSpecProto& search_spec)
+ DeleteByQueryResultProto DeleteByQuery(const SearchSpecProto& search_spec)
ICING_LOCKS_EXCLUDED(mutex_);
// Retrieves, scores, ranks, and returns the results according to the specs.
@@ -404,15 +404,18 @@ class IcingSearchEngine {
bool initialized_ ICING_GUARDED_BY(mutex_) = false;
// Abstraction for accessing time values.
- std::unique_ptr<Clock> clock_;
+ const std::unique_ptr<const Clock> clock_;
// Provides key thresholds that affects the running time and memory of major
// components in Icing search engine.
- PerformanceConfiguration performance_configuration_;
-
- // Used to manage pagination state of query results. A lock is not needed here
- // because ResultStateManager has its own reader-writer lock.
- ResultStateManager result_state_manager_;
+ const PerformanceConfiguration performance_configuration_;
+
+ // Used to manage pagination state of query results. Even though
+ // ResultStateManager has its own reader-writer lock, mutex_ must still be
+ // acquired first in order to adhere to the global lock ordering:
+ // 1. mutex_
+ // 2. result_state_manager_.lock_
+ ResultStateManager result_state_manager_ ICING_GUARDED_BY(mutex_);
// Used to provide reader and writer locks
absl_ports::shared_mutex mutex_;
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index a6d96e0..9d33a82 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -117,7 +117,7 @@ std::vector<std::string> CreateNamespaces(int num_namespaces) {
// Creates a vector containing num_words randomly-generated words for use by
// documents.
template <typename Rand>
-std::vector<std::string> CreateLanguage(int num_words, Rand* r) {
+std::vector<std::string> CreateLanguages(int num_words, Rand* r) {
std::vector<std::string> language;
std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
while (--num_words >= 0) {
@@ -175,6 +175,165 @@ class DestructibleDirectory {
std::string dir_;
};
+std::vector<DocumentProto> GenerateRandomDocuments(
+ EvenDistributionTypeSelector* type_selector, int num_docs) {
+ std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
+ EvenDistributionNamespaceSelector namespace_selector(namespaces);
+
+ std::default_random_engine random;
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>
+ token_generator(language, &random);
+
+ DocumentGenerator<
+ EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
+ generator(&namespace_selector, type_selector, &token_generator,
+ kAvgDocumentSize * kContentSizePct);
+
+ std::vector<DocumentProto> random_docs;
+ random_docs.reserve(num_docs);
+ for (int i = 0; i < num_docs; i++) {
+ random_docs.push_back(generator.generateDoc());
+ }
+ return random_docs;
+}
+
+void BM_IndexLatency(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ int num_docs = state.range(0);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs);
+ Timer timer;
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+ int64_t time_taken_ns = timer.GetElapsedNanoseconds();
+ int64_t time_per_doc_ns = time_taken_ns / num_docs;
+ std::cout << "Number of indexed documents:\t" << num_docs
+ << "\t\tNumber of indexed sections:\t" << state.range(1)
+ << "\t\tTime taken (ms):\t" << time_taken_ns / 1000000
+ << "\t\tTime taken per doc (us):\t" << time_per_doc_ns / 1000
+ << std::endl;
+}
+BENCHMARK(BM_IndexLatency)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1, 1)
+ ->ArgPair(2, 1)
+ ->ArgPair(8, 1)
+ ->ArgPair(32, 1)
+ ->ArgPair(128, 1)
+ ->ArgPair(1 << 10, 1)
+ ->ArgPair(1 << 13, 1)
+ ->ArgPair(1 << 15, 1)
+ ->ArgPair(1 << 17, 1)
+ ->ArgPair(1, 5)
+ ->ArgPair(2, 5)
+ ->ArgPair(8, 5)
+ ->ArgPair(32, 5)
+ ->ArgPair(128, 5)
+ ->ArgPair(1 << 10, 5)
+ ->ArgPair(1 << 13, 5)
+ ->ArgPair(1 << 15, 5)
+ ->ArgPair(1 << 17, 5)
+ ->ArgPair(1, 10)
+ ->ArgPair(2, 10)
+ ->ArgPair(8, 10)
+ ->ArgPair(32, 10)
+ ->ArgPair(128, 10)
+ ->ArgPair(1 << 10, 10)
+ ->ArgPair(1 << 13, 10)
+ ->ArgPair(1 << 15, 10)
+ ->ArgPair(1 << 17, 10);
+
+void BM_IndexThroughput(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ int num_docs = state.range(0);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs);
+ for (auto s : state) {
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+ }
+ state.SetItemsProcessed(state.iterations() * num_docs);
+}
+BENCHMARK(BM_IndexThroughput)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1, 1)
+ ->ArgPair(2, 1)
+ ->ArgPair(8, 1)
+ ->ArgPair(32, 1)
+ ->ArgPair(128, 1)
+ ->ArgPair(1 << 10, 1)
+ ->ArgPair(1 << 13, 1)
+ ->ArgPair(1 << 15, 1)
+ ->ArgPair(1 << 17, 1)
+ ->ArgPair(1, 5)
+ ->ArgPair(2, 5)
+ ->ArgPair(8, 5)
+ ->ArgPair(32, 5)
+ ->ArgPair(128, 5)
+ ->ArgPair(1 << 10, 5)
+ ->ArgPair(1 << 13, 5)
+ ->ArgPair(1 << 15, 5)
+ ->ArgPair(1 << 17, 5)
+ ->ArgPair(1, 10)
+ ->ArgPair(2, 10)
+ ->ArgPair(8, 10)
+ ->ArgPair(32, 10)
+ ->ArgPair(128, 10)
+ ->ArgPair(1 << 10, 10)
+ ->ArgPair(1 << 13, 10)
+ ->ArgPair(1 << 15, 10)
+ ->ArgPair(1 << 17, 10);
+
void BM_MutlipleIndices(benchmark::State& state) {
// Initialize the filesystem
std::string test_dir = GetTestTempDir() + "/icing/benchmark";
@@ -202,11 +361,8 @@ void BM_MutlipleIndices(benchmark::State& state) {
options.set_index_merge_size(kIcingFullIndexSize / num_indices);
auto icing = std::make_unique<IcingSearchEngine>(options);
- InitializeResultProto init_result = icing->Initialize();
- ASSERT_THAT(init_result.status().code(), Eq(StatusProto::OK));
-
- SetSchemaResultProto schema_result = icing->SetSchema(schema);
- ASSERT_THAT(schema_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
icings.push_back(std::move(icing));
}
@@ -214,7 +370,7 @@ void BM_MutlipleIndices(benchmark::State& state) {
std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
EvenDistributionNamespaceSelector namespace_selector(namespaces);
- std::vector<std::string> language = CreateLanguage(kLanguageSize, &random);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
UniformDistributionLanguageTokenGenerator<std::default_random_engine>
token_generator(language, &random);
@@ -231,8 +387,7 @@ void BM_MutlipleIndices(benchmark::State& state) {
ASSERT_THAT(put_result.status().code(), Eq(StatusProto::UNKNOWN));
continue;
}
- put_result = icings.at(i % icings.size())->Put(doc);
- ASSERT_THAT(put_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icings.at(i % icings.size())->Put(doc).status(), ProtoIsOk());
}
// QUERY!
@@ -255,13 +410,13 @@ void BM_MutlipleIndices(benchmark::State& state) {
continue;
}
result = icings.at(0)->Search(search_spec, scoring_spec, result_spec);
- ASSERT_THAT(result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(result.status(), ProtoIsOk());
while (!result.results().empty()) {
num_results += result.results_size();
if (!icings.empty()) {
result = icings.at(0)->GetNextPage(result.next_page_token());
}
- ASSERT_THAT(result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(result.status(), ProtoIsOk());
}
}
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 8d69d78..f4249f3 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -397,22 +397,26 @@ TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// The query token is also truncated to length of 1, so "me"->"m" matches "m"
search_spec.set_query("me");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// The query token is still truncated to length of 1, so "massage"->"m"
// matches "m"
search_spec.set_query("massage");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -445,9 +449,11 @@ TEST_F(IcingSearchEngineTest,
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, FailToCreateDocStore) {
@@ -801,9 +807,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result));
SchemaProto schema_with_indexed_property = CreateMessageSchema();
// Index restoration should be triggered here because new schema requires more
@@ -815,9 +822,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
@@ -1093,9 +1101,11 @@ TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) {
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
@@ -1127,7 +1137,8 @@ TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
@@ -1143,8 +1154,10 @@ TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
@@ -1163,8 +1176,10 @@ TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
StatusProto::INVALID_ARGUMENT);
expected_search_result_proto.mutable_status()->set_message(
"ResultSpecProto.num_per_page cannot be negative.");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
@@ -1205,17 +1220,19 @@ TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
search_spec.set_query("foo");
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result));
}
}
@@ -1236,7 +1253,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) {
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
@@ -1276,7 +1294,8 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Second page, 2 results
expected_search_result_proto.clear_results();
@@ -1284,8 +1303,9 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
document3;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Third page, 1 result
expected_search_result_proto.clear_results();
@@ -1294,13 +1314,15 @@ TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
// Because there are no more results, we should not return the next page
// token.
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
@@ -1343,7 +1365,8 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Second page, 2 results
expected_search_result_proto.clear_results();
@@ -1351,8 +1374,9 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
document3;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Third page, 1 result
expected_search_result_proto.clear_results();
@@ -1361,13 +1385,15 @@ TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
// Because there are no more results, we should not return the next page
// token.
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) {
@@ -1479,7 +1505,8 @@ TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Now document1 is still to be fetched.
// Invalidates token
@@ -1488,8 +1515,9 @@ TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
// Tries to fetch the second page, no result since it's invalidated
expected_search_result_proto.clear_results();
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -1521,7 +1549,8 @@ TEST_F(IcingSearchEngineTest,
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Now document1 is still to be fetched.
OptimizeResultProto optimize_result_proto;
@@ -1533,8 +1562,9 @@ TEST_F(IcingSearchEngineTest,
// invalidated during Optimize()
expected_search_result_proto.clear_results();
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
@@ -1855,7 +1885,13 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
.AddStringProperty("subject", "message body2")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -1873,7 +1909,14 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
// Delete the first type. The first doc should be irretrievable. The
// second should still be present.
- EXPECT_THAT(icing.DeleteBySchemaType("message").status(), ProtoIsOk());
+ DeleteBySchemaTypeResultProto result_proto =
+ icing.DeleteBySchemaType("message");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ NativeDeleteStats exp_stats;
+ exp_stats.set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -1896,9 +1939,11 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec.set_query("message");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) {
@@ -1965,9 +2010,11 @@ TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) {
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
@@ -1993,7 +2040,12 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -2016,7 +2068,14 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
// Delete namespace1. Document1 and document2 should be irretrievable.
// Document3 should still be present.
- EXPECT_THAT(icing.DeleteByNamespace("namespace1").status(), ProtoIsOk());
+ DeleteByNamespaceResultProto result_proto =
+ icing.DeleteByNamespace("namespace1");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ NativeDeleteStats exp_stats;
+ exp_stats.set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(2);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -2046,9 +2105,11 @@ TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec.set_query("message");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) {
@@ -2110,9 +2171,11 @@ TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) {
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByQuery) {
@@ -2131,7 +2194,12 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) {
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -2152,7 +2220,13 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) {
SearchSpecProto search_spec;
search_spec.set_query("body1");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+ DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ NativeDeleteStats exp_stats;
+ exp_stats.set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -2175,9 +2249,11 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) {
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) {
@@ -2242,9 +2318,11 @@ TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) {
document2;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
@@ -2304,16 +2382,20 @@ TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) {
ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Validates that Search() works right after Optimize()
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) {
@@ -2364,9 +2446,11 @@ TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) {
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -2422,9 +2506,11 @@ TEST_F(IcingSearchEngineTest,
// Searching old content returns nothing because original file directory is
// missing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
search_spec.set_query("n");
@@ -2432,9 +2518,10 @@ TEST_F(IcingSearchEngineTest,
new_document;
// Searching new content returns the new document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
@@ -2490,9 +2577,11 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
// Searching old content returns nothing because original files are missing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
search_spec.set_query("n");
@@ -2500,9 +2589,10 @@ TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
new_document;
// Searching new content returns the new document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
@@ -2551,9 +2641,11 @@ TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that the document is returned as part of search results
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
@@ -2600,9 +2692,11 @@ TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that the document is not returned as part of search results
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
@@ -2637,9 +2731,11 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
// The message isn't indexed, so we get nothing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// With just the schema type filter, we can search for the message
search_spec.Clear();
@@ -2648,9 +2744,10 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
message_document;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Since SchemaTypeIds are assigned based on order in the SchemaProto, this
// will force a change in the DocumentStore's cached SchemaTypeIds
@@ -2679,9 +2776,10 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
search_spec.add_schema_type_filters("message");
// We can still search for the message document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
@@ -2708,9 +2806,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
@@ -2724,9 +2824,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
EqualsProto(expected_get_result_proto));
// Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Checks that Schema is still since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -2757,9 +2859,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) {
ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Change the header's magic value
@@ -2777,9 +2881,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) {
EqualsProto(expected_get_result_proto));
// Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Checks that Schema is still since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -2810,9 +2916,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) {
ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Change the header's checksum value
@@ -2831,9 +2939,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) {
EqualsProto(expected_get_result_proto));
// Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Checks that Schema is still since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -2945,9 +3055,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
{
@@ -3025,9 +3137,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2_with_additional_property;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
@@ -3090,9 +3204,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
@@ -3112,9 +3228,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Pretend we lost the entire index
@@ -3125,9 +3243,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Check that our index is ok by searching over the restored index
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {
@@ -3147,9 +3267,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Pretend index is corrupted
@@ -3162,9 +3284,11 @@ TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Check that our index is ok by searching over the restored index
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
@@ -3222,9 +3346,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
@@ -3280,9 +3405,10 @@ TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
// order.
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
@@ -3335,9 +3461,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) {
@@ -3405,9 +3532,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -3462,9 +3590,10 @@ TEST_F(IcingSearchEngineTest,
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) {
@@ -3531,9 +3660,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -3588,9 +3718,10 @@ TEST_F(IcingSearchEngineTest,
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
@@ -3656,9 +3787,10 @@ TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
@@ -3717,9 +3849,10 @@ TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -3797,9 +3930,11 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
@@ -3821,9 +3956,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
// Can't search for it
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStats(empty_result));
}
TEST_F(IcingSearchEngineTest, PersistToDisk) {
@@ -5481,6 +5617,101 @@ TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) {
EqualsProto(projected_document_one));
}
+TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.add_namespace_filters("namespace");
+ search_spec.add_schema_type_filters(document1.schema());
+ search_spec.set_query("message");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+ result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ // Searches and gets the first page, 2 results with 2 snippets
+ SearchResultProto search_result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
+
+ // Check the stats
+ NativeQueryStats exp_stats;
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(1);
+ exp_stats.set_num_schema_types_filtered(1);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_documents_scored(5);
+ exp_stats.set_num_results_snippeted(2);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+  // Second page, 2 results with 1 snippet
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ exp_stats = NativeQueryStats();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_results_snippeted(1);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Third page, 1 result with 0 snippets
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = NativeQueryStats();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_results_snippeted(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/hit/doc-hit-info.cc b/icing/index/hit/doc-hit-info.cc
index 80dbbde..8e418c8 100644
--- a/icing/index/hit/doc-hit-info.cc
+++ b/icing/index/hit/doc-hit-info.cc
@@ -34,26 +34,28 @@ bool DocHitInfo::operator<(const DocHitInfo& other) const {
}
// Doesn't matter which way we compare this array, as long as
// DocHitInfo is unequal when it is unequal.
- return memcmp(max_hit_score_, other.max_hit_score_, sizeof(max_hit_score_)) <
- 0;
+ return memcmp(hit_term_frequency_, other.hit_term_frequency_,
+ sizeof(hit_term_frequency_)) < 0;
}
-void DocHitInfo::UpdateSection(SectionId section_id, Hit::Score hit_score) {
+void DocHitInfo::UpdateSection(SectionId section_id,
+ Hit::TermFrequency hit_term_frequency) {
SectionIdMask section_id_mask = (1u << section_id);
- if (hit_section_ids_mask() & section_id_mask) {
- max_hit_score_[section_id] =
- std::max(max_hit_score_[section_id], hit_score);
- } else {
- max_hit_score_[section_id] = hit_score;
- hit_section_ids_mask_ |= section_id_mask;
+ if ((hit_section_ids_mask() & section_id_mask)) {
+ // If the sectionId is already embedded in the hit_section_ids_mask,
+ // then the term frequencies should always match. So there is no
+ // need to update anything.
+ return;
}
+ hit_term_frequency_[section_id] = hit_term_frequency;
+ hit_section_ids_mask_ |= section_id_mask;
}
void DocHitInfo::MergeSectionsFrom(const DocHitInfo& other) {
SectionIdMask other_mask = other.hit_section_ids_mask();
while (other_mask) {
SectionId section_id = __builtin_ctz(other_mask);
- UpdateSection(section_id, other.max_hit_score(section_id));
+ UpdateSection(section_id, other.hit_term_frequency(section_id));
other_mask &= ~(1u << section_id);
}
}
diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h
index 32ba97e..8171960 100644
--- a/icing/index/hit/doc-hit-info.h
+++ b/icing/index/hit/doc-hit-info.h
@@ -26,17 +26,18 @@ namespace icing {
namespace lib {
// DocHitInfo provides a collapsed view of all hits for a specific term and doc.
-// Hits contain a document_id, section_id and a hit score. The information in
-// multiple hits is collapse into a DocHitInfo by providing a SectionIdMask of
-// all sections that contained a hit for this term as well as the highest hit
-// score of any hit for each section.
+// Hits contain a document_id, section_id and a term frequency. The
+// information in multiple hits is collapsed into a DocHitInfo by providing a
+// SectionIdMask of all sections that contained a hit for this term as well as
+// the term frequency of the first hit indexed for each section.
class DocHitInfo {
public:
explicit DocHitInfo(DocumentId document_id_in = kInvalidDocumentId,
SectionIdMask hit_section_ids_mask = kSectionIdMaskNone)
: document_id_(document_id_in),
hit_section_ids_mask_(hit_section_ids_mask) {
- memset(max_hit_score_, Hit::kDefaultHitScore, sizeof(max_hit_score_));
+ memset(hit_term_frequency_, Hit::kDefaultTermFrequency,
+ sizeof(hit_term_frequency_));
}
DocumentId document_id() const { return document_id_; }
@@ -49,8 +50,8 @@ class DocHitInfo {
hit_section_ids_mask_ = section_id_mask;
}
- Hit::Score max_hit_score(SectionId section_id) const {
- return max_hit_score_[section_id];
+ Hit::TermFrequency hit_term_frequency(SectionId section_id) const {
+ return hit_term_frequency_[section_id];
}
bool operator<(const DocHitInfo& other) const;
@@ -58,12 +59,14 @@ class DocHitInfo {
return (*this < other) == (other < *this);
}
- // Updates the hit_section_ids_mask and max_hit_score for the section, if
- // necessary.
- void UpdateSection(SectionId section_id, Hit::Score hit_score);
+ // Updates the hit_section_ids_mask and hit_term_frequency for the
+ // section, if necessary.
+ void UpdateSection(SectionId section_id,
+ Hit::TermFrequency hit_term_frequency);
- // Merges the sections of other into this. The hit_section_ids_masks are or'd
- // and the max hit score for each section between the two is set.
+ // Merges the sections of other into this. The hit_section_ids_masks are or'd;
+ // if this.hit_term_frequency_[sectionId] has already been defined,
+ // other.hit_term_frequency_[sectionId] value is ignored.
//
// This does not affect the DocumentId of this or other. If callers care about
// only merging sections for DocHitInfos with the same DocumentId, callers
@@ -73,14 +76,15 @@ class DocHitInfo {
private:
DocumentId document_id_;
SectionIdMask hit_section_ids_mask_;
- Hit::Score max_hit_score_[kMaxSectionId + 1];
+ Hit::TermFrequency hit_term_frequency_[kMaxSectionId + 1];
} __attribute__((packed));
static_assert(sizeof(DocHitInfo) == 22, "");
// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
static_assert(icing_is_packed_pod<DocHitInfo>::value, "go/icing-ubsan");
-static_assert(sizeof(Hit::Score) == 1,
- "Change how max_hit_score_ is initialized if changing the type "
- "of Hit::Score");
+static_assert(
+ sizeof(Hit::TermFrequency) == 1,
+ "Change how hit_term_frequency_ is initialized if changing the type "
+ "of Hit::TermFrequency");
} // namespace lib
} // namespace icing
diff --git a/icing/index/hit/doc-hit-info_test.cc b/icing/index/hit/doc-hit-info_test.cc
index 1e1880f..15c0de9 100644
--- a/icing/index/hit/doc-hit-info_test.cc
+++ b/icing/index/hit/doc-hit-info_test.cc
@@ -31,50 +31,43 @@ using ::testing::Ne;
constexpr DocumentId kSomeDocumentId = 12;
constexpr DocumentId kSomeOtherDocumentId = 54;
-TEST(DocHitInfoTest, InitialMaxHitScores) {
+TEST(DocHitInfoTest, InitialMaxHitTermFrequencies) {
DocHitInfo info(kSomeDocumentId);
for (SectionId i = 0; i <= kMaxSectionId; ++i) {
- EXPECT_THAT(info.max_hit_score(i), Eq(Hit::kDefaultHitScore));
+ EXPECT_THAT(info.hit_term_frequency(i), Eq(Hit::kDefaultTermFrequency));
}
}
-TEST(DocHitInfoTest, UpdateHitScores) {
+TEST(DocHitInfoTest, UpdateHitTermFrequenciesForTheFirstTime) {
DocHitInfo info(kSomeDocumentId);
- ASSERT_THAT(info.max_hit_score(3), Eq(Hit::kDefaultHitScore));
+ ASSERT_THAT(info.hit_term_frequency(3), Eq(Hit::kDefaultTermFrequency));
- // Updating a section for the first time, should change its max hit score,
- // even though the hit score (16) may be lower than the current value returned
- // by info.max_hit_score(3) (kDefaultHitScore)
+  // Updating a section for the first time should change its hit
+  // term frequency.
info.UpdateSection(3, 16);
- EXPECT_THAT(info.max_hit_score(3), Eq(16));
+ EXPECT_THAT(info.hit_term_frequency(3), Eq(16));
+}
- // Updating a section with a hit score lower than the previously set one
- // should not update max hit score.
+TEST(DocHitInfoTest, UpdateSectionLowerHitTermFrequencyHasNoEffect) {
+ DocHitInfo info(kSomeDocumentId);
+ info.UpdateSection(3, 16);
+ ASSERT_THAT(info.hit_term_frequency(3), Eq(16));
+
+ // Updating a section with a term frequency lower than the previously set
+ // one should have no effect.
info.UpdateSection(3, 15);
- EXPECT_THAT(info.max_hit_score(3), Eq(16));
+ EXPECT_THAT(info.hit_term_frequency(3), Eq(16));
+}
- // Updating a section with a hit score higher than the previously set one
- // should update the max hit score.
- info.UpdateSection(3, 17);
- EXPECT_THAT(info.max_hit_score(3), Eq(17));
-
- // Updating a section with kDefaultHitScore should *never* set the
- // max_hit_score to kDefaultHitScore (unless it already was kDefaultHitScore)
- // because kDefaultHitScore is the lowest possible valid hit score.
- info.UpdateSection(3, Hit::kDefaultHitScore);
- EXPECT_THAT(info.max_hit_score(3), Eq(17));
-
- // Updating a section with kMaxHitScore should *always* set the max hit
- // score to kMaxHitScore (regardless of what value kMaxHitScore is
- // defined with).
- info.UpdateSection(3, Hit::kMaxHitScore);
- EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
-
- // Updating a section that has had kMaxHitScore explicitly set, should
- // *never* change the max hit score (regardless of what value kMaxHitScore
- // is defined with).
+TEST(DocHitInfoTest, UpdateSectionHigherHitTermFrequencyHasNoEffect) {
+ DocHitInfo info(kSomeDocumentId);
info.UpdateSection(3, 16);
- EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
+ ASSERT_THAT(info.hit_term_frequency(3), Eq(16));
+
+ // Updating a section with a term frequency higher than the previously set
+ // one should have no effect.
+ info.UpdateSection(3, 17);
+ EXPECT_THAT(info.hit_term_frequency(3), Eq(16));
}
TEST(DocHitInfoTest, UpdateSectionIdMask) {
@@ -99,7 +92,7 @@ TEST(DocHitInfoTest, MergeSectionsFromDifferentDocumentId) {
DocHitInfo info2(kSomeOtherDocumentId);
info2.UpdateSection(7, 12);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(7), Eq(12));
+ EXPECT_THAT(info1.hit_term_frequency(7), Eq(12));
EXPECT_THAT(info1.document_id(), Eq(kSomeDocumentId));
}
@@ -110,7 +103,7 @@ TEST(DocHitInfoTest, MergeSectionsFromKeepsOldSection) {
info1.UpdateSection(3, 16);
DocHitInfo info2(kSomeDocumentId);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(3), Eq(16));
+ EXPECT_THAT(info1.hit_term_frequency(3), Eq(16));
}
TEST(DocHitInfoTest, MergeSectionsFromAddsNewSection) {
@@ -120,29 +113,29 @@ TEST(DocHitInfoTest, MergeSectionsFromAddsNewSection) {
DocHitInfo info2(kSomeDocumentId);
info2.UpdateSection(7, 12);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(7), Eq(12));
+ EXPECT_THAT(info1.hit_term_frequency(7), Eq(12));
}
-TEST(DocHitInfoTest, MergeSectionsFromSetsHigherHitScore) {
- // Merging should override the value of a section in info1 if the same section
- // is present in info2 with a higher hit score.
+TEST(DocHitInfoTest, MergeSectionsFromHigherHitTermFrequencyHasNoEffect) {
+ // Merging should not override the value of a section in info1 if the same
+ // section is present in info2.
DocHitInfo info1(kSomeDocumentId);
info1.UpdateSection(2, 77);
DocHitInfo info2(kSomeDocumentId);
info2.UpdateSection(2, 89);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(2), Eq(89));
+ EXPECT_THAT(info1.hit_term_frequency(2), Eq(77));
}
-TEST(DocHitInfoTest, MergeSectionsFromDoesNotSetLowerHitScore) {
+TEST(DocHitInfoTest, MergeSectionsFromLowerHitScoreHasNoEffect) {
// Merging should not override the hit score of a section in info1 if the same
- // section is present in info2 but with a lower hit score.
+ // section is present in info2.
DocHitInfo info1(kSomeDocumentId);
info1.UpdateSection(5, 108);
DocHitInfo info2(kSomeDocumentId);
info2.UpdateSection(5, 13);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(5), Eq(108));
+ EXPECT_THAT(info1.hit_term_frequency(5), Eq(108));
}
TEST(DocHitInfoTest, Comparison) {
@@ -156,7 +149,7 @@ TEST(DocHitInfoTest, Comparison) {
DocHitInfo high_section_id_info(kDocumentId);
high_section_id_info.UpdateSection(1, 12);
- high_section_id_info.UpdateSection(6, Hit::kDefaultHitScore);
+ high_section_id_info.UpdateSection(6, Hit::kDefaultTermFrequency);
std::vector<DocHitInfo> infos{info, high_document_id_info,
high_section_id_info};
@@ -166,10 +159,10 @@ TEST(DocHitInfoTest, Comparison) {
// There are no requirements for how DocHitInfos with the same DocumentIds and
// hit masks will compare, but they must not be equal.
- DocHitInfo different_hit_score_info(kDocumentId);
- different_hit_score_info.UpdateSection(1, 76);
- EXPECT_THAT(info < different_hit_score_info,
- Ne(different_hit_score_info < info));
+ DocHitInfo different_term_frequency_info(kDocumentId);
+ different_term_frequency_info.UpdateSection(1, 76);
+ EXPECT_THAT(info < different_term_frequency_info,
+ Ne(different_term_frequency_info < info));
}
} // namespace lib
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc
index d089dd5..2a5a0d9 100644
--- a/icing/index/hit/hit.cc
+++ b/icing/index/hit/hit.cc
@@ -30,8 +30,9 @@ enum FlagOffset {
// This hit represents a prefix of a longer term. If exact matches are
// required, then this hit should be ignored.
kPrefixHit = 1,
- // Whether or not the hit has a hit score other than kDefaultHitScore.
- kHasScore = 2,
+ // Whether or not the hit has a term_frequency other than
+ // kDefaultTermFrequency.
+ kHasTermFrequency = 2,
kNumFlags = 3,
};
static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <=
@@ -51,9 +52,10 @@ inline DocumentId InvertDocumentId(DocumentId document_id) {
} // namespace
-Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score,
- bool is_in_prefix_section, bool is_prefix_hit)
- : score_(score) {
+Hit::Hit(SectionId section_id, DocumentId document_id,
+ Hit::TermFrequency term_frequency, bool is_in_prefix_section,
+ bool is_prefix_hit)
+ : term_frequency_(term_frequency) {
// Values are stored so that when sorted, they appear in document_id
// descending, section_id ascending, order. Also, all else being
// equal, non-prefix hits sort before prefix hits. So inverted
@@ -64,7 +66,8 @@ Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score,
kSectionIdBits + kNumFlags, kDocumentIdBits,
&temp_value);
bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value);
- bit_util::BitfieldSet(score != kDefaultHitScore, kHasScore, 1, &temp_value);
+ bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
+ kHasTermFrequency, 1, &temp_value);
bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &temp_value);
bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, 1, &temp_value);
value_ = temp_value;
@@ -81,8 +84,8 @@ SectionId Hit::section_id() const {
return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits);
}
-bool Hit::has_score() const {
- return bit_util::BitfieldGet(value(), kHasScore, 1);
+bool Hit::has_term_frequency() const {
+ return bit_util::BitfieldGet(value(), kHasTermFrequency, 1);
}
bool Hit::is_prefix_hit() const {
diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h
index 53553f0..525a5e5 100644
--- a/icing/index/hit/hit.h
+++ b/icing/index/hit/hit.h
@@ -31,18 +31,17 @@ namespace lib {
// - a SectionId
// referring to the document and section that the hit corresponds to, as well as
// metadata about the hit:
-// - whether the Hit has a Score other than the default value
+// - whether the Hit has a TermFrequency other than the default value
// - whether the Hit does not appear exactly in the document, but instead
// represents a term that is a prefix of a term in the document
// - whether the Hit came from a section that has prefix expansion enabled
-// and a score for the hit. Ranging from [0,255] a higher score indicates a
-// higher quality hit.
+// and a term frequency for the hit.
// The hit is the most basic unit of the index and, when grouped together by
// term, can be used to encode what terms appear in what documents.
class Hit {
public:
// The datatype used to encode Hit information: the document_id, section_id
- // and the has_score, prefix hit and in prefix section flags.
+ // and the has_term_frequency, prefix hit and in prefix section flags.
using Value = uint32_t;
// WARNING: Changing this value will invalidate any pre-existing posting lists
@@ -53,28 +52,27 @@ class Hit {
// the max in a descending sort.
static constexpr Value kMaxDocumentIdSortValue = 0;
- // A score reflecting the "quality" of this hit. The higher the score, the
- // higher quality the hit.
- // The score is being repurposed for term frequency.
- // TODO(b/173156700): refactor Score to TermFrequency.
- using Score = uint8_t;
- // Max Score is 255.
- static constexpr Score kMaxHitScore = std::numeric_limits<Score>::max();
- // Default value of term frequency is 1.
- static constexpr Score kDefaultHitScore = 1;
+ // The Term Frequency of a Hit.
+ using TermFrequency = uint8_t;
+ // Max TermFrequency is 255.
+ static constexpr TermFrequency kMaxTermFrequency =
+ std::numeric_limits<TermFrequency>::max();
+ static constexpr TermFrequency kDefaultTermFrequency = 1;
- explicit Hit(Value value = kInvalidValue, Score score = kDefaultHitScore)
- : value_(value), score_(score) {}
- Hit(SectionId section_id, DocumentId document_id, Score score,
- bool is_in_prefix_section = false, bool is_prefix_hit = false);
+ explicit Hit(Value value = kInvalidValue,
+ TermFrequency term_frequency = kDefaultTermFrequency)
+ : value_(value), term_frequency_(term_frequency) {}
+ Hit(SectionId section_id, DocumentId document_id,
+ TermFrequency term_frequency, bool is_in_prefix_section = false,
+ bool is_prefix_hit = false);
bool is_valid() const { return value() != kInvalidValue; }
Value value() const { return value_; }
DocumentId document_id() const;
SectionId section_id() const;
- // Whether or not the hit contains a non-default score.
- bool has_score() const;
- Score score() const { return score_; }
+ // Whether or not the hit contains a valid term frequency.
+ bool has_term_frequency() const;
+ TermFrequency term_frequency() const { return term_frequency_; }
bool is_prefix_hit() const;
bool is_in_prefix_section() const;
@@ -86,10 +84,10 @@ class Hit {
};
private:
- // Value and score must be in this order.
+ // Value and TermFrequency must be in this order.
// Value bits layout: 5 unused + 20 document_id + 4 section id + 3 flags.
Value value_;
- Score score_;
+ TermFrequency term_frequency_;
} __attribute__((packed));
static_assert(sizeof(Hit) == 5, "");
// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
diff --git a/icing/index/hit/hit_test.cc b/icing/index/hit/hit_test.cc
index 8c883d1..d47ca37 100644
--- a/icing/index/hit/hit_test.cc
+++ b/icing/index/hit/hit_test.cc
@@ -33,46 +33,46 @@ using ::testing::Not;
static constexpr DocumentId kSomeDocumentId = 24;
static constexpr SectionId kSomeSectionid = 5;
-static constexpr Hit::Score kSomeHitScore = 57;
+static constexpr Hit::TermFrequency kSomeTermFrequency = 57;
-TEST(HitTest, HasScoreFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
- EXPECT_THAT(h1.has_score(), IsFalse());
- EXPECT_THAT(h1.score(), Eq(Hit::kDefaultHitScore));
+TEST(HitTest, HasTermFrequencyFlag) {
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
+ EXPECT_THAT(h1.has_term_frequency(), IsFalse());
+ EXPECT_THAT(h1.term_frequency(), Eq(Hit::kDefaultTermFrequency));
- Hit h2(kSomeSectionid, kSomeDocumentId, kSomeHitScore);
- EXPECT_THAT(h2.has_score(), IsTrue());
- EXPECT_THAT(h2.score(), Eq(kSomeHitScore));
+ Hit h2(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency);
+ EXPECT_THAT(h2.has_term_frequency(), IsTrue());
+ EXPECT_THAT(h2.term_frequency(), Eq(kSomeTermFrequency));
}
TEST(HitTest, IsPrefixHitFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.is_prefix_hit(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
EXPECT_THAT(h2.is_prefix_hit(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
EXPECT_THAT(h3.is_prefix_hit(), IsTrue());
}
TEST(HitTest, IsInPrefixSectionFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.is_in_prefix_section(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
EXPECT_THAT(h2.is_in_prefix_section(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
EXPECT_THAT(h3.is_in_prefix_section(), IsTrue());
}
TEST(HitTest, Accessors) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId));
EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid));
}
@@ -88,48 +88,53 @@ TEST(HitTest, Valid) {
Hit explicit_valid(kSomeValue);
EXPECT_THAT(explicit_valid.is_valid(), IsTrue());
- Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId, kSomeHitScore);
+ Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId,
+ kSomeTermFrequency);
EXPECT_THAT(maximum_document_id_hit.is_valid(), IsTrue());
- Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId, kSomeHitScore);
+ Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId,
+ kSomeTermFrequency);
EXPECT_THAT(maximum_section_id_hit.is_valid(), IsTrue());
- Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeHitScore);
+ Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeTermFrequency);
EXPECT_THAT(minimum_document_id_hit.is_valid(), IsTrue());
- Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeHitScore);
+ Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeTermFrequency);
EXPECT_THAT(minimum_section_id_hit.is_valid(), IsTrue());
}
TEST(HitTest, Comparison) {
- Hit hit(1, 243, Hit::kDefaultHitScore);
+ Hit hit(1, 243, Hit::kDefaultTermFrequency);
// DocumentIds are sorted in ascending order. So a hit with a lower
// document_id should be considered greater than one with a higher
// document_id.
- Hit higher_document_id_hit(1, 2409, Hit::kDefaultHitScore);
- Hit higher_section_id_hit(15, 243, Hit::kDefaultHitScore);
- // Whether or not a hit score was set is considered, but the score itself is
- // not.
- Hit hitscore_hit(1, 243, 12);
- Hit prefix_hit(1, 243, Hit::kDefaultHitScore,
+ Hit higher_document_id_hit(1, 2409, Hit::kDefaultTermFrequency);
+ Hit higher_section_id_hit(15, 243, Hit::kDefaultTermFrequency);
+ // Whether or not a term frequency was set is considered, but the term
+ // frequency itself is not.
+ Hit term_frequency_hit(1, 243, 12);
+ Hit prefix_hit(1, 243, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false,
/*is_prefix_hit=*/true);
- Hit hit_in_prefix_section(1, 243, Hit::kDefaultHitScore,
+ Hit hit_in_prefix_section(1, 243, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/false);
- std::vector<Hit> hits{
- hit, higher_document_id_hit, higher_section_id_hit, hitscore_hit,
- prefix_hit, hit_in_prefix_section};
+ std::vector<Hit> hits{hit,
+ higher_document_id_hit,
+ higher_section_id_hit,
+ term_frequency_hit,
+ prefix_hit,
+ hit_in_prefix_section};
std::sort(hits.begin(), hits.end());
- EXPECT_THAT(hits,
- ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
- prefix_hit, hitscore_hit, higher_section_id_hit));
-
- Hit higher_hitscore_hit(1, 243, 108);
- // Hit score value is not considered when comparing hits.
- EXPECT_THAT(hitscore_hit, Not(Lt(higher_hitscore_hit)));
- EXPECT_THAT(higher_hitscore_hit, Not(Lt(hitscore_hit)));
+ EXPECT_THAT(
+ hits, ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
+ prefix_hit, term_frequency_hit, higher_section_id_hit));
+
+ Hit higher_term_frequency_hit(1, 243, 108);
+ // The term frequency value is not considered when comparing hits.
+ EXPECT_THAT(term_frequency_hit, Not(Lt(higher_term_frequency_hit)));
+ EXPECT_THAT(higher_term_frequency_hit, Not(Lt(term_frequency_hit)));
}
} // namespace
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 3168dad..bdd9575 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -294,7 +294,8 @@ TEST_F(IndexProcessorTest, OneDoc) {
index_->GetIterator("hello", kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
- std::unordered_map<SectionId, Hit::Score> expectedMap{{kExactSectionId, 1}};
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
+ {kExactSectionId, 1}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId0, expectedMap)));
@@ -316,7 +317,7 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
std::string coffeeRepeatedString = "coffee";
- for (int i = 0; i < Hit::kMaxHitScore + 1; i++) {
+ for (int i = 0; i < Hit::kMaxTermFrequency + 1; i++) {
coffeeRepeatedString += " coffee";
}
@@ -335,9 +336,10 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
index_->GetIterator("world", kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
std::vector<DocHitInfo> hits = GetHits(std::move(itr));
- std::unordered_map<SectionId, Hit::Score> expectedMap1{
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap1{
{kPrefixedSectionId, 2}};
- std::unordered_map<SectionId, Hit::Score> expectedMap2{{kExactSectionId, 1}};
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap2{
+ {kExactSectionId, 1}};
EXPECT_THAT(
hits, ElementsAre(
EqualsDocHitInfoWithTermFrequency(kDocumentId1, expectedMap1),
@@ -347,7 +349,7 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
itr, index_->GetIterator("world", 1U << kPrefixedSectionId,
TermMatchType::EXACT_ONLY));
hits = GetHits(std::move(itr));
- std::unordered_map<SectionId, Hit::Score> expectedMap{
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
{kPrefixedSectionId, 2}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId1, expectedMap)));
@@ -356,7 +358,7 @@ TEST_F(IndexProcessorTest, MultipleDocs) {
index_->GetIterator("coffee", kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
hits = GetHits(std::move(itr));
- expectedMap = {{kExactSectionId, Hit::kMaxHitScore}};
+ expectedMap = {{kExactSectionId, Hit::kMaxTermFrequency}};
EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
kDocumentId1, expectedMap)));
}
diff --git a/icing/index/index.cc b/icing/index/index.cc
index f0c8bbd..bd41b51 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -287,7 +287,7 @@ libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
tvi = tvi_or.ValueOrDie();
if (seen_tokens_.find(tvi) != seen_tokens_.end()) {
ICING_VLOG(1) << "Updating term frequency for term " << term;
- if (seen_tokens_[tvi] != Hit::kMaxHitScore) {
+ if (seen_tokens_[tvi] != Hit::kMaxTermFrequency) {
++seen_tokens_[tvi];
}
return libtextclassifier3::Status::OK;
@@ -310,7 +310,7 @@ libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
libtextclassifier3::Status Index::Editor::IndexAllBufferedTerms() {
for (auto itr = seen_tokens_.begin(); itr != seen_tokens_.end(); itr++) {
- Hit hit(section_id_, document_id_, /*score=*/itr->second,
+ Hit hit(section_id_, document_id_, /*term_frequency=*/itr->second,
term_match_type_ == TermMatchType::PREFIX);
ICING_ASSIGN_OR_RETURN(
uint32_t term_id, term_id_codec_->EncodeTvi(itr->first, TviType::LITE));
diff --git a/icing/index/index.h b/icing/index/index.h
index 32f2b17..a4ea719 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -206,7 +206,7 @@ class Index {
// The Editor is able to store previously seen terms as TermIds. This is
// is more efficient than a client doing this externally because TermIds are
// not exposed to clients.
- std::unordered_map<uint32_t, Hit::Score> seen_tokens_;
+ std::unordered_map<uint32_t, Hit::TermFrequency> seen_tokens_;
const TermIdCodec* term_id_codec_;
LiteIndex* lite_index_;
DocumentId document_id_;
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index ea4bcaf..e0379b8 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -365,7 +365,7 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
last_document_id = document_id;
}
if (hits_out != nullptr) {
- hits_out->back().UpdateSection(hit.section_id(), hit.score());
+ hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency());
}
}
return count;
@@ -448,7 +448,7 @@ uint32_t LiteIndex::Seek(uint32_t term_id) {
// Binary search for our term_id. Make sure we get the first
// element. Using kBeginSortValue ensures this for the hit value.
TermIdHitPair term_id_hit_pair(
- term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultHitScore));
+ term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
const TermIdHitPair::Value* array =
hit_buffer_.array_cast<TermIdHitPair::Value>();
diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h
index 191f766..61ec502 100644
--- a/icing/index/lite/term-id-hit-pair.h
+++ b/icing/index/lite/term-id-hit-pair.h
@@ -29,39 +29,42 @@ namespace lib {
class TermIdHitPair {
public:
- // Layout bits: 24 termid + 32 hit value + 8 hit score.
+ // Layout bits: 24 termid + 32 hit value + 8 hit term frequency.
using Value = uint64_t;
static constexpr int kTermIdBits = 24;
static constexpr int kHitValueBits = sizeof(Hit::Value) * 8;
- static constexpr int kHitScoreBits = sizeof(Hit::Score) * 8;
+ static constexpr int kHitTermFrequencyBits = sizeof(Hit::TermFrequency) * 8;
static const Value kInvalidValue;
explicit TermIdHitPair(Value v = kInvalidValue) : value_(v) {}
TermIdHitPair(uint32_t term_id, const Hit& hit) {
- static_assert(
- kTermIdBits + kHitValueBits + kHitScoreBits <= sizeof(Value) * 8,
- "TermIdHitPairTooBig");
+ static_assert(kTermIdBits + kHitValueBits + kHitTermFrequencyBits <=
+ sizeof(Value) * 8,
+ "TermIdHitPairTooBig");
value_ = 0;
// Term id goes into the most significant bits because it takes
// precedent in sorts.
- bit_util::BitfieldSet(term_id, kHitValueBits + kHitScoreBits, kTermIdBits,
+ bit_util::BitfieldSet(term_id, kHitValueBits + kHitTermFrequencyBits,
+ kTermIdBits, &value_);
+ bit_util::BitfieldSet(hit.value(), kHitTermFrequencyBits, kHitValueBits,
+ &value_);
+ bit_util::BitfieldSet(hit.term_frequency(), 0, kHitTermFrequencyBits,
&value_);
- bit_util::BitfieldSet(hit.value(), kHitScoreBits, kHitValueBits, &value_);
- bit_util::BitfieldSet(hit.score(), 0, kHitScoreBits, &value_);
}
uint32_t term_id() const {
- return bit_util::BitfieldGet(value_, kHitValueBits + kHitScoreBits,
+ return bit_util::BitfieldGet(value_, kHitValueBits + kHitTermFrequencyBits,
kTermIdBits);
}
Hit hit() const {
- return Hit(bit_util::BitfieldGet(value_, kHitScoreBits, kHitValueBits),
- bit_util::BitfieldGet(value_, 0, kHitScoreBits));
+ return Hit(
+ bit_util::BitfieldGet(value_, kHitTermFrequencyBits, kHitValueBits),
+ bit_util::BitfieldGet(value_, 0, kHitTermFrequencyBits));
}
Value value() const { return value_; }
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index a60764d..5553c1e 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -114,7 +114,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermMainExact::RetrieveMoreHits() {
hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
}
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), hit.score());
+ cached_doc_hit_infos_.back().UpdateSection(hit.section_id(),
+ hit.term_frequency());
}
return libtextclassifier3::Status::OK;
}
@@ -162,7 +163,8 @@ DocHitInfoIteratorTermMainPrefix::RetrieveMoreHits() {
hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
}
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), hit.score());
+ cached_doc_hit_infos_.back().UpdateSection(hit.section_id(),
+ hit.term_frequency());
}
return libtextclassifier3::Status::OK;
}
diff --git a/icing/index/main/flash-index-storage_test.cc b/icing/index/main/flash-index-storage_test.cc
index cf899b3..7e15524 100644
--- a/icing/index/main/flash-index-storage_test.cc
+++ b/icing/index/main/flash-index-storage_test.cc
@@ -160,10 +160,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemory) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -183,10 +183,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemory) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
@@ -217,10 +217,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemory) {
EXPECT_THAT(posting_list_holder3.posting_list.GetHits(),
IsOkAndHolds(IsEmpty()));
std::vector<Hit> hits3 = {
- Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62),
- Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45),
- Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12),
- Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)};
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
for (const Hit& hit : hits3) {
ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit));
}
@@ -256,10 +256,10 @@ TEST_F(FlashIndexStorageTest, FreeListNotInMemory) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -279,10 +279,10 @@ TEST_F(FlashIndexStorageTest, FreeListNotInMemory) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
@@ -313,10 +313,10 @@ TEST_F(FlashIndexStorageTest, FreeListNotInMemory) {
EXPECT_THAT(posting_list_holder3.posting_list.GetHits(),
IsOkAndHolds(IsEmpty()));
std::vector<Hit> hits3 = {
- Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62),
- Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45),
- Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12),
- Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)};
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
for (const Hit& hit : hits3) {
ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit));
}
@@ -354,10 +354,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -377,10 +377,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
@@ -425,10 +425,10 @@ TEST_F(FlashIndexStorageTest, FreeListInMemoryPersistence) {
EXPECT_THAT(posting_list_holder3.posting_list.GetHits(),
IsOkAndHolds(IsEmpty()));
std::vector<Hit> hits3 = {
- Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62),
- Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45),
- Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12),
- Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)};
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
for (const Hit& hit : hits3) {
ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit));
}
@@ -466,10 +466,10 @@ TEST_F(FlashIndexStorageTest, DifferentSizedPostingLists) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -492,10 +492,10 @@ TEST_F(FlashIndexStorageTest, DifferentSizedPostingLists) {
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
diff --git a/icing/index/main/index-block.cc b/icing/index/main/index-block.cc
index 652dbc6..4590d06 100644
--- a/icing/index/main/index-block.cc
+++ b/icing/index/main/index-block.cc
@@ -51,7 +51,7 @@ libtextclassifier3::Status ValidatePostingListBytes(uint32_t posting_list_bytes,
uint32_t IndexBlock::ApproximateFullPostingListHitsForBlock(
uint32_t block_size, int posting_list_index_bits) {
- // Assume 50% compressed and most don't have scores.
+ // Assume 50% compressed and most don't have term frequencies.
uint32_t bytes_per_hit = sizeof(Hit::Value) / 2;
return (block_size - sizeof(BlockHeader)) /
((1u << posting_list_index_bits) * bytes_per_hit);
diff --git a/icing/index/main/index-block_test.cc b/icing/index/main/index-block_test.cc
index 493055f..322918d 100644
--- a/icing/index/main/index-block_test.cc
+++ b/icing/index/main/index-block_test.cc
@@ -105,11 +105,11 @@ TEST(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
std::vector<Hit> test_hits{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore),
- Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
- Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
PostingListIndex allocated_index;
{
@@ -152,18 +152,18 @@ TEST(IndexBlockTest, IndexBlockMultiplePostingLists) {
ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
std::vector<Hit> hits_in_posting_list1{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore),
- Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
- Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
std::vector<Hit> hits_in_posting_list2{
- Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultHitScore),
- Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
- Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
- Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
};
PostingListIndex allocated_index_1;
PostingListIndex allocated_index_2;
@@ -242,11 +242,11 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) {
// Add hits to the first posting list.
std::vector<Hit> hits_in_posting_list1{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore),
- Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
- Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1,
block.AllocatePostingList());
@@ -261,11 +261,11 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) {
// Add hits to the second posting list.
std::vector<Hit> hits_in_posting_list2{
- Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultHitScore),
- Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
- Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
- Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultHitScore),
+ Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2,
block.AllocatePostingList());
@@ -288,9 +288,9 @@ TEST(IndexBlockTest, IndexBlockReallocatingPostingLists) {
EXPECT_TRUE(block.has_free_posting_lists());
std::vector<Hit> hits_in_posting_list3{
- Hit(/*section_id=*/12, /*document_id=*/0, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultHitScore),
- Hit(/*section_id=*/0, /*document_id=*/2, /*score=*/2),
+ Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3,
block.AllocatePostingList());
diff --git a/icing/index/main/main-index-merger.cc b/icing/index/main/main-index-merger.cc
index 500774d..f49dc74 100644
--- a/icing/index/main/main-index-merger.cc
+++ b/icing/index/main/main-index-merger.cc
@@ -62,14 +62,15 @@ class HitSelector {
(*hits)[pos++] = best_exact_hit_;
const Hit& prefix_hit = best_prefix_hit_.hit();
// The prefix hit has score equal to the sum of the scores, capped at
- // kMaxHitScore.
- Hit::Score final_score =
- std::min(static_cast<int>(Hit::kMaxHitScore),
- prefix_hit.score() + best_exact_hit_.hit().score());
+ // kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ prefix_hit.term_frequency() + best_exact_hit_.hit().term_frequency());
best_prefix_hit_ = TermIdHitPair(
best_prefix_hit_.term_id(),
- Hit(prefix_hit.section_id(), prefix_hit.document_id(), final_score,
- prefix_hit.is_in_prefix_section(), prefix_hit.is_prefix_hit()));
+ Hit(prefix_hit.section_id(), prefix_hit.document_id(),
+ final_term_frequency, prefix_hit.is_in_prefix_section(),
+ prefix_hit.is_prefix_hit()));
(*hits)[pos++] = best_prefix_hit_;
// Ensure sorted.
if (best_prefix_hit_.hit() < best_exact_hit_.hit()) {
@@ -97,15 +98,15 @@ class HitSelector {
} else {
const Hit& hit = term_id_hit_pair.hit();
// Create a new prefix hit with term_frequency as the sum of the term
- // frequencies. The term frequency is capped at kMaxHitScore.
- Hit::Score final_score =
- std::min(static_cast<int>(Hit::kMaxHitScore),
- hit.score() + best_prefix_hit_.hit().score());
- best_prefix_hit_ =
- TermIdHitPair(term_id_hit_pair.term_id(),
- Hit(hit.section_id(), hit.document_id(), final_score,
- best_prefix_hit_.hit().is_in_prefix_section(),
- best_prefix_hit_.hit().is_prefix_hit()));
+ // frequencies. The term frequency is capped at kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ hit.term_frequency() + best_prefix_hit_.hit().term_frequency());
+ best_prefix_hit_ = TermIdHitPair(
+ term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_term_frequency,
+ best_prefix_hit_.hit().is_in_prefix_section(),
+ best_prefix_hit_.hit().is_prefix_hit()));
}
}
@@ -116,14 +117,14 @@ class HitSelector {
const Hit& hit = term_id_hit_pair.hit();
// Create a new exact hit with term_frequency as the sum of the term
// frequencies. The term frequency is capped at kMaxHitScore.
- Hit::Score final_score =
- std::min(static_cast<int>(Hit::kMaxHitScore),
- hit.score() + best_exact_hit_.hit().score());
- best_exact_hit_ =
- TermIdHitPair(term_id_hit_pair.term_id(),
- Hit(hit.section_id(), hit.document_id(), final_score,
- best_exact_hit_.hit().is_in_prefix_section(),
- best_exact_hit_.hit().is_prefix_hit()));
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ hit.term_frequency() + best_exact_hit_.hit().term_frequency());
+ best_exact_hit_ = TermIdHitPair(
+ term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_term_frequency,
+ best_exact_hit_.hit().is_in_prefix_section(),
+ best_exact_hit_.hit().is_prefix_hit()));
}
}
@@ -192,10 +193,10 @@ class HitComparator {
// {"foot", docid0, sectionid0}
// {"fool", docid0, sectionid0}
//
-// When two or more prefix hits are duplicates, merge into one hit with score as
-// the sum of the scores. If there is both an exact and prefix hit for the same
-// term, keep the exact hit as it is, update the prefix hit so that its score is
-// the sum of the scores.
+// When two or more prefix hits are duplicates, merge into one hit with term
+// frequency as the sum of the term frequencies. If there is both an exact and
+// prefix hit for the same term, keep the exact hit as it is, update the prefix
+// hit so that its term frequency is the sum of the term frequencies.
void DedupeHits(
std::vector<TermIdHitPair>* hits, const TermIdCodec& term_id_codec,
const std::unordered_map<uint32_t, int>& main_tvi_to_block_index) {
@@ -278,7 +279,7 @@ MainIndexMerger::TranslateAndExpandLiteHits(
size_t offset = itr_prefixes->second.first;
size_t len = itr_prefixes->second.second;
size_t offset_end_exclusive = offset + len;
- Hit prefix_hit(hit.section_id(), hit.document_id(), hit.score(),
+ Hit prefix_hit(hit.section_id(), hit.document_id(), hit.term_frequency(),
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
for (; offset < offset_end_exclusive; ++offset) {
// Take the tvi (in the main lexicon) of each prefix term.
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
index 93f4576..8a2f691 100644
--- a/icing/index/main/main-index-merger_test.cc
+++ b/icing/index/main/main-index-merger_test.cc
@@ -86,10 +86,10 @@ TEST_F(MainIndexMergerTest, TranslateTermNotAdded) {
uint32_t fool_term_id,
term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
@@ -125,10 +125,10 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) {
uint32_t fool_term_id,
term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
@@ -139,7 +139,7 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) {
uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
Hit doc1_prefix_hit(/*section_id=*/0, /*document_id=*/1,
- Hit::kDefaultHitScore,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -173,7 +173,7 @@ TEST_F(MainIndexMergerTest, PrefixExpansion) {
TermIdHitPair(foo_term_id, doc1_prefix_hit)));
}
-TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
+TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentTermFrequencies) {
// 1. Index one doc in the Lite Index:
// - Doc0 {"foot" "foo" is_in_prefix_section=TRUE}
ICING_ASSERT_OK_AND_ASSIGN(
@@ -188,10 +188,11 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
- Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
@@ -201,9 +202,10 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t foo_main_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
- // The prefix hit for 'foot' should have the same score as the exact hit for
- // 'foot'. The final prefix hit has score equal to 58.
- Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/58,
+ // The prefix hit for 'foot' should have the same term frequency as the exact
+ // hit for 'foot'. The final prefix hit has term frequency equal to 58.
+ Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/58,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -223,7 +225,7 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' to have a hit for 'foo'
// c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
- // latter with score as the sum of the scores.
+ // latter with term frequency as the sum of the term frequencies.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
@@ -235,7 +237,7 @@ TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
TermIdHitPair(foo_main_term_id, doc0_prefix_hit)));
}
-TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) {
+TEST_F(MainIndexMergerTest, DedupeWithExactSameTermFrequencies) {
// 1. Index one doc in the Lite Index:
// - Doc0 {"foot" "foo" is_in_prefix_section=TRUE}
ICING_ASSERT_OK_AND_ASSIGN(
@@ -250,14 +252,15 @@ TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
- Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
- // The prefix hit should take the sum as score - 114.
- Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/114,
+ // The prefix hit should take the sum as term_frequency - 114.
+ Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/114,
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/true);
@@ -285,7 +288,7 @@ TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) {
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' to have a hit for 'foo'
// c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
- // latter with score as the sum of the scores.
+ // latter with term frequency as the sum of the term frequencies.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
@@ -314,10 +317,11 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0,
- /*score=*/Hit::kMaxHitScore,
+ /*term_frequency=*/Hit::kMaxTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, fool_doc0_hit));
@@ -327,9 +331,10 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
- // The prefix hit should take the sum as score - 256, capped at kMaxHitScore.
+ // The prefix hit should take the sum as term frequency - 256, capped at
+ // kMaxTermFrequency.
Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0,
- /*score=*/Hit::kMaxHitScore,
+ /*term_frequency=*/Hit::kMaxTermFrequency,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -356,8 +361,8 @@ TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
// 3. TranslateAndExpand should;
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' and 'fool' to have hits for 'foo'
- // c. Merge the prefix hits from 'foot' and 'fool', taking the sum as hit
- // score.
+ // c. Merge the prefix hits from 'foot' and 'fool', taking the sum as
+ // term frequency.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index ff1c47a..636f631 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -579,7 +579,8 @@ libtextclassifier3::Status MainIndex::AddPrefixBackfillHits(
}
// A backfill hit is a prefix hit in a prefix section.
- const Hit backfill_hit(hit.section_id(), hit.document_id(), hit.score(),
+ const Hit backfill_hit(hit.section_id(), hit.document_id(),
+ hit.term_frequency(),
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/true);
if (backfill_hit == last_added_hit) {
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index 0f87b09..74139be 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -145,7 +145,7 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsValidAccessor) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
@@ -182,7 +182,7 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForExactReturnsValidAccessor) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
@@ -219,18 +219,18 @@ TEST_F(MainIndexTest, MergeIndexToEmpty) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
@@ -292,18 +292,18 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
@@ -345,14 +345,14 @@ TEST_F(MainIndexTest, MergeIndexToPreexisting) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t fall_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultHitScore,
+ Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fall_term_id, doc3_hit));
- Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultHitScore,
+ Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc4_hit));
ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc4_hit));
@@ -404,15 +404,15 @@ TEST_F(MainIndexTest, ExactRetrievedInPrefixSearch) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc2_hit));
@@ -453,15 +453,15 @@ TEST_F(MainIndexTest, PrefixNotRetrievedInExactSearch) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc2_hit));
@@ -500,17 +500,17 @@ TEST_F(MainIndexTest, SearchChainedPostingLists) {
for (DocumentId document_id = 0; document_id < 2048; ++document_id) {
Hit doc_hit0(/*section_id=*/0, /*document_id=*/document_id,
- Hit::kDefaultHitScore,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0));
Hit doc_hit1(/*section_id=*/1, /*document_id=*/document_id,
- Hit::kDefaultHitScore,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1));
Hit doc_hit2(/*section_id=*/2, /*document_id=*/document_id,
- Hit::kDefaultHitScore,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2));
}
@@ -543,7 +543,7 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
@@ -570,7 +570,7 @@ TEST_F(MainIndexTest, MergeIndexBackfilling) {
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
diff --git a/icing/index/main/posting-list-accessor_test.cc b/icing/index/main/posting-list-accessor_test.cc
index 85f6d4a..a539fe4 100644
--- a/icing/index/main/posting-list-accessor_test.cc
+++ b/icing/index/main/posting-list-accessor_test.cc
@@ -82,7 +82,7 @@ TEST(PostingListAccessorStorageTest, PreexistingPLKeepOnSameBlock) {
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
// Add a single hit. This will fit in a min-sized posting list.
- Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultHitScore);
+ Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
@@ -324,14 +324,14 @@ TEST(PostingListAccessorStorageTest, HitsNotDecreasingReturnsInvalidArgument) {
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
- Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultHitScore);
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
- Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultHitScore);
+ Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency);
EXPECT_THAT(pl_accessor.PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultHitScore);
+ Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency);
EXPECT_THAT(pl_accessor.PrependHit(hit3),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -364,7 +364,7 @@ TEST(PostingListAccessorStorageTest, PreexistingPostingListNoHitsAdded) {
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
- Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultHitScore);
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
diff --git a/icing/index/main/posting-list-used.cc b/icing/index/main/posting-list-used.cc
index a439c45..62e73e5 100644
--- a/icing/index/main/posting-list-used.cc
+++ b/icing/index/main/posting-list-used.cc
@@ -30,8 +30,8 @@ namespace lib {
namespace {
-uint32_t GetScoreByteSize(const Hit &hit) {
- return hit.has_score() ? sizeof(Hit::Score) : 0;
+uint32_t GetTermFrequencyByteSize(const Hit &hit) {
+ return hit.has_term_frequency() ? sizeof(Hit::TermFrequency) : 0;
}
} // namespace
@@ -153,21 +153,21 @@ libtextclassifier3::Status PostingListUsed::PrependHitToAlmostFull(
uint64_t delta = cur.value() - hit.value();
uint8_t delta_buf[VarInt::kMaxEncodedLen64];
size_t delta_len = VarInt::Encode(delta, delta_buf);
- uint32_t cur_score_bytes = GetScoreByteSize(cur);
+ uint32_t cur_term_frequency_bytes = GetTermFrequencyByteSize(cur);
uint32_t pad_end = GetPadEnd(posting_list_utils::kSpecialHitsSize);
- if (pad_end >=
- posting_list_utils::kSpecialHitsSize + delta_len + cur_score_bytes) {
- // Pad area has enough space for delta and score of existing hit
- // (cur). Write delta at pad_end - delta_len - cur_score_bytes.
+ if (pad_end >= posting_list_utils::kSpecialHitsSize + delta_len +
+ cur_term_frequency_bytes) {
+ // Pad area has enough space for delta and term_frequency of existing hit
+ // (cur). Write delta at pad_end - delta_len - cur_term_frequency_bytes.
uint8_t *delta_offset =
- posting_list_buffer_ + pad_end - delta_len - cur_score_bytes;
+ posting_list_buffer_ + pad_end - delta_len - cur_term_frequency_bytes;
memcpy(delta_offset, delta_buf, delta_len);
- // Now copy score.
- Hit::Score score = cur.score();
- uint8_t *score_offset = delta_offset + delta_len;
- memcpy(score_offset, &score, cur_score_bytes);
+ // Now copy term_frequency.
+ Hit::TermFrequency term_frequency = cur.term_frequency();
+ uint8_t *term_frequency_offset = delta_offset + delta_len;
+ memcpy(term_frequency_offset, &term_frequency, cur_term_frequency_bytes);
// Now first hit is the new hit, at special position 1. Safe to ignore the
// return value because 1 < kNumSpecialHits.
@@ -224,12 +224,12 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull(
uint64_t delta = cur_value - hit.value();
uint8_t delta_buf[VarInt::kMaxEncodedLen64];
size_t delta_len = VarInt::Encode(delta, delta_buf);
- uint32_t hit_score_bytes = GetScoreByteSize(hit);
+ uint32_t hit_term_frequency_bytes = GetTermFrequencyByteSize(hit);
// offset now points to one past the end of the first hit.
offset += sizeof(Hit::Value);
if (posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value) + delta_len +
- hit_score_bytes <=
+ hit_term_frequency_bytes <=
offset) {
// Enough space for delta in compressed area.
@@ -237,15 +237,15 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull(
offset -= delta_len;
memcpy(posting_list_buffer_ + offset, delta_buf, delta_len);
- // Prepend new hit with (possibly) its score. We know that there is room
- // for 'hit' because of the if statement above, so calling ValueOrDie is
- // safe.
+ // Prepend new hit with (possibly) its term_frequency. We know that there is
+ // room for 'hit' because of the if statement above, so calling ValueOrDie
+ // is safe.
offset = PrependHitUncompressed(hit, offset).ValueOrDie();
// offset is guaranteed to be valid here. So it's safe to ignore the return
// value. The if above will guarantee that offset >= kSpecialHitSize and <
// size_in_bytes_ because the if ensures that there is enough room between
// offset and kSpecialHitSize to fit the delta of the previous hit, any
- // score and the uncompressed hit.
+ // term_frequency and the uncompressed hit.
set_start_byte_offset(offset);
} else if (posting_list_utils::kSpecialHitsSize + delta_len <= offset) {
// Only have space for delta. The new hit must be put in special
@@ -273,14 +273,11 @@ libtextclassifier3::Status PostingListUsed::PrependHitToNotFull(
// move first hit to special position 1 and put new hit in
// special position 0.
Hit cur(cur_value);
- if (cur.has_score()) {
- // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
- // Therefore, offset must be less than kSpecialHitSize + 5. Since posting
- // list size must be divisible by sizeof(Hit) (5), it is guaranteed that
- // offset < size_in_bytes, so it is safe to call ValueOrDie here.
- cur = Hit(cur_value, ReadScore(offset).ValueOrDie());
- offset += sizeof(Hit::Score);
- }
+ // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
+ // Therefore, offset must be less than kSpecialHitSize + 5. Since posting
+ // list size must be divisible by sizeof(Hit) (5), it is guaranteed that
+ // offset < size_in_bytes, so it is safe to ignore the return value here.
+ ConsumeTermFrequencyIfPresent(&cur, &offset);
// Safe to ignore the return value of PadToEnd because offset must be less
// than size_in_bytes_. Otherwise, this function already would have returned
// FAILED_PRECONDITION.
@@ -437,18 +434,17 @@ libtextclassifier3::Status PostingListUsed::GetHitsInternal(
val += delta;
}
Hit hit(val);
- if (hit.has_score()) {
- auto score_or = ReadScore(offset);
- if (!score_or.ok()) {
- // This posting list has been corrupted somehow. The first hit of the
- // posting list claims to have a score, but there's no more room in the
- // posting list for that score to exist. Return an empty vector and zero
- // to indicate no hits retrieved.
+ libtextclassifier3::Status status =
+ ConsumeTermFrequencyIfPresent(&hit, &offset);
+ if (!status.ok()) {
+ // This posting list has been corrupted somehow. The first hit of the
+ // posting list claims to have a term frequency, but there's no more room
+ // in the posting list for that term frequency to exist. Return an empty
+ // vector and zero to indicate no hits retrieved.
+ if (out != nullptr) {
out->clear();
- return absl_ports::InternalError("Posting list has been corrupted!");
}
- hit = Hit(val, score_or.ValueOrDie());
- offset += sizeof(Hit::Score);
+ return absl_ports::InternalError("Posting list has been corrupted!");
}
if (out != nullptr) {
out->push_back(hit);
@@ -475,21 +471,21 @@ libtextclassifier3::Status PostingListUsed::GetHitsInternal(
offset -= sizeof(Hit::Value);
memcpy(posting_list_buffer_ + offset, &val, sizeof(Hit::Value));
} else {
- // val won't fit in compressed area. Also see if there is a score.
+ // val won't fit in compressed area. Also see if there is a
+ // term_frequency.
Hit hit(val);
- if (hit.has_score()) {
- auto score_or = ReadScore(offset);
- if (!score_or.ok()) {
- // This posting list has been corrupted somehow. The first hit of
- // the posting list claims to have a score, but there's no more room
- // in the posting list for that score to exist. Return an empty
- // vector and zero to indicate no hits retrieved. Do not pop
- // anything.
+ libtextclassifier3::Status status =
+ ConsumeTermFrequencyIfPresent(&hit, &offset);
+ if (!status.ok()) {
+ // This posting list has been corrupted somehow. The first hit of
+ // the posting list claims to have a term frequency, but there's no
+ // more room in the posting list for that term frequency to exist.
+ // Return an empty vector and zero to indicate no hits retrieved. Do
+ // not pop anything.
+ if (out != nullptr) {
out->clear();
- return absl_ports::InternalError(
- "Posting list has been corrupted!");
}
- hit = Hit(val, score_or.ValueOrDie());
+ return absl_ports::InternalError("Posting list has been corrupted!");
}
// Okay to ignore the return value here because 1 < kNumSpecialHits.
mutable_this->set_special_hit(1, hit);
@@ -640,7 +636,7 @@ bool PostingListUsed::set_start_byte_offset(uint32_t offset) {
libtextclassifier3::StatusOr<uint32_t> PostingListUsed::PrependHitUncompressed(
const Hit &hit, uint32_t offset) {
- if (hit.has_score()) {
+ if (hit.has_term_frequency()) {
if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Not enough room to prepend Hit at offset %d.", offset));
@@ -659,16 +655,23 @@ libtextclassifier3::StatusOr<uint32_t> PostingListUsed::PrependHitUncompressed(
return offset;
}
-libtextclassifier3::StatusOr<Hit::Score> PostingListUsed::ReadScore(
- uint32_t offset) const {
- if (offset + sizeof(Hit::Score) > size_in_bytes_) {
+libtextclassifier3::Status PostingListUsed::ConsumeTermFrequencyIfPresent(
+ Hit *hit, uint32_t *offset) const {
+ if (!hit->has_term_frequency()) {
+ // No term frequency to consume. Everything is fine.
+ return libtextclassifier3::Status::OK;
+ }
+ if (*offset + sizeof(Hit::TermFrequency) > size_in_bytes_) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"offset %d must not point past the end of the posting list of size %d.",
- offset, size_in_bytes_));
+ *offset, size_in_bytes_));
}
- Hit::Score score;
- memcpy(&score, posting_list_buffer_ + offset, sizeof(Hit::Score));
- return score;
+ Hit::TermFrequency term_frequency;
+ memcpy(&term_frequency, posting_list_buffer_ + *offset,
+ sizeof(Hit::TermFrequency));
+ *hit = Hit(hit->value(), term_frequency);
+ *offset += sizeof(Hit::TermFrequency);
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
diff --git a/icing/index/main/posting-list-used.h b/icing/index/main/posting-list-used.h
index 8bc9c8d..1b2e24e 100644
--- a/icing/index/main/posting-list-used.h
+++ b/icing/index/main/posting-list-used.h
@@ -155,12 +155,12 @@ class PostingListUsed {
// starts somewhere between [kSpecialHitsSize, kSpecialHitsSize + sizeof(Hit)
// - 1] and ends at size_in_bytes - 1.
//
- // Hit scores are stored after the hit value, compressed or
+ // Hit term frequencies are stored after the hit value, compressed or
// uncompressed. For the first two special hits, we always have a
- // space for the score. For hits in the compressed area, we only have
- // the score following the hit value of hit.has_score() is true. This
- // allows good compression in the common case where hits don't have a
- // specific score.
+ // space for the term frequency. For hits in the compressed area, we only have
+ // the term frequency following the hit value of hit.has_term_frequency() is
+ // true. This allows good compression in the common case where hits don't have
+ // a valid term frequency.
//
// EXAMPLE
// Posting list storage. Posting list size: 20 bytes
@@ -175,7 +175,8 @@ class PostingListUsed {
// | 16 |Hit::kInvalidVal| 0x000 | 0x07FFF998 |
// +-------------+----------------+-----------------+----------------------+
//
- // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4, Score=125)
+ // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4,
+ // TermFrequency=125)
// (Hit 0x07FFF998 - Hit 0x07FFF684 = 788)
// +--bytes 0-4--+----- 5-9 ------+-- 10-12 --+-- 13-16 --+- 17 -+-- 18-19 --+
// | 13 |Hit::kInvalidVal| 0x000 | 0x07FFF684| 125 | 788 |
@@ -187,9 +188,9 @@ class PostingListUsed {
// | 9 |Hit::kInvVal| 0x00 |0x07FFF4D2| 434 | 125 | 788 |
// +-------------+------------+--------+----------+---------+------+---------+
//
- // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6, Score = 87)
- // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 196)
- // ALMOST FULL!
+ // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6,
+ // TermFrequency = 87)
+ // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 196) ALMOST FULL!
// +--bytes 0-4-+---- 5-9 ----+- 10-12 -+- 13-14 -+- 15-16 -+- 17 -+- 18-19 -+
// |Hit::kInvVal|0x07FFF40E,87| 0x000 | 196 | 434 | 125 | 788 |
// +-------------+------------+---------+---------+---------+------+---------+
@@ -302,13 +303,17 @@ class PostingListUsed {
libtextclassifier3::StatusOr<uint32_t> PrependHitUncompressed(
const Hit &hit, uint32_t offset);
- // Reads the score located at offset and returns it. Callers are responsible
- // for ensuring that the bytes starting at offset actually represent a score.
+ // If hit has a term frequency, consumes the term frequency at offset, updates
+ // hit to include the term frequency and updates offset to reflect that the
+ // term frequency has been consumed.
//
// RETURNS:
- // - The score located at offset, if successful
- // - INVALID_ARGUMENT if offset + sizeof(Hit::Score) >= size_in_bytes_
- libtextclassifier3::StatusOr<Hit::Score> ReadScore(uint32_t offset) const;
+ // - OK, if successful
+ // - INVALID_ARGUMENT if hit has a term frequency and offset +
+ // sizeof(Hit::TermFrequency) >=
+ // size_in_bytes_
+ libtextclassifier3::Status ConsumeTermFrequencyIfPresent(
+ Hit *hit, uint32_t *offset) const;
// A byte array of size size_in_bytes_ containing encoded hits for this
// posting list.
diff --git a/icing/index/main/posting-list-used_test.cc b/icing/index/main/posting-list-used_test.cc
index f6b5898..044d0c1 100644
--- a/icing/index/main/posting-list-used_test.cc
+++ b/icing/index/main/posting-list-used_test.cc
@@ -73,37 +73,37 @@ TEST(PostingListTest, PostingListUsedPrependHitNotFull) {
static_cast<void *>(hits_buf.get()), kHitsSize));
// Make used.
- Hit hit0(/*section_id=*/0, 0, /*score=*/56);
+ Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56);
pl_used.PrependHit(hit0);
// Size = sizeof(uncompressed hit0)
int expected_size = sizeof(Hit);
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0)));
- Hit hit1(/*section_id=*/0, 1, Hit::kDefaultHitScore);
+ Hit hit1(/*section_id=*/0, 1, Hit::kDefaultTermFrequency);
pl_used.PrependHit(hit1);
// Size = sizeof(uncompressed hit1)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
- expected_size += 2 + sizeof(Hit::Score);
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
+ expected_size += 2 + sizeof(Hit::TermFrequency);
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0)));
- Hit hit2(/*section_id=*/0, 2, /*score=*/56);
+ Hit hit2(/*section_id=*/0, 2, /*term_frequency=*/56);
pl_used.PrependHit(hit2);
// Size = sizeof(uncompressed hit2)
// + sizeof(hit1-hit2)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
expected_size += 2;
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
- Hit hit3(/*section_id=*/0, 3, Hit::kDefaultHitScore);
+ Hit hit3(/*section_id=*/0, 3, Hit::kDefaultTermFrequency);
pl_used.PrependHit(hit3);
// Size = sizeof(uncompressed hit3)
- // + sizeof(hit2-hit3) + sizeof(hit2::score)
+ // + sizeof(hit2-hit3) + sizeof(hit2::term_frequency)
// + sizeof(hit1-hit2)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
- expected_size += 2 + sizeof(Hit::Score);
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
+ expected_size += 2 + sizeof(Hit::TermFrequency);
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(),
IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
@@ -122,7 +122,7 @@ TEST(PostingListTest, PostingListUsedPrependHitAlmostFull) {
// Adding hit0: EMPTY -> NOT_FULL
// Adding hit1: NOT_FULL -> NOT_FULL
// Adding hit2: NOT_FULL -> NOT_FULL
- Hit hit0(/*section_id=*/0, 0, Hit::kDefaultHitScore);
+ Hit hit0(/*section_id=*/0, 0, Hit::kDefaultTermFrequency);
Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
ICING_EXPECT_OK(pl_used.PrependHit(hit0));
@@ -189,7 +189,8 @@ TEST(PostingListTest, PostingListUsedMinSize) {
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty()));
// Add a hit, PL should shift to ALMOST_FULL state
- Hit hit0(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
+ Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/true);
ICING_EXPECT_OK(pl_used.PrependHit(hit0));
// Size = sizeof(uncompressed hit0)
@@ -197,9 +198,10 @@ TEST(PostingListTest, PostingListUsedMinSize) {
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0)));
- // Add the smallest hit possible - no score and a delta of 1. PL should shift
- // to FULL state.
- Hit hit1(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/true,
+ // Add the smallest hit possible - no term_frequency and a delta of 1. PL
+ // should shift to FULL state.
+ Hit hit1(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/true,
/*is_prefix_hit=*/false);
ICING_EXPECT_OK(pl_used.PrependHit(hit1));
// Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0)
@@ -208,7 +210,8 @@ TEST(PostingListTest, PostingListUsedMinSize) {
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0)));
// Try to add the smallest hit possible. Should fail
- Hit hit2(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
+ Hit hit2(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/false);
EXPECT_THAT(pl_used.PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -227,7 +230,7 @@ TEST(PostingListTest, PostingListPrependHitArrayMinSizePostingList) {
static_cast<void *>(hits_buf.get()), size));
std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kDefaultHitScore));
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
@@ -268,7 +271,7 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
static_cast<void *>(hits_buf.get()), size));
std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kDefaultHitScore));
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
@@ -332,7 +335,7 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
// 14-11 Hit #11
// 10 <unused>
// 9-5 kSpecialHit
- // 4-0 Offset=22
+ // 4-0 Offset=11
// ----------------------
byte_size += 11;
@@ -423,9 +426,9 @@ TEST(PostingListTest, PostingListPrependHitArrayPostingList) {
TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) {
static constexpr int kNumHits = 128;
static constexpr int kDeltaSize = 1;
- static constexpr int kScoreSize = 1;
+ static constexpr int kTermFrequencySize = 1;
static constexpr size_t kHitsSize =
- ((kNumHits * (kDeltaSize + kScoreSize)) / 5) * 5;
+ ((kNumHits * (kDeltaSize + kTermFrequencySize)) / 5) * 5;
std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
@@ -654,5 +657,56 @@ TEST(PostingListTest, MoveToPostingListTooSmall) {
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
}
+TEST(PostingListTest, PopHitsWithTermFrequencies) {
+ int size = 2 * posting_list_utils::min_posting_list_size();
+ std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf1.get()), size));
+
+  // This posting list is 20-bytes. Create four hits that will have deltas of
+  // two bytes each, all of which will have a non-default term frequency. This
+  // posting list will be almost_full.
+ //
+ // ----------------------
+  // 19     term_frequency(Hit #0)
+  // 18-17  delta(Hit #0)
+  // 16     term_frequency(Hit #1)
+  // 15-14  delta(Hit #1)
+  // 13     term_frequency(Hit #2)
+ // 12-11 delta(Hit #2)
+ // 10 <unused>
+ // 9-5 Hit #3
+ // 4-0 kInvalidHitVal
+ // ----------------------
+  Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/5);
+ Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
+ Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
+ Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/2);
+ ICING_ASSERT_OK(pl_used.PrependHit(hit0));
+ ICING_ASSERT_OK(pl_used.PrependHit(hit1));
+ ICING_ASSERT_OK(pl_used.PrependHit(hit2));
+ ICING_ASSERT_OK(pl_used.PrependHit(hit3));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> hits_out, pl_used.GetHits());
+ EXPECT_THAT(hits_out, ElementsAre(hit3, hit2, hit1, hit0));
+
+ // Now, pop the last hit. The posting list should contain the first three
+ // hits.
+ //
+ // ----------------------
+  // 19     term_frequency(Hit #0)
+  // 18-17  delta(Hit #0)
+  // 16     term_frequency(Hit #1)
+  // 15-14  delta(Hit #1)
+ // 13-10 <unused>
+ // 9-5 Hit #2
+ // 4-0 kInvalidHitVal
+ // ----------------------
+ ICING_ASSERT_OK(pl_used.PopFrontHits(1));
+ ICING_ASSERT_OK_AND_ASSIGN(hits_out, pl_used.GetHits());
+ EXPECT_THAT(hits_out, ElementsAre(hit2, hit1, hit0));
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index 1e20340..a18a183 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -31,6 +31,11 @@
#include "icing/util/status-macros.h"
namespace {
+
+// JNI string constants
+// Matches field name of IcingSearchEngine#nativePointer.
+const char kNativePointerField[] = "nativePointer";
+
bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes,
google::protobuf::MessageLite* protobuf) {
int bytes_size = env->GetArrayLength(bytes);
@@ -58,8 +63,11 @@ jbyteArray SerializeProtoToJniByteArray(
return ret;
}
-icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(
- jlong native_pointer) {
+icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env,
+ jobject object) {
+ jclass cls = env->GetObjectClass(object);
+ jfieldID field_id = env->GetFieldID(cls, kNativePointerField, "J");
+ jlong native_pointer = env->GetLongField(object, field_id);
return reinterpret_cast<icing::lib::IcingSearchEngine*>(native_pointer);
}
@@ -99,17 +107,17 @@ Java_com_google_android_icing_IcingSearchEngine_nativeCreate(
JNIEXPORT void JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDestroy(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
delete icing;
}
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeInitialize(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::InitializeResultProto initialize_result_proto =
icing->Initialize();
@@ -119,10 +127,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeInitialize(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema(
- JNIEnv* env, jclass clazz, jlong native_pointer, jbyteArray schema_bytes,
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray schema_bytes,
jboolean ignore_errors_and_delete_documents) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SchemaProto schema_proto;
if (!ParseProtoFromJniByteArray(env, schema_bytes, &schema_proto)) {
@@ -138,9 +146,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetSchemaResultProto get_schema_result_proto = icing->GetSchema();
@@ -149,9 +157,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) {
+ JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_schema_type =
env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
@@ -163,10 +171,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativePut(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray document_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray document_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::DocumentProto document_proto;
if (!ParseProtoFromJniByteArray(env, document_bytes, &document_proto)) {
@@ -182,10 +189,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativePut(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGet(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space,
+ JNIEnv* env, jclass clazz, jobject object, jstring name_space,
jstring uri) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_name_space =
env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
@@ -198,10 +205,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGet(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeReportUsage(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray usage_report_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray usage_report_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::UsageReport usage_report;
if (!ParseProtoFromJniByteArray(env, usage_report_bytes, &usage_report)) {
@@ -217,9 +223,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeReportUsage(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetAllNamespacesResultProto get_all_namespaces_result_proto =
icing->GetAllNamespaces();
@@ -229,9 +235,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetNextPage(
- JNIEnv* env, jclass clazz, jlong native_pointer, jlong next_page_token) {
+ JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SearchResultProto next_page_result_proto =
icing->GetNextPage(next_page_token);
@@ -241,9 +247,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetNextPage(
JNIEXPORT void JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeInvalidateNextPageToken(
- JNIEnv* env, jclass clazz, jlong native_pointer, jlong next_page_token) {
+ JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing->InvalidateNextPageToken(next_page_token);
@@ -252,11 +258,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeInvalidateNextPageToken(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeSearch(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray search_spec_bytes, jbyteArray scoring_spec_bytes,
- jbyteArray result_spec_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
+ jbyteArray scoring_spec_bytes, jbyteArray result_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SearchSpecProto search_spec_proto;
if (!ParseProtoFromJniByteArray(env, search_spec_bytes, &search_spec_proto)) {
@@ -285,10 +290,10 @@ Java_com_google_android_icing_IcingSearchEngine_nativeSearch(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDelete(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space,
+ JNIEnv* env, jclass clazz, jobject object, jstring name_space,
jstring uri) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_name_space =
env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
@@ -301,9 +306,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDelete(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space) {
+ JNIEnv* env, jclass clazz, jobject object, jstring name_space) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_name_space =
env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
@@ -315,9 +320,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) {
+ JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_schema_type =
env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
@@ -329,17 +334,16 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray search_spec_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SearchSpecProto search_spec_proto;
if (!ParseProtoFromJniByteArray(env, search_spec_bytes, &search_spec_proto)) {
ICING_LOG(ERROR) << "Failed to parse SearchSpecProto in nativeSearch";
return nullptr;
}
- icing::lib::DeleteResultProto delete_result_proto =
+ icing::lib::DeleteByQueryResultProto delete_result_proto =
icing->DeleteByQuery(search_spec_proto);
return SerializeProtoToJniByteArray(env, delete_result_proto);
@@ -347,9 +351,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::PersistToDiskResultProto persist_to_disk_result_proto =
icing->PersistToDisk();
@@ -359,9 +363,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeOptimize(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::OptimizeResultProto optimize_result_proto = icing->Optimize();
@@ -370,9 +374,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeOptimize(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetOptimizeInfoResultProto get_optimize_info_result_proto =
icing->GetOptimizeInfo();
@@ -382,9 +386,9 @@ Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo(
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeReset(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::ResetResultProto reset_result_proto = icing->Reset();
diff --git a/icing/result/page-result-state.h b/icing/result/page-result-state.h
index 85f1dd7..5932b56 100644
--- a/icing/result/page-result-state.h
+++ b/icing/result/page-result-state.h
@@ -31,12 +31,13 @@ struct PageResultState {
uint64_t next_page_token_in,
SnippetContext snippet_context_in,
std::unordered_map<std::string, ProjectionTree> tree_map,
- int num_previously_returned_in)
+ int num_previously_returned_in, int num_per_page_in)
: scored_document_hits(std::move(scored_document_hits_in)),
next_page_token(next_page_token_in),
snippet_context(std::move(snippet_context_in)),
projection_tree_map(std::move(tree_map)),
- num_previously_returned(num_previously_returned_in) {}
+ num_previously_returned(num_previously_returned_in),
+ requested_page_size(num_per_page_in) {}
// Results of one page
std::vector<ScoredDocumentHit> scored_document_hits;
@@ -52,6 +53,10 @@ struct PageResultState {
// Number of results that have been returned in previous pages.
int num_previously_returned;
+
+ // The page size for this query. This should always be >=
+ // scored_document_hits.size();
+ int requested_page_size;
};
} // namespace lib
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
index 7ace295..a87a8fc 100644
--- a/icing/result/projection-tree.h
+++ b/icing/result/projection-tree.h
@@ -26,6 +26,8 @@ namespace lib {
class ProjectionTree {
public:
+ static constexpr std::string_view kSchemaTypeWildcard = "*";
+
struct Node {
explicit Node(std::string_view name = "") : name(name) {}
diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc
index ff6320b..0b8ad4a 100644
--- a/icing/result/result-retriever.cc
+++ b/icing/result/result-retriever.cc
@@ -31,7 +31,7 @@ namespace lib {
namespace {
void Project(const std::vector<ProjectionTree::Node>& projection_tree,
- proto2::RepeatedPtrField<PropertyProto>* properties) {
+ google::protobuf::RepeatedPtrField<PropertyProto>* properties) {
int num_kept = 0;
for (int cur_pos = 0; cur_pos < properties->size(); ++cur_pos) {
PropertyProto* prop = properties->Mutable(cur_pos);
@@ -97,6 +97,9 @@ ResultRetriever::RetrieveResults(
remaining_num_to_snippet = 0;
}
+ auto wildcard_projection_tree_itr =
+ page_result_state.projection_tree_map.find(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
for (const auto& scored_document_hit :
page_result_state.scored_document_hits) {
libtextclassifier3::StatusOr<DocumentProto> document_or =
@@ -118,9 +121,14 @@ ResultRetriever::RetrieveResults(
// Apply projection
auto itr = page_result_state.projection_tree_map.find(
document_or.ValueOrDie().schema());
+
if (itr != page_result_state.projection_tree_map.end()) {
Project(itr->second.root().children,
document_or.ValueOrDie().mutable_properties());
+ } else if (wildcard_projection_tree_itr !=
+ page_result_state.projection_tree_map.end()) {
+ Project(wildcard_projection_tree_itr->second.root().children,
+ document_or.ValueOrDie().mutable_properties());
}
SearchResultProto::ResultProto result;
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 82e32ee..98cc75a 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -30,6 +30,7 @@
#include "icing/proto/term.pb.h"
#include "icing/result/projection-tree.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -91,7 +92,7 @@ class ResultRetrieverTest : public testing::Test {
type->set_schema_type("Email");
auto* subj = type->add_properties();
- subj->set_property_name("subject");
+ subj->set_property_name("name");
subj->set_data_type(PropertyConfigProto::DataType::STRING);
subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
subj->mutable_string_indexing_config()->set_term_match_type(
@@ -136,6 +137,25 @@ class ResultRetrieverTest : public testing::Test {
return schema;
}
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
+
const Filesystem filesystem_;
const std::string test_dir_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
@@ -156,12 +176,20 @@ DocumentProto CreateDocument(int id) {
return DocumentBuilder()
.SetKey("icing", "Email/" + std::to_string(id))
.SetSchema("Email")
- .AddStringProperty("subject", "subject foo " + std::to_string(id))
+ .AddStringProperty("name", "subject foo " + std::to_string(id))
.AddStringProperty("body", "body bar " + std::to_string(id))
.SetCreationTimestampMs(1574365086666 + id)
.Build();
}
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+ SectionIdMask mask = 0;
+ for (SectionId section_id : section_ids) {
+ mask |= (1u << section_id);
+ }
+ return mask;
+}
+
TEST_F(ResultRetrieverTest, CreationWithNullPointerShouldFail) {
EXPECT_THAT(
ResultRetriever::Create(/*doc_store=*/nullptr, schema_store_.get(),
@@ -204,10 +232,13 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -228,7 +259,8 @@ TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
EXPECT_THAT(
result_retriever->RetrieveResults(page_result_state),
IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2),
@@ -249,10 +281,13 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) {
doc_store->Put(CreateDocument(/*id=*/2)));
DocumentId invalid_document_id = -1;
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {invalid_document_id, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -272,7 +307,8 @@ TEST_F(ResultRetrieverTest, IgnoreErrors) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
EXPECT_THAT(
result_retriever->RetrieveResults(page_result_state),
IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2))));
@@ -292,10 +328,13 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
doc_store->Put(CreateDocument(/*id=*/2)));
DocumentId invalid_document_id = -1;
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {invalid_document_id, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -310,16 +349,16 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
DocumentId non_existing_document_id = 4;
page_result_state.scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {non_existing_document_id, /*hit_section_id_mask=*/0b00001001,
- /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {non_existing_document_id, hit_section_id_mask, /*score=*/0}};
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -340,9 +379,12 @@ TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(CreateDocument(/*id=*/2)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -358,7 +400,8 @@ TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -378,10 +421,13 @@ TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -395,7 +441,8 @@ TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> results,
result_retriever->RetrieveResults(page_result_state));
@@ -423,10 +470,13 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -439,16 +489,16 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(
- GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
+ Eq("subject foo 1"));
+ EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
Eq("body bar 1"));
@@ -456,10 +506,9 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) {
Eq("bar"));
EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(
- GetWindow(result[1].document(), result[1].snippet(), "subject", 0),
- Eq("subject foo 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "name", 0),
+ Eq("subject foo 2"));
+ EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0),
Eq("body bar 2"));
@@ -467,10 +516,9 @@ TEST_F(ResultRetrieverTest, SimpleSnippeted) {
Eq("bar"));
EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(
- GetWindow(result[2].document(), result[2].snippet(), "subject", 0),
- Eq("subject foo 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "name", 0),
+ Eq("subject foo 3"));
+ EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0),
Eq("body bar 3"));
@@ -496,10 +544,13 @@ TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
snippet_spec.set_num_to_snippet(1);
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -511,16 +562,16 @@ TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(
- GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
+ Eq("subject foo 1"));
+ EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
Eq("body bar 1"));
@@ -551,10 +602,13 @@ TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -569,7 +623,8 @@ TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
@@ -598,10 +653,13 @@ TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -616,7 +674,8 @@ TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/3);
+ /*num_previously_returned_in=*/3,
+ /*num_per_page_in=*/3);
// num_to_snippet = 5, num_previously_returned_in = 3,
// We can return 5 - 3 = 2 snippets.
@@ -644,10 +703,13 @@ TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -662,7 +724,8 @@ TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context),
std::unordered_map<std::string, ProjectionTree>(),
- /*num_previously_returned_in=*/6);
+ /*num_previously_returned_in=*/6,
+ /*num_per_page_in=*/3);
// num_to_snippet = 5, num_previously_returned_in = 6,
// We can't return any snippets for this page.
@@ -689,7 +752,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
.SetKey("namespace", "uri1")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.AddStringProperty(
"body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
@@ -701,7 +764,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
.SetKey("namespace", "uri2")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.AddStringProperty("body",
"Count all the sheep and tell them 'Hello'.")
.Build();
@@ -709,13 +772,16 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
doc_store->Put(document_two));
// 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ResultSpecProto::TypePropertyMask type_property_mask;
type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("subject");
+ type_property_mask.add_paths("name");
std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
type_projection_tree_map.insert(
{"Email", ProjectionTree(type_property_mask)});
@@ -727,14 +793,15 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
language_segmenter_.get(), normalizer_.get()));
- // 3. Verify that the returned results only contain the 'subject' property.
+ // 3. Verify that the returned results only contain the 'name' property.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
@@ -745,7 +812,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
.SetKey("namespace", "uri1")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.Build();
EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
@@ -754,7 +821,7 @@ TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
.SetKey("namespace", "uri2")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.Build();
EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
}
@@ -781,7 +848,7 @@ TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) {
.AddStringProperty("name", "Meg Ryan")
.AddStringProperty("emailAddress", "shopgirl@aol.com")
.Build())
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.AddStringProperty(
"body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
@@ -800,17 +867,19 @@ TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) {
.AddStringProperty("name", "Tom Hanks")
.AddStringProperty("emailAddress", "ny152@aol.com")
.Build())
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.AddStringProperty("body",
"Count all the sheep and tell them 'Hello'.")
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(document_two));
- // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ResultSpecProto::TypePropertyMask type_property_mask;
type_property_mask.set_schema_type("Email");
@@ -826,7 +895,8 @@ TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) {
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -891,7 +961,7 @@ TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) {
.AddStringProperty("name", "Meg Ryan")
.AddStringProperty("emailAddress", "shopgirl@aol.com")
.Build())
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.AddStringProperty(
"body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
@@ -910,17 +980,19 @@ TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) {
.AddStringProperty("name", "Tom Hanks")
.AddStringProperty("emailAddress", "ny152@aol.com")
.Build())
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.AddStringProperty("body",
"Count all the sheep and tell them 'Hello'.")
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(document_two));
- // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ResultSpecProto::TypePropertyMask type_property_mask;
type_property_mask.set_schema_type("Email");
@@ -936,7 +1008,8 @@ TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) {
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -1004,7 +1077,7 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) {
.AddStringProperty("name", "Meg Ryan")
.AddStringProperty("emailAddress", "shopgirl@aol.com")
.Build())
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.AddStringProperty(
"body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
@@ -1023,7 +1096,7 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) {
.AddStringProperty("name", "Tom Hanks")
.AddStringProperty("emailAddress", "ny152@aol.com")
.Build())
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.AddStringProperty("body",
"Count all the sheep and tell them 'Hello'.")
.Build();
@@ -1031,9 +1104,12 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) {
doc_store->Put(document_two));
// 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ResultSpecProto::TypePropertyMask type_property_mask;
type_property_mask.set_schema_type("Email");
@@ -1050,7 +1126,8 @@ TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) {
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -1110,7 +1187,7 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) {
.SetKey("namespace", "uri1")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.AddStringProperty(
"body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
@@ -1122,7 +1199,7 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) {
.SetKey("namespace", "uri2")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.AddStringProperty("body",
"Count all the sheep and tell them 'Hello'.")
.Build();
@@ -1130,9 +1207,12 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) {
doc_store->Put(document_two));
// 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ResultSpecProto::TypePropertyMask type_property_mask;
type_property_mask.set_schema_type("Email");
@@ -1147,7 +1227,8 @@ TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) {
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -1189,7 +1270,7 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) {
.SetKey("namespace", "uri1")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.AddStringProperty(
"body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
@@ -1201,7 +1282,7 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) {
.SetKey("namespace", "uri2")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.AddStringProperty("body",
"Count all the sheep and tell them 'Hello'.")
.Build();
@@ -1209,9 +1290,12 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) {
doc_store->Put(document_two));
// 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ResultSpecProto::TypePropertyMask type_property_mask;
type_property_mask.set_schema_type("Email");
@@ -1227,7 +1311,8 @@ TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) {
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -1269,7 +1354,7 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
.SetKey("namespace", "uri1")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.AddStringProperty(
"body", "Oh what a beautiful morning! Oh what a beautiful day!")
.Build();
@@ -1281,7 +1366,7 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
.SetKey("namespace", "uri2")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
.AddStringProperty("body",
"Count all the sheep and tell them 'Hello'.")
.Build();
@@ -1289,13 +1374,16 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
doc_store->Put(document_two));
// 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00001001, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00001001, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ResultSpecProto::TypePropertyMask type_property_mask;
type_property_mask.set_schema_type("Email");
- type_property_mask.add_paths("subject");
+ type_property_mask.add_paths("name");
type_property_mask.add_paths("nonExistentProperty");
std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
type_projection_tree_map.insert(
@@ -1308,14 +1396,15 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
std::move(snippet_context), std::move(type_projection_tree_map),
- /*num_previously_returned_in=*/0);
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
language_segmenter_.get(), normalizer_.get()));
- // 3. Verify that the returned results only contain the 'subject' property.
+ // 3. Verify that the returned results only contain the 'name' property.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
@@ -1326,7 +1415,7 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
.SetKey("namespace", "uri1")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty("name", "Hello World!")
.Build();
EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
@@ -1335,8 +1424,498 @@ TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
.SetKey("namespace", "uri2")
.SetCreationTimestampMs(1000)
.SetSchema("Email")
- .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleTypesNoWildcards) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results have all of their properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
.Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcard) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcardWithOneOverride) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask email_type_property_mask;
+ email_type_property_mask.set_schema_type("Email");
+ email_type_property_mask.add_paths("body");
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(email_type_property_mask)});
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'body'
+ // property and the returned Person results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionSingleTypesWildcardAndOverride) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask email_type_property_mask;
+ email_type_property_mask.set_schema_type("Email");
+ email_type_property_mask.add_paths("sender.name");
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(email_type_property_mask)});
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest,
+ ProjectionSingleTypesWildcardAndOverrideNestedProperty) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask email_type_property_mask;
+ email_type_property_mask.set_schema_type("Email");
+ email_type_property_mask.add_paths("sender.name");
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("sender");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(email_type_property_mask)});
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results contain no properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .Build();
EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
}
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index 4488409..0f27d9e 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -39,6 +39,7 @@ ResultStateManager::RankAndPaginate(ResultState result_state) {
// Gets the number before calling GetNextPage() because num_returned() may
// change after returning more results.
int num_previously_returned = result_state.num_returned();
+ int num_per_page = result_state.num_per_page();
std::vector<ScoredDocumentHit> page_result_document_hits =
result_state.GetNextPage();
@@ -52,7 +53,7 @@ ResultStateManager::RankAndPaginate(ResultState result_state) {
return PageResultState(
std::move(page_result_document_hits), kInvalidNextPageToken,
std::move(snippet_context_copy), std::move(projection_tree_map_copy),
- num_previously_returned);
+ num_previously_returned, num_per_page);
}
absl_ports::unique_lock l(&mutex_);
@@ -63,7 +64,7 @@ ResultStateManager::RankAndPaginate(ResultState result_state) {
return PageResultState(std::move(page_result_document_hits), next_page_token,
std::move(snippet_context_copy),
std::move(projection_tree_map_copy),
- num_previously_returned);
+ num_previously_returned, num_per_page);
}
uint64_t ResultStateManager::Add(ResultState result_state) {
@@ -88,6 +89,7 @@ libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage(
}
int num_returned = state_iterator->second.num_returned();
+ int num_per_page = state_iterator->second.num_per_page();
std::vector<ScoredDocumentHit> result_of_page =
state_iterator->second.GetNextPage();
if (result_of_page.empty()) {
@@ -110,9 +112,9 @@ libtextclassifier3::StatusOr<PageResultState> ResultStateManager::GetNextPage(
next_page_token = kInvalidNextPageToken;
}
- return PageResultState(result_of_page, next_page_token,
- std::move(snippet_context_copy),
- std::move(projection_tree_map_copy), num_returned);
+ return PageResultState(
+ result_of_page, next_page_token, std::move(snippet_context_copy),
+ std::move(projection_tree_map_copy), num_returned, num_per_page);
}
void ResultStateManager::InvalidateResultState(uint64_t next_page_token) {
diff --git a/icing/result/result-state.h b/icing/result/result-state.h
index de36b40..be92b85 100644
--- a/icing/result/result-state.h
+++ b/icing/result/result-state.h
@@ -15,6 +15,7 @@
#ifndef ICING_RESULT_RESULT_STATE_H_
#define ICING_RESULT_RESULT_STATE_H_
+#include <iostream>
#include <vector>
#include "icing/proto/scoring.pb.h"
@@ -60,6 +61,8 @@ class ResultState {
return projection_tree_map_;
}
+ int num_per_page() const { return num_per_page_; }
+
// The number of results that have already been returned. This number is
// increased when GetNextPage() is called.
int num_returned() const { return num_returned_; }
diff --git a/icing/store/corpus-id.h b/icing/store/corpus-id.h
new file mode 100644
index 0000000..a8f21ba
--- /dev/null
+++ b/icing/store/corpus-id.h
@@ -0,0 +1,30 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_CORPUS_ID_H_
+#define ICING_STORE_CORPUS_ID_H_
+
+#include <cstdint>
+
+namespace icing {
+namespace lib {
+
+// Identifier for corpus, i.e. a <namespace, schema_type> pair, in
+// DocumentProto. Generated in DocumentStore.
+using CorpusId = int32_t;
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_CORPUS_ID_H_
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index ce41715..6a664a3 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -37,9 +37,11 @@
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/corpus-id.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/enable-bm25f.h"
#include "icing/store/key-mapper.h"
#include "icing/store/namespace-id.h"
#include "icing/util/clock.h"
@@ -62,12 +64,14 @@ constexpr char kScoreCacheFilename[] = "score_cache";
constexpr char kFilterCacheFilename[] = "filter_cache";
constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
constexpr char kUsageStoreDirectoryName[] = "usage_store";
+constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB
// 384 KiB for a KeyMapper would allow each internal array to have a max of
// 128 KiB for storage.
constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB
+constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB
DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
DocumentWrapper document_wrapper;
@@ -130,6 +134,10 @@ std::string MakeUsageStoreDirectoryName(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName);
}
+std::string MakeCorpusMapperFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename);
+}
+
// TODO(adorokhine): This class internally uses an 8-byte fingerprint of the
// Key and stores the key/value in a file-backed-trie that adds an ~80 byte
// overhead per key. As we know that these fingerprints are always 8-bytes in
@@ -358,6 +366,13 @@ libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
usage_store_,
UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
+ if (enableBm25f()) {
+ ICING_ASSIGN_OR_RETURN(
+ corpus_mapper_, KeyMapper<CorpusId>::Create(
+ *filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize));
+ }
+
// Ensure the usage store is the correct size.
ICING_RETURN_IF_ERROR(
usage_store_->TruncateTo(document_id_mapper_->num_elements()));
@@ -377,6 +392,9 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
ICING_RETURN_IF_ERROR(ResetFilterCache());
ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
+ if (enableBm25f()) {
+ ICING_RETURN_IF_ERROR(ResetCorpusMapper());
+ }
// Creates a new UsageStore instance. Note that we don't reset the data in
// usage store here because we're not able to regenerate the usage scores.
@@ -518,6 +536,14 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
namespace_mapper_->num_keys()));
+ if (enableBm25f()) {
+ // Update corpus maps
+ std::string corpus =
+ MakeFingerprint(document_wrapper.document().namespace_(),
+ document_wrapper.document().schema());
+ corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys());
+ }
+
int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
document_wrapper.document().creation_timestamp_ms(),
document_wrapper.document().ttl_ms());
@@ -644,6 +670,27 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() {
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status DocumentStore::ResetCorpusMapper() {
+ if (enableBm25f()) {
+ // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+ corpus_mapper_.reset();
+ // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete(
+ *filesystem_, MakeCorpusMapperFilename(base_dir_));
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message()
+ << "Failed to delete old corpus_id mapper";
+ return status;
+ }
+ ICING_ASSIGN_OR_RETURN(
+ corpus_mapper_, KeyMapper<CorpusId>::Create(
+ *filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
Crc32 total_checksum;
@@ -697,6 +744,10 @@ libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
total_checksum.Append(std::to_string(score_cache_checksum.Get()));
total_checksum.Append(std::to_string(filter_cache_checksum.Get()));
total_checksum.Append(std::to_string(namespace_mapper_checksum.Get()));
+ if (enableBm25f()) {
+ Crc32 corpus_mapper_checksum = corpus_mapper_->ComputeChecksum();
+ total_checksum.Append(std::to_string(corpus_mapper_checksum.Get()));
+ }
return total_checksum;
}
@@ -787,6 +838,12 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
NamespaceId namespace_id,
namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys()));
+ if (enableBm25f()) {
+ // Update corpus maps
+ ICING_RETURN_IF_ERROR(corpus_mapper_->GetOrPut(
+ MakeFingerprint(name_space, schema), corpus_mapper_->num_keys()));
+ }
+
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
schema_store_->GetSchemaTypeId(schema));
@@ -1029,6 +1086,14 @@ libtextclassifier3::StatusOr<NamespaceId> DocumentStore::GetNamespaceId(
return namespace_mapper_->Get(name_space);
}
+libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId(
+ const std::string_view name_space, const std::string_view schema) const {
+ if (enableBm25f()) {
+ return corpus_mapper_->Get(MakeFingerprint(name_space, schema));
+ }
+ return absl_ports::NotFoundError("corpus_mapper disabled");
+}
+
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
auto score_data_or = score_cache_->Get(document_id);
@@ -1077,17 +1142,18 @@ libtextclassifier3::Status DocumentStore::ReportUsage(
return usage_store_->AddUsageReport(usage_report, document_id);
}
-libtextclassifier3::Status DocumentStore::DeleteByNamespace(
+DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace(
std::string_view name_space, bool soft_delete) {
+ DeleteByGroupResult result;
auto namespace_id_or = namespace_mapper_->Get(name_space);
if (!namespace_id_or.ok()) {
- return absl_ports::Annotate(
+ result.status = absl_ports::Annotate(
namespace_id_or.status(),
absl_ports::StrCat("Failed to find namespace: ", name_space));
+ return result;
}
NamespaceId namespace_id = namespace_id_or.ValueOrDie();
- int num_updated_documents = 0;
if (soft_delete) {
// To delete an entire namespace, we append a tombstone that only contains
// the deleted bit and the name of the deleted namespace.
@@ -1100,36 +1166,43 @@ libtextclassifier3::Status DocumentStore::DeleteByNamespace(
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete namespace. namespace = "
<< name_space;
- return status;
+ result.status = std::move(status);
+ return result;
}
}
- ICING_ASSIGN_OR_RETURN(
- num_updated_documents,
- BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete));
+ auto num_deleted_or =
+ BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete);
+ if (!num_deleted_or.ok()) {
+ result.status = std::move(num_deleted_or).status();
+ return result;
+ }
- if (num_updated_documents <= 0) {
+ result.num_docs_deleted = num_deleted_or.ValueOrDie();
+ if (result.num_docs_deleted <= 0) {
// Treat the fact that no existing documents had this namespace to be the
// same as this namespace not existing at all.
- return absl_ports::NotFoundError(
+ result.status = absl_ports::NotFoundError(
absl_ports::StrCat("Namespace '", name_space, "' doesn't exist"));
+ return result;
}
- return libtextclassifier3::Status::OK;
+ return result;
}
-libtextclassifier3::Status DocumentStore::DeleteBySchemaType(
+DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType(
std::string_view schema_type, bool soft_delete) {
+ DeleteByGroupResult result;
auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
if (!schema_type_id_or.ok()) {
- return absl_ports::Annotate(
+ result.status = absl_ports::Annotate(
schema_type_id_or.status(),
absl_ports::StrCat("Failed to find schema type. schema_type: ",
schema_type));
+ return result;
}
SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
- int num_updated_documents = 0;
if (soft_delete) {
// To soft-delete an entire schema type, we append a tombstone that only
// contains the deleted bit and the name of the deleted schema type.
@@ -1142,20 +1215,26 @@ libtextclassifier3::Status DocumentStore::DeleteBySchemaType(
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete schema_type. schema_type = "
<< schema_type;
- return status;
+ result.status = std::move(status);
+ return result;
}
}
- ICING_ASSIGN_OR_RETURN(
- num_updated_documents,
- BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete));
+ auto num_deleted_or =
+ BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete);
+ if (!num_deleted_or.ok()) {
+ result.status = std::move(num_deleted_or).status();
+ return result;
+ }
- if (num_updated_documents <= 0) {
- return absl_ports::NotFoundError(absl_ports::StrCat(
+ result.num_docs_deleted = num_deleted_or.ValueOrDie();
+ if (result.num_docs_deleted <= 0) {
+ result.status = absl_ports::NotFoundError(absl_ports::StrCat(
"No documents found with schema type '", schema_type, "'"));
+ return result;
}
- return libtextclassifier3::Status::OK;
+ return result;
}
libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
@@ -1230,6 +1309,10 @@ libtextclassifier3::Status DocumentStore::PersistToDisk() {
ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk());
+ if (enableBm25f()) {
+ ICING_RETURN_IF_ERROR(corpus_mapper_->PersistToDisk());
+ }
+
// Update the combined checksum and write to header file.
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
@@ -1251,9 +1334,16 @@ libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const {
ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage,
namespace_mapper_->GetDiskUsage());
- return document_log_disk_usage + document_key_mapper_disk_usage +
- document_id_mapper_disk_usage + score_cache_disk_usage +
- filter_cache_disk_usage + namespace_mapper_disk_usage;
+ int64_t disk_usage = document_log_disk_usage +
+ document_key_mapper_disk_usage +
+ document_id_mapper_disk_usage + score_cache_disk_usage +
+ filter_cache_disk_usage + namespace_mapper_disk_usage;
+ if (enableBm25f()) {
+ ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage,
+ corpus_mapper_->GetDiskUsage());
+ disk_usage += corpus_mapper_disk_usage;
+ }
+ return disk_usage;
}
libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 114fa13..78590a5 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -30,6 +30,7 @@
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/corpus-id.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
@@ -71,6 +72,15 @@ class DocumentStore {
int32_t optimizable_docs = 0;
};
+ struct DeleteByGroupResult {
+ // Status representing whether or not the operation succeeded. See the
+ // comments above the function that returns this result to determine what
+ // possible statuses could be returned.
+ libtextclassifier3::Status status;
+
+ int num_docs_deleted = 0;
+ };
+
struct CreateResult {
// A successfully initialized document store.
std::unique_ptr<DocumentStore> document_store;
@@ -221,6 +231,15 @@ class DocumentStore {
libtextclassifier3::StatusOr<NamespaceId> GetNamespaceId(
std::string_view name_space) const;
+ // Returns the CorpusId associated with the given namespace and schema.
+ //
+ // Returns:
+ // A CorpusId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<CorpusId> GetCorpusId(
+ const std::string_view name_space, const std::string_view schema) const;
+
// Returns the DocumentAssociatedScoreData of the document specified by the
// DocumentId.
//
@@ -284,8 +303,8 @@ class DocumentStore {
// OK on success
// NOT_FOUND if namespace doesn't exist
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status DeleteByNamespace(std::string_view name_space,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteByNamespace(std::string_view name_space,
+ bool soft_delete = false);
// Deletes all documents belonging to the given schema type. The documents
// will be marked as deleted if 'soft_delete' is true, otherwise they will be
@@ -301,8 +320,8 @@ class DocumentStore {
// OK on success
// NOT_FOUND if schema_type doesn't exist
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status DeleteBySchemaType(std::string_view schema_type,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type,
+ bool soft_delete = false);
// Syncs all the data and metadata changes to disk.
//
@@ -438,6 +457,12 @@ class DocumentStore {
// DocumentStore. Namespaces may be removed from the mapper during compaction.
std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_;
+ // Maps a corpus, i.e. a (namespace, schema type) pair, to a densely-assigned
+  // unique id. A corpus is assigned an
+ // id when the first document belonging to that corpus is added to the
+ // DocumentStore. Corpus ids may be removed from the mapper during compaction.
+ std::unique_ptr<KeyMapper<CorpusId>> corpus_mapper_;
+
// A storage class that caches all usage scores. Usage scores are not
// considered as ground truth. Usage scores are associated with document ids
// so they need to be updated when document ids change.
@@ -503,6 +528,12 @@ class DocumentStore {
// Returns OK or any IO errors.
libtextclassifier3::Status ResetNamespaceMapper();
+ // Resets the unique_ptr to the corpus_mapper, deletes the underlying file,
+ // and re-creates a new instance of the corpus_mapper.
+ //
+ // Returns OK or any IO errors.
+ libtextclassifier3::Status ResetCorpusMapper();
+
// Checks if the header exists already. This does not create the header file
// if it doesn't exist.
bool HeaderExists();
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 4d8ac10..29bf8bb 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -33,9 +33,12 @@
#include "icing/schema/schema-store.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/enable-bm25f.h"
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/platform.h"
+#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/crc32.h"
@@ -98,6 +101,7 @@ class DocumentStoreTest : public ::testing::Test {
}
void SetUp() override {
+ setEnableBm25f(true);
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
@@ -471,8 +475,10 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) {
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true));
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
@@ -513,8 +519,10 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceOk) {
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false));
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
@@ -538,8 +546,10 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) {
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/true),
+ EXPECT_THAT(doc_store
+ ->DeleteByNamespace("nonexistent_namespace",
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -560,8 +570,10 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/false),
+ EXPECT_THAT(doc_store
+ ->DeleteByNamespace("nonexistent_namespace",
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -584,8 +596,10 @@ TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) {
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
// return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/true),
+ EXPECT_THAT(document_store
+ ->DeleteByNamespace(test_document1_.namespace_(),
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -604,8 +618,10 @@ TEST_F(DocumentStoreTest, HardDeleteByNamespaceNoExistingDocumentsNotFound) {
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
// return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/false),
+ EXPECT_THAT(document_store
+ ->DeleteByNamespace(test_document1_.namespace_(),
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -642,7 +658,10 @@ TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace.1"));
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
@@ -741,8 +760,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeOk) {
// Delete the "email" type and ensure that it works across both
// email_document's namespaces. And that other documents aren't affected.
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("email", /*soft_delete=*/true));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email", /*soft_delete=*/true);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -753,8 +774,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeOk) {
IsOkAndHolds(EqualsProto(person_document)));
// Delete the "message" type and check that other documents aren't affected
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("message", /*soft_delete=*/true));
+ group_result =
+ document_store->DeleteBySchemaType("message", /*soft_delete=*/true);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -824,8 +847,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) {
// Delete the "email" type and ensure that it works across both
// email_document's namespaces. And that other documents aren't affected.
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("email", /*soft_delete=*/false));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email", /*soft_delete=*/false);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -836,8 +861,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) {
IsOkAndHolds(EqualsProto(person_document)));
// Delete the "message" type and check that other documents aren't affected
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("message", /*soft_delete=*/false));
+ group_result =
+ document_store->DeleteBySchemaType("message", /*soft_delete=*/false);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -861,8 +888,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/true),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType("nonexistent_type",
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -884,8 +913,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/false),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType("nonexistent_type",
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -906,8 +937,10 @@ TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
- EXPECT_THAT(document_store->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/true),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType(test_document1_.schema(),
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -923,8 +956,10 @@ TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
- EXPECT_THAT(document_store->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/false),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType(test_document1_.schema(),
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -973,7 +1008,10 @@ TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
document_store->Put(message_document));
// Delete "email". "message" documents should still be retrievable.
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
@@ -1054,7 +1092,10 @@ TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
document_store->Put(message_document));
// Delete "email". "message" documents should still be retrievable.
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1461,6 +1502,12 @@ TEST_F(DocumentStoreTest, GetNamespaceId) {
// DocumentStore
EXPECT_THAT(doc_store->GetNamespaceId("namespace2"), IsOkAndHolds(Eq(1)));
+ // DELETE namespace1 - document_namespace1 is deleted.
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
+
// NamespaceMapper doesn't care if the document has been deleted
EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
}
@@ -1497,6 +1544,106 @@ TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
+TEST_F(DocumentStoreTest, GetCorpusIdReturnsNotFoundWhenFeatureIsDisabled) {
+ setEnableBm25f(false);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("corpus_mapper disabled")));
+}
+
+TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"),
+ IsOkAndHolds(Eq(0)));
+}
+
+TEST_F(DocumentStoreTest, GetCorpusId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document_corpus1 =
+ DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+ DocumentProto document_corpus2 =
+ DocumentBuilder().SetKey("namespace2", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus1)));
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus2)));
+
+ // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace1", "email"),
+ IsOkAndHolds(Eq(0)));
+
+ // CorpusId of 1 since it was the second corpus seen by the
+ // DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace2", "email"),
+ IsOkAndHolds(Eq(1)));
+
+ // DELETE namespace1 - document_corpus1 is deleted.
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
+
+ // CorpusMapper doesn't care if the document has been deleted
+ EXPECT_THAT(doc_store->GetCorpusId("namespace1", "email"), IsOkAndHolds(Eq(0)));
+}
+
+TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(
+ doc_store->GetCorpusId("nonexistent_namespace", "nonexistent_schema"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ DocumentProto document_corpus =
+ DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus)));
+
+ EXPECT_THAT(doc_store->GetCorpusId("nonexistent_namespace", "email"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->GetCorpusId("namespace1", "nonexistent_schema"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -2996,6 +3143,54 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
}
+TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
+ // The directory testdata/v0/document_store contains only the scoring_cache
+ // and the document_store_header (holding the crc for the scoring_cache). If
+ // the current code is compatible with the format of the v0 scoring_cache,
+ // then an empty document store should be initialized, but the non-empty
+ // scoring_cache should be retained.
+ // Since the current document-associated-score-data is compatible with the
+ // score_cache in testdata/v0/document_store, the document store should be
+ // initialized without having to re-generate the derived files.
+
+ // Create dst directory
+ ASSERT_THAT(filesystem_.CreateDirectory(document_store_dir_.c_str()), true);
+
+ // Get src files
+ std::string document_store_v0;
+ if (IsAndroidPlatform() || IsIosPlatform()) {
+ document_store_v0 = GetTestFilePath(
+ "icing/testdata/v0/document_store_android_ios_compatible");
+ } else {
+ document_store_v0 =
+ GetTestFilePath("icing/testdata/v0/document_store");
+ }
+ std::vector<std::string> document_store_files;
+ Filesystem filesystem;
+ filesystem.ListDirectory(document_store_v0.c_str(), &document_store_files);
+
+ VLOG(1) << "Copying files " << document_store_v0 << ' '
+ << document_store_files.size();
+ for (size_t i = 0; i != document_store_files.size(); i++) {
+ std::string src =
+ absl_ports::StrCat(document_store_v0, "/", document_store_files[i]);
+ std::string dst =
+ absl_ports::StrCat(document_store_dir_, "/", document_store_files[i]);
+ ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true);
+ }
+
+ NativeInitializeStats initializeStats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(), &initializeStats));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ // Regeneration never happens.
+ EXPECT_EQ(initializeStats.document_store_recovery_cause(),
+ NativeInitializeStats::NONE);
+}
+
} // namespace
} // namespace lib
diff --git a/icing/store/enable-bm25f.h b/icing/store/enable-bm25f.h
new file mode 100644
index 0000000..cee94d1
--- /dev/null
+++ b/icing/store/enable-bm25f.h
@@ -0,0 +1,31 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_ENABLE_BM25F_H_
+#define ICING_STORE_ENABLE_BM25F_H_
+
+namespace icing {
+namespace lib {
+
+inline bool enable_bm25f_ = false;
+
+inline bool enableBm25f() { return enable_bm25f_; }
+
+// Setter for testing purposes. It should never be called in production code.
+inline void setEnableBm25f(bool enable_bm25f) { enable_bm25f_ = enable_bm25f; }
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_ENABLE_BM25F_H_
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index 225b498..a15e64e 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -23,6 +23,8 @@
#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/search.proto.h"
+#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/util/status-macros.h"
@@ -65,15 +67,15 @@ MATCHER_P2(EqualsDocHitInfoWithTermFrequency, document_id,
SectionIdMask section_mask = kSectionIdMaskNone;
bool term_frequency_as_expected = true;
- std::vector<Hit::Score> expected_tfs;
- std::vector<Hit::Score> actual_tfs;
+ std::vector<Hit::TermFrequency> expected_tfs;
+ std::vector<Hit::TermFrequency> actual_tfs;
for (auto itr = section_ids_to_term_frequencies_map.begin();
itr != section_ids_to_term_frequencies_map.end(); itr++) {
SectionId section_id = itr->first;
section_mask |= 1U << section_id;
expected_tfs.push_back(itr->second);
- actual_tfs.push_back(actual.max_hit_score(section_id));
- if (actual.max_hit_score(section_id) != itr->second) {
+ actual_tfs.push_back(actual.hit_term_frequency(section_id));
+ if (actual.hit_term_frequency(section_id) != itr->second) {
term_frequency_as_expected = false;
}
}
@@ -372,6 +374,18 @@ MATCHER_P2(ProtoStatusIs, status_code, error_matcher, "") {
return ExplainMatchResult(error_matcher, arg.message(), result_listener);
}
+MATCHER_P(EqualsSearchResultIgnoreStats, expected, "") {
+ SearchResultProto actual_copy = arg;
+ actual_copy.clear_query_stats();
+ actual_copy.clear_debug_info();
+
+ SearchResultProto expected_copy = expected;
+ expected_copy.clear_query_stats();
+ expected_copy.clear_debug_info();
+ return ExplainMatchResult(testing::EqualsProto(expected_copy), actual_copy,
+ result_listener);
+}
+
// TODO(tjbarron) Remove this once icing has switched to depend on TC3 Status
#define ICING_STATUS_MACROS_CONCAT_NAME(x, y) \
ICING_STATUS_MACROS_CONCAT_IMPL(x, y)
diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc
index eba1dfa..7ad8a64 100644
--- a/icing/testing/hit-test-utils.cc
+++ b/icing/testing/hit-test-utils.cc
@@ -19,17 +19,18 @@ namespace lib {
// Returns a hit that has a delta of desired_byte_length from last_hit.
Hit CreateHit(Hit last_hit, int desired_byte_length) {
- Hit hit =
- (last_hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score())
- : Hit(last_hit.section_id() - 1, last_hit.document_id(),
- last_hit.score());
+ Hit hit = (last_hit.section_id() == kMinSectionId)
+ ? Hit(kMaxSectionId, last_hit.document_id() + 1,
+ last_hit.term_frequency())
+ : Hit(last_hit.section_id() - 1, last_hit.document_id(),
+ last_hit.term_frequency());
uint8_t buf[5];
while (VarInt::Encode(last_hit.value() - hit.value(), buf) <
desired_byte_length) {
hit = (hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score())
- : Hit(hit.section_id() - 1, hit.document_id(), hit.score());
+ ? Hit(kMaxSectionId, hit.document_id() + 1, hit.term_frequency())
+ : Hit(hit.section_id() - 1, hit.document_id(),
+ hit.term_frequency());
}
return hit;
}
@@ -43,7 +44,7 @@ std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
return hits;
}
hits.push_back(Hit(/*section_id=*/1, /*document_id=*/start_docid,
- Hit::kDefaultHitScore));
+ Hit::kDefaultTermFrequency));
while (hits.size() < num_hits) {
hits.push_back(CreateHit(hits.back(), desired_byte_length));
}
diff --git a/icing/testing/platform.h b/icing/testing/platform.h
index 7b7f212..ad612d5 100644
--- a/icing/testing/platform.h
+++ b/icing/testing/platform.h
@@ -36,6 +36,22 @@ inline bool IsReverseJniTokenization() {
return false;
}
+// Whether the running test is an Android test.
+inline bool IsAndroidPlatform() {
+#if defined(__ANDROID__)
+ return true;
+#endif // defined(__ANDROID__)
+ return false;
+}
+
+// Whether the running test is an iOS test.
+inline bool IsIosPlatform() {
+#if defined(__APPLE__)
+ return true;
+#endif // defined(__APPLE__)
+ return false;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h
index 863f43f..78430cc 100644
--- a/icing/testing/schema-generator.h
+++ b/icing/testing/schema-generator.h
@@ -40,6 +40,7 @@ class ExactStringPropertyGenerator {
}
};
+// Schema generator with random number of properties
template <typename Rand, typename PropertyGenerator>
class RandomSchemaGenerator {
public:
@@ -71,6 +72,37 @@ class RandomSchemaGenerator {
PropertyGenerator* prop_generator_;
};
+// Schema generator with number of properties specified by the caller
+template <typename PropertyGenerator>
+class SchemaGenerator {
+ public:
+ explicit SchemaGenerator(int num_properties,
+ PropertyGenerator* prop_generator)
+ : num_properties_(num_properties), prop_generator_(prop_generator) {}
+
+ SchemaProto GenerateSchema(int num_types) {
+ SchemaProto schema;
+ while (--num_types >= 0) {
+ SetType(schema.add_types(), "Type" + std::to_string(num_types),
+ num_properties_);
+ }
+ return schema;
+ }
+
+ private:
+ void SetType(SchemaTypeConfigProto* type_config, std::string_view name,
+ int num_properties) const {
+ type_config->set_schema_type(name.data(), name.length());
+ while (--num_properties >= 0) {
+ std::string prop_name = "Prop" + std::to_string(num_properties);
+ (*type_config->add_properties()) = (*prop_generator_)(prop_name);
+ }
+ }
+
+ int num_properties_;
+ PropertyGenerator* prop_generator_;
+};
+
} // namespace lib
} // namespace icing
diff --git a/icing/util/clock.h b/icing/util/clock.h
index 06f1c9d..2bb7818 100644
--- a/icing/util/clock.h
+++ b/icing/util/clock.h
@@ -36,17 +36,22 @@ int64_t GetSteadyTimeMilliseconds();
class Timer {
public:
// Creates and starts the timer.
- Timer() : start_timestamp_milliseconds_(GetSteadyTimeMilliseconds()) {}
+ Timer() : start_timestamp_nanoseconds_(GetSteadyTimeNanoseconds()) {}
virtual ~Timer() = default;
// Returns the elapsed time from when timer started.
virtual int64_t GetElapsedMilliseconds() {
- return GetSteadyTimeMilliseconds() - start_timestamp_milliseconds_;
+ return GetElapsedNanoseconds() / 1000000;
+ }
+
+ // Returns the elapsed time from when timer started.
+ virtual int64_t GetElapsedNanoseconds() {
+ return GetSteadyTimeNanoseconds() - start_timestamp_nanoseconds_;
}
private:
- int64_t start_timestamp_milliseconds_;
+ int64_t start_timestamp_nanoseconds_;
};
// Wrapper around real-time clock functions. This is separated primarily so