diff options
author | Tim Barron <tjbarron@google.com> | 2021-10-21 16:01:05 -0700 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2021-10-21 16:01:05 -0700 |
commit | da1b8986e7c873efa45529b8adc4a32490eb9c3c (patch) | |
tree | 1cc9dbe185e88e71c7c82ede8ba02578a36ef78f /icing/scoring/score-and-rank_benchmark.cc | |
parent | 8555f998fccca3aea3f6f67d44fce04775ddea97 (diff) | |
download | icing-da1b8986e7c873efa45529b8adc4a32490eb9c3c.tar.gz |
Sync from upstream.
Descriptions:
================
Replace references to C library headers with their C++ standard library equivalents.
================
Update IDF component of BM25F Calculator in IcingLib
================
Expose QuerySuggestions API.
================
Change the tokenizer used in QuerySuggest.
================
Add SectionWeights API to Icing.
================
Apply SectionWeights to BM25F Scoring.
================
Replace uses of u_strToUTF32/u_strFromUTF32 with u_strToUTF8/u_strFromUTF8.
Bug: 152934343
Bug: 202308641
Bug: 203700301
Change-Id: Ic884a84e5ff4c9c04b2cd6dd1fce90765aa4446e
Diffstat (limited to 'icing/scoring/score-and-rank_benchmark.cc')
-rw-r--r-- | icing/scoring/score-and-rank_benchmark.cc | 125 |
1 files changed, 122 insertions, 3 deletions
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc index e940e98..cc1d995 100644 --- a/icing/scoring/score-and-rank_benchmark.cc +++ b/icing/scoring/score-and-rank_benchmark.cc @@ -117,7 +117,8 @@ void BM_ScoreAndRankDocumentHitsByDocumentScore(benchmark::State& state) { scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(scoring_spec, document_store.get())); + ScoringProcessor::Create(scoring_spec, document_store.get(), + schema_store.get())); int num_to_score = state.range(0); int num_of_documents = state.range(1); @@ -220,7 +221,8 @@ void BM_ScoreAndRankDocumentHitsByCreationTime(benchmark::State& state) { ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(scoring_spec, document_store.get())); + ScoringProcessor::Create(scoring_spec, document_store.get(), + schema_store.get())); int num_to_score = state.range(0); int num_of_documents = state.range(1); @@ -322,7 +324,8 @@ void BM_ScoreAndRankDocumentHitsNoScoring(benchmark::State& state) { scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE); ICING_ASSERT_OK_AND_ASSIGN( std::unique_ptr<ScoringProcessor> scoring_processor, - ScoringProcessor::Create(scoring_spec, document_store.get())); + ScoringProcessor::Create(scoring_spec, document_store.get(), + schema_store.get())); int num_to_score = state.range(0); int num_of_documents = state.range(1); @@ -390,6 +393,122 @@ BENCHMARK(BM_ScoreAndRankDocumentHitsNoScoring) ->ArgPair(10000, 18000) ->ArgPair(10000, 20000); +void BM_ScoreAndRankDocumentHitsByRelevanceScoring(benchmark::State& state) { + const std::string base_dir = GetTestTempDir() + "/score_and_rank_benchmark"; + const std::string document_store_dir = base_dir + "/document_store"; + const std::string 
schema_store_dir = base_dir + "/schema_store"; + + // Creates file directories + Filesystem filesystem; + filesystem.DeleteDirectoryRecursively(base_dir.c_str()); + filesystem.CreateDirectoryRecursively(document_store_dir.c_str()); + filesystem.CreateDirectoryRecursively(schema_store_dir.c_str()); + + Clock clock; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem, base_dir, &clock)); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem, document_store_dir, &clock, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType())); + + ScoringSpecProto scoring_spec; + scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ScoringProcessor> scoring_processor, + ScoringProcessor::Create(scoring_spec, document_store.get(), + schema_store.get())); + + int num_to_score = state.range(0); + int num_of_documents = state.range(1); + + std::mt19937 random_generator; + std::uniform_int_distribution<int> distribution( + 1, std::numeric_limits<int>::max()); + + SectionId section_id = 0; + SectionIdMask section_id_mask = 1U << section_id; + + // Puts documents into document store + std::vector<DocHitInfo> doc_hit_infos; + for (int i = 0; i < num_of_documents; i++) { + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id, + document_store->Put(CreateEmailDocument( + /*id=*/i, /*document_score=*/1, + /*creation_timestamp_ms=*/1), + /*num_tokens=*/10)); + DocHitInfo doc_hit = DocHitInfo(document_id, section_id_mask); + // Set five matches for term "foo" for each document hit. 
+ doc_hit.UpdateSection(section_id, /*hit_term_frequency=*/5); + doc_hit_infos.push_back(doc_hit); + } + + ScoredDocumentHitComparator scored_document_hit_comparator( + /*is_descending=*/true); + + for (auto _ : state) { + // Creates a dummy DocHitInfoIterator with results, we need to pause the + // timer here so that the cost of copying test data is not included. + state.PauseTiming(); + std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); + // Create a query term iterator that assigns the document hits to term + // "foo". + std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> + query_term_iterators; + query_term_iterators["foo"] = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); + state.ResumeTiming(); + + std::vector<ScoredDocumentHit> scored_document_hits = + scoring_processor->Score(std::move(doc_hit_info_iterator), num_to_score, + &query_term_iterators); + + BuildHeapInPlace(&scored_document_hits, scored_document_hit_comparator); + // Ranks and gets the first page, 20 is a common page size + std::vector<ScoredDocumentHit> results = + PopTopResultsFromHeap(&scored_document_hits, /*num_results=*/20, + scored_document_hit_comparator); + } + + // Clean up + document_store.reset(); + schema_store.reset(); + filesystem.DeleteDirectoryRecursively(base_dir.c_str()); +} +BENCHMARK(BM_ScoreAndRankDocumentHitsByRelevanceScoring) + // num_to_score, num_of_documents in document store + ->ArgPair(1000, 30000) + ->ArgPair(3000, 30000) + ->ArgPair(5000, 30000) + ->ArgPair(7000, 30000) + ->ArgPair(9000, 30000) + ->ArgPair(11000, 30000) + ->ArgPair(13000, 30000) + ->ArgPair(15000, 30000) + ->ArgPair(17000, 30000) + ->ArgPair(19000, 30000) + ->ArgPair(21000, 30000) + ->ArgPair(23000, 30000) + ->ArgPair(25000, 30000) + ->ArgPair(27000, 30000) + ->ArgPair(29000, 30000) + // Starting from this line, we're trying to see if num_of_documents affects + // performance + 
->ArgPair(10000, 10000) + ->ArgPair(10000, 12000) + ->ArgPair(10000, 14000) + ->ArgPair(10000, 16000) + ->ArgPair(10000, 18000) + ->ArgPair(10000, 20000); + } // namespace } // namespace lib |