diff options
author | Tim Barron <tjbarron@google.com> | 2022-12-12 18:03:05 -0800 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2022-12-12 18:03:05 -0800 |
commit | 8ddc32ad433ea147de80dcfac2afe58962360f18 (patch) | |
tree | 50c30cb98396499bf1d6caf33b383f7f4bbc7e58 /icing/monkey_test | |
parent | 2658f90984737e5bf6c76d82024103dccd4d51c6 (diff) | |
download | icing-8ddc32ad433ea147de80dcfac2afe58962360f18.tar.gz |
Sync from upstream.
Descriptions:
======================================================================
Add ScoringSpec into JoinSpec. Rename joined_document to child_document.
======================================================================
Create JoinedScoredDocumentHit class and refactor ScoredDocumentHitsRanker.
======================================================================
Implement initial Join workflow
======================================================================
Implement the Lexer for Icing Advanced Query Language
======================================================================
Create struct Options for PersistentHashMap
======================================================================
Premapping FileBackedVector
======================================================================
Create class PersistentHashMapKeyMapper
======================================================================
Add integer sections into TokenizedDocument and rename string sections
======================================================================
Create NumericIndex interface and DocHitInfoIteratorNumeric
======================================================================
Implement DummyNumericIndex and unit test
======================================================================
Change PostingListAccessor::Finalize to rvalue member function
======================================================================
Define the Abstract Syntax Tree for Icing's list_filter parser.
======================================================================
Refactor query processing and score
======================================================================
Refactor IcingSearchEngine for AppSearch Dynamite Module 0p APIs
======================================================================
Implement the Lexer for Icing Advanced Scoring Language
======================================================================
Add a common interface for IcingSearchEngine and dynamite client
======================================================================
Implement a subset of the query grammar.
======================================================================
Refactor index processor
======================================================================
Add integer index into IcingSearchEngine and IndexProcessor
======================================================================
Implement the parser for Icing Advanced Scoring Language
======================================================================
Implement IntegerIndexData and PostingListUsedIntegerIndexDataSerializer
======================================================================
Add PostingListAccessor abstract class for common components and methods
======================================================================
Implement PostingListIntegerIndexDataAccessor
======================================================================
Create PostingListIntegerIndexDataAccessorTest
======================================================================
Fix Icing Segmentation tests for word connectors that changed in ICU 72.
======================================================================
Modify the Advanced Query grammar to allow functions to accept expressions.
======================================================================
Implement QueryVisitor.
======================================================================
Enable the Advanced Query Parser to handle member functions
======================================================================
Refactor the Scorer class to support the Advanced Scoring Language
======================================================================
Integrate advanced query parser with the query processor.
======================================================================
Implement support for JoinSpec in Icing.
======================================================================
Implement the Advanced Scoring Language for basic functions and operators
======================================================================
Bug: 208654892
Bug: 249829533
Bug: 256022027
Bug: 261474063
Bug: 240333360
Bug: 193919210
Change-Id: I5f5bdc6249282ecc4b014b4fbdf8e2d1f8b20c19
Diffstat (limited to 'icing/monkey_test')
-rw-r--r-- | icing/monkey_test/icing-monkey-test-runner.cc | 63 | ||||
-rw-r--r-- | icing/monkey_test/icing-monkey-test-runner.h | 36 | ||||
-rw-r--r-- | icing/monkey_test/icing-search-engine_monkey_test.cc | 62 | ||||
-rw-r--r-- | icing/monkey_test/monkey-test-generators.cc | 15 | ||||
-rw-r--r-- | icing/monkey_test/monkey-test-generators.h | 7 |
5 files changed, 125 insertions, 58 deletions
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc index 2dd5a03..a2a6c9b 100644 --- a/icing/monkey_test/icing-monkey-test-runner.cc +++ b/icing/monkey_test/icing-monkey-test-runner.cc @@ -40,42 +40,12 @@ using ::testing::Le; using ::testing::SizeIs; using ::testing::UnorderedElementsAreArray; -inline constexpr int kNumTypes = 30; -const std::vector<int> kPossibleNumProperties = {0, - 1, - 2, - 4, - 8, - 16, - kTotalNumSections / 2, - kTotalNumSections, - kTotalNumSections + 1, - kTotalNumSections * 2}; -inline constexpr int kNumNamespaces = 100; -inline constexpr int kNumURIs = 1000; - -// Merge per 131072 hits -const int kIndexMergeSize = 1024 * 1024; - -// An array of pairs of monkey test APIs with frequencies. -// If f_sum is the sum of all the frequencies, an operation with frequency f -// means for every f_sum iterations, the operation is expected to run f times. -const std::vector< - std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>> - kMonkeyAPISchedules = {{&IcingMonkeyTestRunner::DoPut, 500}, - {&IcingMonkeyTestRunner::DoSearch, 200}, - {&IcingMonkeyTestRunner::DoGet, 70}, - {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50}, - {&IcingMonkeyTestRunner::DoDelete, 50}, - {&IcingMonkeyTestRunner::DoDeleteByNamespace, 50}, - {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 50}, - {&IcingMonkeyTestRunner::DoDeleteByQuery, 20}, - {&IcingMonkeyTestRunner::DoOptimize, 5}, - {&IcingMonkeyTestRunner::ReloadFromDisk, 5}}; - -SchemaProto GenerateRandomSchema(MonkeyTestRandomEngine* random) { +SchemaProto GenerateRandomSchema( + const IcingMonkeyTestRunnerConfiguration& config, + MonkeyTestRandomEngine* random) { MonkeySchemaGenerator schema_generator(random); - return schema_generator.GenerateSchema(kNumTypes, kPossibleNumProperties); + return schema_generator.GenerateSchema(config.num_types, + config.possible_num_properties); } SearchSpecProto GenerateRandomSearchSpecProto( @@ -166,18 +136,20 @@ void SortDocuments(std::vector<DocumentProto>& documents) { } // namespace -IcingMonkeyTestRunner::IcingMonkeyTestRunner(uint32_t seed) - : random_(seed), in_memory_icing_() { - ICING_LOG(INFO) << "Monkey test runner started with seed: " << seed; +IcingMonkeyTestRunner::IcingMonkeyTestRunner( + const IcingMonkeyTestRunnerConfiguration& config) + : config_(config), random_(config.seed), in_memory_icing_() { + ICING_LOG(INFO) << "Monkey test runner started with seed: " << config_.seed; - SchemaProto schema = GenerateRandomSchema(&random_); + SchemaProto schema = GenerateRandomSchema(config_, &random_); ICING_LOG(DBG) << "Schema Generated: " << schema.DebugString(); in_memory_icing_ = std::make_unique<InMemoryIcingSearchEngine>(&random_, std::move(schema)); document_generator_ = std::make_unique<MonkeyDocumentGenerator>( - &random_, in_memory_icing_->GetSchema(), kNumNamespaces, kNumURIs); + &random_, in_memory_icing_->GetSchema(), config_.possible_num_tokens_, + config_.num_namespaces, config_.num_uris); std::string dir = GetTestTempDir() + "/icing/monkey"; filesystem_.DeleteDirectoryRecursively(dir.c_str()); @@ -190,13 +162,13 @@ void IcingMonkeyTestRunner::Run(uint32_t num) { "CreateIcingSearchEngineWithSchema() first"; uint32_t frequency_sum = 0; - for (const auto& schedule : kMonkeyAPISchedules) { + for (const auto& schedule : config_.monkey_api_schedules) { frequency_sum += schedule.second; } std::uniform_int_distribution<> dist(0, frequency_sum - 1); for (; num; --num) { int p = dist(random_); - for (const auto& schedule : kMonkeyAPISchedules) { + for (const auto& schedule : config_.monkey_api_schedules) { if (p < schedule.second) { ASSERT_NO_FATAL_FAILURE(schedule.first(this)); break; @@ -404,6 +376,11 @@ void IcingMonkeyTestRunner::DoSearch() { search_result = icing_->GetNextPage(search_result.next_page_token()); ASSERT_THAT(search_result.status(), ProtoIsOk()); } + // The maximum number of scored documents allowed in Icing is 30000, in which + // case we are not able to compare the results with the in-memory Icing. + if (exp_documents.size() >= 30000) { + return; + } if (snippet_spec.num_matches_per_property() > 0) { ASSERT_THAT(num_snippeted, Eq(std::min<uint32_t>(exp_documents.size(), @@ -432,7 +409,7 @@ void IcingMonkeyTestRunner::DoOptimize() { void IcingMonkeyTestRunner::CreateIcingSearchEngine() { IcingSearchEngineOptions icing_options; - icing_options.set_index_merge_size(kIndexMergeSize); + icing_options.set_index_merge_size(config_.index_merge_size); icing_options.set_base_dir(icing_dir_->dir()); icing_ = std::make_unique<IcingSearchEngine>(icing_options); ASSERT_THAT(icing_->Initialize().status(), ProtoIsOk()); diff --git a/icing/monkey_test/icing-monkey-test-runner.h b/icing/monkey_test/icing-monkey-test-runner.h index 5f5649c..fbaaaaa 100644 --- a/icing/monkey_test/icing-monkey-test-runner.h +++ b/icing/monkey_test/icing-monkey-test-runner.h @@ -26,9 +26,42 @@ namespace icing { namespace lib { +class IcingMonkeyTestRunner; + +struct IcingMonkeyTestRunnerConfiguration { + explicit IcingMonkeyTestRunnerConfiguration(uint32_t seed, int num_types, + int num_namespaces, int num_uris, + int index_merge_size) + : seed(seed), + num_types(num_types), + num_namespaces(num_namespaces), + num_uris(num_uris), + index_merge_size(index_merge_size) {} + + uint32_t seed; + int num_types; + int num_namespaces; + int num_uris; + int index_merge_size; + + // The possible number of properties that may appear in generated schema + // types. + std::vector<int> possible_num_properties; + + // The possible number of tokens that may appear in generated documents, with + // a noise factor from 0.5 to 1 applied. + std::vector<int> possible_num_tokens_; + + // An array of pairs of monkey test APIs with frequencies. + // If f_sum is the sum of all the frequencies, an operation with frequency f + // means for every f_sum iterations, the operation is expected to run f times. + std::vector<std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>> + monkey_api_schedules; +}; + class IcingMonkeyTestRunner { public: - IcingMonkeyTestRunner(uint32_t seed = std::random_device()()); + IcingMonkeyTestRunner(const IcingMonkeyTestRunnerConfiguration& config); IcingMonkeyTestRunner(const IcingMonkeyTestRunner&) = delete; IcingMonkeyTestRunner& operator=(const IcingMonkeyTestRunner&) = delete; @@ -54,6 +87,7 @@ class IcingMonkeyTestRunner { void DoOptimize(); private: + IcingMonkeyTestRunnerConfiguration config_; MonkeyTestRandomEngine random_; Filesystem filesystem_; std::unique_ptr<DestructibleDirectory> icing_dir_; diff --git a/icing/monkey_test/icing-search-engine_monkey_test.cc b/icing/monkey_test/icing-search-engine_monkey_test.cc index ad887b8..a24e57f 100644 --- a/icing/monkey_test/icing-search-engine_monkey_test.cc +++ b/icing/monkey_test/icing-search-engine_monkey_test.cc @@ -20,11 +20,71 @@ namespace icing { namespace lib { TEST(IcingSearchEngineMonkeyTest, MonkeyTest) { + IcingMonkeyTestRunnerConfiguration config( + /*seed=*/std::random_device()(), + /*num_types=*/30, + /*num_namespaces=*/100, + /*num_uris=*/1000, + /*index_merge_size=*/1024 * 1024); + config.possible_num_properties = {0, + 1, + 2, + 4, + 8, + 16, + kTotalNumSections / 2, + kTotalNumSections, + kTotalNumSections + 1, + kTotalNumSections * 2}; + config.possible_num_tokens_ = {0, 1, 4, 16, 64, 256}; + config.monkey_api_schedules = { + {&IcingMonkeyTestRunner::DoPut, 500}, + {&IcingMonkeyTestRunner::DoSearch, 200}, + {&IcingMonkeyTestRunner::DoGet, 70}, + {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50}, + {&IcingMonkeyTestRunner::DoDelete, 50}, + {&IcingMonkeyTestRunner::DoDeleteByNamespace, 50}, + {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 50}, + {&IcingMonkeyTestRunner::DoDeleteByQuery, 20}, + {&IcingMonkeyTestRunner::DoOptimize, 5}, + {&IcingMonkeyTestRunner::ReloadFromDisk, 5}}; uint32_t num_iterations = IsAndroidArm() ? 1000 : 5000; - IcingMonkeyTestRunner runner; + IcingMonkeyTestRunner runner(config); ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema()); ASSERT_NO_FATAL_FAILURE(runner.Run(num_iterations)); } +TEST(DISABLED_IcingSearchEngineMonkeyTest, MonkeyManyDocTest) { + IcingMonkeyTestRunnerConfiguration config( + /*seed=*/std::random_device()(), + /*num_types=*/30, + /*num_namespaces=*/200, + /*num_uris=*/100000, + /*index_merge_size=*/1024 * 1024); + + // Due to the large amount of documents, we need to make each document smaller + // to finish the test. + config.possible_num_properties = {0, 1, 2}; + config.possible_num_tokens_ = {0, 1, 4}; + + // No deletion is performed to preserve a large number of documents. + config.monkey_api_schedules = { + {&IcingMonkeyTestRunner::DoPut, 500}, + {&IcingMonkeyTestRunner::DoSearch, 200}, + {&IcingMonkeyTestRunner::DoGet, 70}, + {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50}, + {&IcingMonkeyTestRunner::DoOptimize, 5}, + {&IcingMonkeyTestRunner::ReloadFromDisk, 5}}; + IcingMonkeyTestRunner runner(config); + ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema()); + // Pre-fill with 4 million documents + SetLoggingLevel(LogSeverity::WARNING); + for (int i = 0; i < 4000000; i++) { + ASSERT_NO_FATAL_FAILURE(runner.DoPut()); + } + SetLoggingLevel(LogSeverity::INFO); + ASSERT_NO_FATAL_FAILURE(runner.Run(1000)); +} + } // namespace lib } // namespace icing diff --git a/icing/monkey_test/monkey-test-generators.cc b/icing/monkey_test/monkey-test-generators.cc index 88fc0b6..7b2ff56 100644 --- a/icing/monkey_test/monkey-test-generators.cc +++ b/icing/monkey_test/monkey-test-generators.cc @@ -106,19 +106,8 @@ std::string MonkeyDocumentGenerator::GetUri() const { } int MonkeyDocumentGenerator::GetNumTokens() const { - std::uniform_int_distribution<> int_dist(-1, 4); - int n = int_dist(*random_); - if (n == -1) { - // 1/6 chance of getting zero token for a property - return 0; - } - if (n == 0) { - // 1/6 chance of getting one token for a property - return 1; - } - // 1/6 chance of getting one of 4, 16, 64, 256 - n = 1 << (2 * n); - + std::uniform_int_distribution<> dist(0, possible_num_tokens_.size() - 1); + int n = possible_num_tokens_[dist(*random_)]; // Add some noise std::uniform_real_distribution<> real_dist(0.5, 1); float p = real_dist(*random_); diff --git a/icing/monkey_test/monkey-test-generators.h b/icing/monkey_test/monkey-test-generators.h index 68c5e92..6349918 100644 --- a/icing/monkey_test/monkey-test-generators.h +++ b/icing/monkey_test/monkey-test-generators.h @@ -70,10 +70,12 @@ class MonkeyDocumentGenerator { public: explicit MonkeyDocumentGenerator(MonkeyTestRandomEngine* random, const SchemaProto* schema, + std::vector<int> possible_num_tokens, uint32_t num_namespaces, uint32_t num_uris = 0) : random_(random), schema_(schema), + possible_num_tokens_(std::move(possible_num_tokens)), num_namespaces_(num_namespaces), num_uris_(num_uris) {} @@ -104,6 +106,11 @@ class MonkeyDocumentGenerator { private: MonkeyTestRandomEngine* random_; // Does not own. const SchemaProto* schema_; // Does not own. + + // The possible number of tokens that may appear in generated documents, with + // a noise factor from 0.5 to 1 applied. + std::vector<int> possible_num_tokens_; + uint32_t num_namespaces_; uint32_t num_uris_; uint32_t num_docs_generated_ = 0; |