about | summary | refs | log | tree | commit | diff
path: root/icing/monkey_test
diff options
context:
space:
mode:
author: Tim Barron <tjbarron@google.com> 2022-12-12 18:03:05 -0800
committer: Tim Barron <tjbarron@google.com> 2022-12-12 18:03:05 -0800
commit: 8ddc32ad433ea147de80dcfac2afe58962360f18 (patch)
tree: 50c30cb98396499bf1d6caf33b383f7f4bbc7e58 /icing/monkey_test
parent: 2658f90984737e5bf6c76d82024103dccd4d51c6 (diff)
download: icing-8ddc32ad433ea147de80dcfac2afe58962360f18.tar.gz
Sync from upstream.
Descriptions: ====================================================================== Add ScoringSpec into JoinSpec. Rename joined_document to child_document. ====================================================================== Create JoinedScoredDocumentHit class and refactor ScoredDocumentHitsRanker. ====================================================================== Implement initial Join workflow ====================================================================== Implement the Lexer for Icing Advanced Query Language ====================================================================== Create struct Options for PersistentHashMap ====================================================================== Premapping FileBackedVector ====================================================================== Create class PersistentHashMapKeyMapper ====================================================================== Add integer sections into TokenizedDocument and rename string sections ====================================================================== Create NumericIndex interface and DocHitInfoIteratorNumeric ====================================================================== Implement DummyNumericIndex and unit test ====================================================================== Change PostingListAccessor::Finalize to rvalue member function ====================================================================== Define the Abstract Syntax Tree for Icing's list_filter parser. 
====================================================================== Refactor query processing and score ====================================================================== Refactor IcingSearchEngine for AppSearch Dynamite Module 0p APIs ====================================================================== Implement the Lexer for Icing Advanced Scoring Language ====================================================================== Add a common interface for IcingSearchEngine and dynamite client ====================================================================== Implement a subset of the query grammar. ====================================================================== Refactor index processor ====================================================================== Add integer index into IcingSearchEngine and IndexProcessor ====================================================================== Implement the parser for Icing Advanced Scoring Language ====================================================================== Implement IntegerIndexData and PostingListUsedIntegerIndexDataSerializer ====================================================================== Add PostingListAccessor abstract class for common components and methods ====================================================================== Implement PostingListIntegerIndexDataAccessor ====================================================================== Create PostingListIntegerIndexDataAccessorTest ====================================================================== Fix Icing Segmentation tests for word connectors that changed in ICU 72. ====================================================================== Modify the Advanced Query grammar to allow functions to accept expressions. ====================================================================== Implement QueryVisitor. 
====================================================================== Enable the Advanced Query Parser to handle member functions ====================================================================== Refactor the Scorer class to support the Advanced Scoring Language ====================================================================== Integrate advanced query parser with the query processor. ====================================================================== Implement support for JoinSpec in Icing. ====================================================================== Implement the Advanced Scoring Language for basic functions and operators ====================================================================== Bug: 208654892 Bug: 249829533 Bug: 256022027 Bug: 261474063 Bug: 240333360 Bug: 193919210 Change-Id: I5f5bdc6249282ecc4b014b4fbdf8e2d1f8b20c19
Diffstat (limited to 'icing/monkey_test')
-rw-r--r-- icing/monkey_test/icing-monkey-test-runner.cc 63
-rw-r--r-- icing/monkey_test/icing-monkey-test-runner.h 36
-rw-r--r-- icing/monkey_test/icing-search-engine_monkey_test.cc 62
-rw-r--r-- icing/monkey_test/monkey-test-generators.cc 15
-rw-r--r-- icing/monkey_test/monkey-test-generators.h 7
5 files changed, 125 insertions, 58 deletions
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc
index 2dd5a03..a2a6c9b 100644
--- a/icing/monkey_test/icing-monkey-test-runner.cc
+++ b/icing/monkey_test/icing-monkey-test-runner.cc
@@ -40,42 +40,12 @@ using ::testing::Le;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAreArray;
-inline constexpr int kNumTypes = 30;
-const std::vector<int> kPossibleNumProperties = {0,
- 1,
- 2,
- 4,
- 8,
- 16,
- kTotalNumSections / 2,
- kTotalNumSections,
- kTotalNumSections + 1,
- kTotalNumSections * 2};
-inline constexpr int kNumNamespaces = 100;
-inline constexpr int kNumURIs = 1000;
-
-// Merge per 131072 hits
-const int kIndexMergeSize = 1024 * 1024;
-
-// An array of pairs of monkey test APIs with frequencies.
-// If f_sum is the sum of all the frequencies, an operation with frequency f
-// means for every f_sum iterations, the operation is expected to run f times.
-const std::vector<
- std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>>
- kMonkeyAPISchedules = {{&IcingMonkeyTestRunner::DoPut, 500},
- {&IcingMonkeyTestRunner::DoSearch, 200},
- {&IcingMonkeyTestRunner::DoGet, 70},
- {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
- {&IcingMonkeyTestRunner::DoDelete, 50},
- {&IcingMonkeyTestRunner::DoDeleteByNamespace, 50},
- {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 50},
- {&IcingMonkeyTestRunner::DoDeleteByQuery, 20},
- {&IcingMonkeyTestRunner::DoOptimize, 5},
- {&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
-
-SchemaProto GenerateRandomSchema(MonkeyTestRandomEngine* random) {
+SchemaProto GenerateRandomSchema(
+ const IcingMonkeyTestRunnerConfiguration& config,
+ MonkeyTestRandomEngine* random) {
MonkeySchemaGenerator schema_generator(random);
- return schema_generator.GenerateSchema(kNumTypes, kPossibleNumProperties);
+ return schema_generator.GenerateSchema(config.num_types,
+ config.possible_num_properties);
}
SearchSpecProto GenerateRandomSearchSpecProto(
@@ -166,18 +136,20 @@ void SortDocuments(std::vector<DocumentProto>& documents) {
} // namespace
-IcingMonkeyTestRunner::IcingMonkeyTestRunner(uint32_t seed)
- : random_(seed), in_memory_icing_() {
- ICING_LOG(INFO) << "Monkey test runner started with seed: " << seed;
+IcingMonkeyTestRunner::IcingMonkeyTestRunner(
+ const IcingMonkeyTestRunnerConfiguration& config)
+ : config_(config), random_(config.seed), in_memory_icing_() {
+ ICING_LOG(INFO) << "Monkey test runner started with seed: " << config_.seed;
- SchemaProto schema = GenerateRandomSchema(&random_);
+ SchemaProto schema = GenerateRandomSchema(config_, &random_);
ICING_LOG(DBG) << "Schema Generated: " << schema.DebugString();
in_memory_icing_ =
std::make_unique<InMemoryIcingSearchEngine>(&random_, std::move(schema));
document_generator_ = std::make_unique<MonkeyDocumentGenerator>(
- &random_, in_memory_icing_->GetSchema(), kNumNamespaces, kNumURIs);
+ &random_, in_memory_icing_->GetSchema(), config_.possible_num_tokens_,
+ config_.num_namespaces, config_.num_uris);
std::string dir = GetTestTempDir() + "/icing/monkey";
filesystem_.DeleteDirectoryRecursively(dir.c_str());
@@ -190,13 +162,13 @@ void IcingMonkeyTestRunner::Run(uint32_t num) {
"CreateIcingSearchEngineWithSchema() first";
uint32_t frequency_sum = 0;
- for (const auto& schedule : kMonkeyAPISchedules) {
+ for (const auto& schedule : config_.monkey_api_schedules) {
frequency_sum += schedule.second;
}
std::uniform_int_distribution<> dist(0, frequency_sum - 1);
for (; num; --num) {
int p = dist(random_);
- for (const auto& schedule : kMonkeyAPISchedules) {
+ for (const auto& schedule : config_.monkey_api_schedules) {
if (p < schedule.second) {
ASSERT_NO_FATAL_FAILURE(schedule.first(this));
break;
@@ -404,6 +376,11 @@ void IcingMonkeyTestRunner::DoSearch() {
search_result = icing_->GetNextPage(search_result.next_page_token());
ASSERT_THAT(search_result.status(), ProtoIsOk());
}
+ // The maximum number of scored documents allowed in Icing is 30000, in which
+ // case we are not able to compare the results with the in-memory Icing.
+ if (exp_documents.size() >= 30000) {
+ return;
+ }
if (snippet_spec.num_matches_per_property() > 0) {
ASSERT_THAT(num_snippeted,
Eq(std::min<uint32_t>(exp_documents.size(),
@@ -432,7 +409,7 @@ void IcingMonkeyTestRunner::DoOptimize() {
void IcingMonkeyTestRunner::CreateIcingSearchEngine() {
IcingSearchEngineOptions icing_options;
- icing_options.set_index_merge_size(kIndexMergeSize);
+ icing_options.set_index_merge_size(config_.index_merge_size);
icing_options.set_base_dir(icing_dir_->dir());
icing_ = std::make_unique<IcingSearchEngine>(icing_options);
ASSERT_THAT(icing_->Initialize().status(), ProtoIsOk());
diff --git a/icing/monkey_test/icing-monkey-test-runner.h b/icing/monkey_test/icing-monkey-test-runner.h
index 5f5649c..fbaaaaa 100644
--- a/icing/monkey_test/icing-monkey-test-runner.h
+++ b/icing/monkey_test/icing-monkey-test-runner.h
@@ -26,9 +26,42 @@
namespace icing {
namespace lib {
+class IcingMonkeyTestRunner;
+
+struct IcingMonkeyTestRunnerConfiguration {
+ explicit IcingMonkeyTestRunnerConfiguration(uint32_t seed, int num_types,
+ int num_namespaces, int num_uris,
+ int index_merge_size)
+ : seed(seed),
+ num_types(num_types),
+ num_namespaces(num_namespaces),
+ num_uris(num_uris),
+ index_merge_size(index_merge_size) {}
+
+ uint32_t seed;
+ int num_types;
+ int num_namespaces;
+ int num_uris;
+ int index_merge_size;
+
+ // The possible number of properties that may appear in generated schema
+ // types.
+ std::vector<int> possible_num_properties;
+
+ // The possible number of tokens that may appear in generated documents, with
+ // a noise factor from 0.5 to 1 applied.
+ std::vector<int> possible_num_tokens_;
+
+ // An array of pairs of monkey test APIs with frequencies.
+ // If f_sum is the sum of all the frequencies, an operation with frequency f
+ // means for every f_sum iterations, the operation is expected to run f times.
+ std::vector<std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>>
+ monkey_api_schedules;
+};
+
class IcingMonkeyTestRunner {
public:
- IcingMonkeyTestRunner(uint32_t seed = std::random_device()());
+ IcingMonkeyTestRunner(const IcingMonkeyTestRunnerConfiguration& config);
IcingMonkeyTestRunner(const IcingMonkeyTestRunner&) = delete;
IcingMonkeyTestRunner& operator=(const IcingMonkeyTestRunner&) = delete;
@@ -54,6 +87,7 @@ class IcingMonkeyTestRunner {
void DoOptimize();
private:
+ IcingMonkeyTestRunnerConfiguration config_;
MonkeyTestRandomEngine random_;
Filesystem filesystem_;
std::unique_ptr<DestructibleDirectory> icing_dir_;
diff --git a/icing/monkey_test/icing-search-engine_monkey_test.cc b/icing/monkey_test/icing-search-engine_monkey_test.cc
index ad887b8..a24e57f 100644
--- a/icing/monkey_test/icing-search-engine_monkey_test.cc
+++ b/icing/monkey_test/icing-search-engine_monkey_test.cc
@@ -20,11 +20,71 @@ namespace icing {
namespace lib {
TEST(IcingSearchEngineMonkeyTest, MonkeyTest) {
+ IcingMonkeyTestRunnerConfiguration config(
+ /*seed=*/std::random_device()(),
+ /*num_types=*/30,
+ /*num_namespaces=*/100,
+ /*num_uris=*/1000,
+ /*index_merge_size=*/1024 * 1024);
+ config.possible_num_properties = {0,
+ 1,
+ 2,
+ 4,
+ 8,
+ 16,
+ kTotalNumSections / 2,
+ kTotalNumSections,
+ kTotalNumSections + 1,
+ kTotalNumSections * 2};
+ config.possible_num_tokens_ = {0, 1, 4, 16, 64, 256};
+ config.monkey_api_schedules = {
+ {&IcingMonkeyTestRunner::DoPut, 500},
+ {&IcingMonkeyTestRunner::DoSearch, 200},
+ {&IcingMonkeyTestRunner::DoGet, 70},
+ {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
+ {&IcingMonkeyTestRunner::DoDelete, 50},
+ {&IcingMonkeyTestRunner::DoDeleteByNamespace, 50},
+ {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 50},
+ {&IcingMonkeyTestRunner::DoDeleteByQuery, 20},
+ {&IcingMonkeyTestRunner::DoOptimize, 5},
+ {&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
uint32_t num_iterations = IsAndroidArm() ? 1000 : 5000;
- IcingMonkeyTestRunner runner;
+ IcingMonkeyTestRunner runner(config);
ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema());
ASSERT_NO_FATAL_FAILURE(runner.Run(num_iterations));
}
+TEST(DISABLED_IcingSearchEngineMonkeyTest, MonkeyManyDocTest) {
+ IcingMonkeyTestRunnerConfiguration config(
+ /*seed=*/std::random_device()(),
+ /*num_types=*/30,
+ /*num_namespaces=*/200,
+ /*num_uris=*/100000,
+ /*index_merge_size=*/1024 * 1024);
+
+ // Due to the large amount of documents, we need to make each document smaller
+ // to finish the test.
+ config.possible_num_properties = {0, 1, 2};
+ config.possible_num_tokens_ = {0, 1, 4};
+
+ // No deletion is performed to preserve a large number of documents.
+ config.monkey_api_schedules = {
+ {&IcingMonkeyTestRunner::DoPut, 500},
+ {&IcingMonkeyTestRunner::DoSearch, 200},
+ {&IcingMonkeyTestRunner::DoGet, 70},
+ {&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
+ {&IcingMonkeyTestRunner::DoOptimize, 5},
+ {&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
+ IcingMonkeyTestRunner runner(config);
+ ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema());
+ // Pre-fill with 4 million documents
+ SetLoggingLevel(LogSeverity::WARNING);
+ for (int i = 0; i < 4000000; i++) {
+ ASSERT_NO_FATAL_FAILURE(runner.DoPut());
+ }
+ SetLoggingLevel(LogSeverity::INFO);
+ ASSERT_NO_FATAL_FAILURE(runner.Run(1000));
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/monkey_test/monkey-test-generators.cc b/icing/monkey_test/monkey-test-generators.cc
index 88fc0b6..7b2ff56 100644
--- a/icing/monkey_test/monkey-test-generators.cc
+++ b/icing/monkey_test/monkey-test-generators.cc
@@ -106,19 +106,8 @@ std::string MonkeyDocumentGenerator::GetUri() const {
}
int MonkeyDocumentGenerator::GetNumTokens() const {
- std::uniform_int_distribution<> int_dist(-1, 4);
- int n = int_dist(*random_);
- if (n == -1) {
- // 1/6 chance of getting zero token for a property
- return 0;
- }
- if (n == 0) {
- // 1/6 chance of getting one token for a property
- return 1;
- }
- // 1/6 chance of getting one of 4, 16, 64, 256
- n = 1 << (2 * n);
-
+ std::uniform_int_distribution<> dist(0, possible_num_tokens_.size() - 1);
+ int n = possible_num_tokens_[dist(*random_)];
// Add some noise
std::uniform_real_distribution<> real_dist(0.5, 1);
float p = real_dist(*random_);
diff --git a/icing/monkey_test/monkey-test-generators.h b/icing/monkey_test/monkey-test-generators.h
index 68c5e92..6349918 100644
--- a/icing/monkey_test/monkey-test-generators.h
+++ b/icing/monkey_test/monkey-test-generators.h
@@ -70,10 +70,12 @@ class MonkeyDocumentGenerator {
public:
explicit MonkeyDocumentGenerator(MonkeyTestRandomEngine* random,
const SchemaProto* schema,
+ std::vector<int> possible_num_tokens,
uint32_t num_namespaces,
uint32_t num_uris = 0)
: random_(random),
schema_(schema),
+ possible_num_tokens_(std::move(possible_num_tokens)),
num_namespaces_(num_namespaces),
num_uris_(num_uris) {}
@@ -104,6 +106,11 @@ class MonkeyDocumentGenerator {
private:
MonkeyTestRandomEngine* random_; // Does not own.
const SchemaProto* schema_; // Does not own.
+
+ // The possible number of tokens that may appear in generated documents, with
+ // a noise factor from 0.5 to 1 applied.
+ std::vector<int> possible_num_tokens_;
+
uint32_t num_namespaces_;
uint32_t num_uris_;
uint32_t num_docs_generated_ = 0;