aboutsummaryrefslogtreecommitdiff
path: root/icing/monkey_test
diff options
context:
space:
mode:
Diffstat (limited to 'icing/monkey_test')
-rw-r--r-- icing/monkey_test/icing-monkey-test-runner.cc 105
-rw-r--r-- icing/monkey_test/icing-monkey-test-runner.h 46
-rw-r--r-- icing/monkey_test/icing-search-engine_monkey_test.cc 19
-rw-r--r-- icing/monkey_test/in-memory-icing-search-engine.cc 133
-rw-r--r-- icing/monkey_test/in-memory-icing-search-engine.h 33
-rw-r--r-- icing/monkey_test/monkey-test-generators.cc 287
-rw-r--r-- icing/monkey_test/monkey-test-generators.h 84
-rw-r--r-- icing/monkey_test/monkey-test-util.h 68
-rw-r--r-- icing/monkey_test/monkey-tokenized-document.h 3
9 files changed, 598 insertions, 180 deletions
diff --git a/icing/monkey_test/icing-monkey-test-runner.cc b/icing/monkey_test/icing-monkey-test-runner.cc
index 558da1c..76e41ce 100644
--- a/icing/monkey_test/icing-monkey-test-runner.cc
+++ b/icing/monkey_test/icing-monkey-test-runner.cc
@@ -15,16 +15,33 @@
#include "icing/monkey_test/icing-monkey-test-runner.h"
#include <algorithm>
+#include <array>
#include <cstdint>
#include <functional>
+#include <memory>
+#include <random>
#include <string>
+#include <utility>
#include <vector>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-directory.h"
+#include "icing/icing-search-engine.h"
#include "icing/monkey_test/in-memory-icing-search-engine.h"
#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/result/result-state-manager.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/logging.h"
@@ -37,17 +54,10 @@ namespace {
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::Eq;
using ::testing::Le;
+using ::testing::Not;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAreArray;
-SchemaProto GenerateRandomSchema(
- const IcingMonkeyTestRunnerConfiguration& config,
- MonkeyTestRandomEngine* random) {
- MonkeySchemaGenerator schema_generator(random);
- return schema_generator.GenerateSchema(config.num_types,
- config.possible_num_properties);
-}
-
SearchSpecProto GenerateRandomSearchSpecProto(
MonkeyTestRandomEngine* random,
MonkeyDocumentGenerator* document_generator) {
@@ -164,20 +174,13 @@ void SortDocuments(std::vector<DocumentProto>& documents) {
} // namespace
IcingMonkeyTestRunner::IcingMonkeyTestRunner(
- const IcingMonkeyTestRunnerConfiguration& config)
- : config_(config), random_(config.seed), in_memory_icing_() {
+ IcingMonkeyTestRunnerConfiguration config)
+ : config_(std::move(config)),
+ random_(config_.seed),
+ in_memory_icing_(std::make_unique<InMemoryIcingSearchEngine>(&random_)),
+ schema_generator_(
+ std::make_unique<MonkeySchemaGenerator>(&random_, &config_)) {
ICING_LOG(INFO) << "Monkey test runner started with seed: " << config_.seed;
-
- SchemaProto schema = GenerateRandomSchema(config_, &random_);
- ICING_LOG(DBG) << "Schema Generated: " << schema.DebugString();
-
- in_memory_icing_ =
- std::make_unique<InMemoryIcingSearchEngine>(&random_, std::move(schema));
-
- document_generator_ = std::make_unique<MonkeyDocumentGenerator>(
- &random_, in_memory_icing_->GetSchema(), config_.possible_num_tokens_,
- config_.num_namespaces, config_.num_uris);
-
std::string dir = GetTestTempDir() + "/icing/monkey";
filesystem_.DeleteDirectoryRecursively(dir.c_str());
icing_dir_ = std::make_unique<DestructibleDirectory>(&filesystem_, dir);
@@ -186,7 +189,7 @@ IcingMonkeyTestRunner::IcingMonkeyTestRunner(
void IcingMonkeyTestRunner::Run(uint32_t num) {
ASSERT_TRUE(icing_ != nullptr)
<< "Icing search engine has not yet been created. Please call "
- "CreateIcingSearchEngineWithSchema() first";
+ "Initialize() first";
uint32_t frequency_sum = 0;
for (const auto& schedule : config_.monkey_api_schedules) {
@@ -208,10 +211,55 @@ void IcingMonkeyTestRunner::Run(uint32_t num) {
}
}
-void IcingMonkeyTestRunner::CreateIcingSearchEngineWithSchema() {
+// Installs `schema` in the in-memory reference engine, rebuilds the document
+// generator against the schema now stored there, and finally applies that
+// same schema to the real Icing instance with
+// ignore_errors_and_delete_documents=true. Returns Icing's SetSchema result.
+SetSchemaResultProto IcingMonkeyTestRunner::SetSchema(SchemaProto&& schema) {
+  in_memory_icing_->SetSchema(std::move(schema));
+  // The in-memory engine holds the schema from here on; both the document
+  // generator and Icing are fed from that stored copy.
+  const SchemaProto* current_schema = in_memory_icing_->GetSchema();
+  document_generator_ = std::make_unique<MonkeyDocumentGenerator>(
+      &random_, current_schema, &config_);
+  return icing_->SetSchema(*current_schema,
+                           /*ignore_errors_and_delete_documents=*/true);
+}
+
+void IcingMonkeyTestRunner::Initialize() {
ASSERT_NO_FATAL_FAILURE(CreateIcingSearchEngine());
- ASSERT_THAT(icing_->SetSchema(*in_memory_icing_->GetSchema()).status(),
- ProtoIsOk());
+
+ SchemaProto schema = schema_generator_->GenerateSchema();
+ ICING_LOG(DBG) << "Schema Generated: " << schema.DebugString();
+
+ ASSERT_THAT(SetSchema(std::move(schema)).status(), ProtoIsOk());
+}
+
+// Monkey operation: asks the schema generator for a mutated version of the
+// current schema and checks that Icing reacts as the generator predicted.
+//
+// - If the generator flags the schema as invalid, Icing must reject it; the
+//   in-memory engine is left untouched.
+// - Otherwise the schema is applied to both engines, Icing's reported
+//   deleted / incompatible / index-incompatible type sets must match the
+//   generator's, and documents of deleted or incompatible types are removed
+//   from the in-memory engine.
+void IcingMonkeyTestRunner::DoUpdateSchema() {
+  ICING_LOG(INFO) << "Monkey updating schema";
+  MonkeySchemaGenerator::UpdateSchemaResult update =
+      schema_generator_->UpdateSchema(*in_memory_icing_->GetSchema());
+  if (update.is_invalid_schema) {
+    // Sent to Icing directly (not through SetSchema()) so that the in-memory
+    // engine and the document generator keep the current, valid schema.
+    SetSchemaResultProto rejected =
+        icing_->SetSchema(update.schema,
+                          /*ignore_errors_and_delete_documents=*/true);
+    ASSERT_THAT(rejected.status(), Not(ProtoIsOk()));
+    return;
+  }
+  ICING_LOG(DBG) << "Updating schema to: " << update.schema.DebugString();
+  SetSchemaResultProto applied = SetSchema(std::move(update.schema));
+  ASSERT_THAT(applied.status(), ProtoIsOk());
+  ASSERT_THAT(applied.deleted_schema_types(),
+              UnorderedElementsAreArray(update.schema_types_deleted));
+  ASSERT_THAT(applied.incompatible_schema_types(),
+              UnorderedElementsAreArray(update.schema_types_incompatible));
+  ASSERT_THAT(
+      applied.index_incompatible_changed_schema_types(),
+      UnorderedElementsAreArray(update.schema_types_index_incompatible));
+  // Mirror the document deletions in the in-memory engine: first the types
+  // that were removed, then the types that became incompatible.
+  for (const std::string& type_name : update.schema_types_deleted) {
+    ICING_ASSERT_OK(in_memory_icing_->DeleteBySchemaType(type_name));
+  }
+  for (const std::string& type_name : update.schema_types_incompatible) {
+    ICING_ASSERT_OK(in_memory_icing_->DeleteBySchemaType(type_name));
+  }
}
void IcingMonkeyTestRunner::DoGet() {
@@ -266,10 +314,11 @@ void IcingMonkeyTestRunner::DoDelete() {
/*p_other=*/0.1);
ICING_LOG(INFO) << "Monkey deleting namespace: " << document.name_space
<< ", uri: " << document.uri;
- in_memory_icing_->Delete(document.name_space, document.uri);
DeleteResultProto delete_result =
icing_->Delete(document.name_space, document.uri);
if (document.document.has_value()) {
+ ICING_ASSERT_OK(
+ in_memory_icing_->Delete(document.name_space, document.uri));
ASSERT_THAT(delete_result.status(), ProtoIsOk())
<< "Cannot delete an existing document.";
} else {
@@ -383,8 +432,8 @@ void IcingMonkeyTestRunner::DoSearch() {
ICING_VLOG(1) << "scoring_spec:\n" << scoring_spec->DebugString();
ICING_VLOG(1) << "result_spec:\n" << result_spec->DebugString();
- std::vector<DocumentProto> exp_documents =
- in_memory_icing_->Search(*search_spec);
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<DocumentProto> exp_documents,
+ in_memory_icing_->Search(*search_spec));
SearchResultProto search_result =
icing_->Search(*search_spec, *scoring_spec, *result_spec);
diff --git a/icing/monkey_test/icing-monkey-test-runner.h b/icing/monkey_test/icing-monkey-test-runner.h
index fbaaaaa..10be60c 100644
--- a/icing/monkey_test/icing-monkey-test-runner.h
+++ b/icing/monkey_test/icing-monkey-test-runner.h
@@ -16,63 +16,36 @@
#define ICING_MONKEY_TEST_ICING_MONKEY_TEST_RUNNER_H_
#include <cstdint>
-#include <random>
+#include <memory>
#include "icing/file/destructible-directory.h"
+#include "icing/file/filesystem.h"
#include "icing/icing-search-engine.h"
#include "icing/monkey_test/in-memory-icing-search-engine.h"
#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/proto/schema.pb.h"
namespace icing {
namespace lib {
-class IcingMonkeyTestRunner;
-
-struct IcingMonkeyTestRunnerConfiguration {
- explicit IcingMonkeyTestRunnerConfiguration(uint32_t seed, int num_types,
- int num_namespaces, int num_uris,
- int index_merge_size)
- : seed(seed),
- num_types(num_types),
- num_namespaces(num_namespaces),
- num_uris(num_uris),
- index_merge_size(index_merge_size) {}
-
- uint32_t seed;
- int num_types;
- int num_namespaces;
- int num_uris;
- int index_merge_size;
-
- // The possible number of properties that may appear in generated schema
- // types.
- std::vector<int> possible_num_properties;
-
- // The possible number of tokens that may appear in generated documents, with
- // a noise factor from 0.5 to 1 applied.
- std::vector<int> possible_num_tokens_;
-
- // An array of pairs of monkey test APIs with frequencies.
- // If f_sum is the sum of all the frequencies, an operation with frequency f
- // means for every f_sum iterations, the operation is expected to run f times.
- std::vector<std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>>
- monkey_api_schedules;
-};
-
class IcingMonkeyTestRunner {
public:
- IcingMonkeyTestRunner(const IcingMonkeyTestRunnerConfiguration& config);
+ IcingMonkeyTestRunner(IcingMonkeyTestRunnerConfiguration config);
IcingMonkeyTestRunner(const IcingMonkeyTestRunner&) = delete;
IcingMonkeyTestRunner& operator=(const IcingMonkeyTestRunner&) = delete;
+ SetSchemaResultProto SetSchema(SchemaProto&& schema);
+
// This function must and should only be called before running the monkey
// test.
- void CreateIcingSearchEngineWithSchema();
+ void Initialize();
// Run the monkey test with num operations.
void Run(uint32_t num);
// APIs supported in icing search engine.
+ void DoUpdateSchema();
void DoGet();
void DoGetAllNamespaces();
void DoPut();
@@ -94,6 +67,7 @@ class IcingMonkeyTestRunner {
std::unique_ptr<InMemoryIcingSearchEngine> in_memory_icing_;
std::unique_ptr<IcingSearchEngine> icing_;
+ std::unique_ptr<MonkeySchemaGenerator> schema_generator_;
std::unique_ptr<MonkeyDocumentGenerator> document_generator_;
void CreateIcingSearchEngine();
diff --git a/icing/monkey_test/icing-search-engine_monkey_test.cc b/icing/monkey_test/icing-search-engine_monkey_test.cc
index a24e57f..436e27b 100644
--- a/icing/monkey_test/icing-search-engine_monkey_test.cc
+++ b/icing/monkey_test/icing-search-engine_monkey_test.cc
@@ -12,9 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+#include <cstdint>
+#include <random>
+#include <utility>
+
#include "gtest/gtest.h"
#include "icing/monkey_test/icing-monkey-test-runner.h"
+#include "icing/monkey_test/monkey-test-util.h"
#include "icing/portable/platform.h"
+#include "icing/proto/debug.pb.h"
+#include "icing/schema/section.h"
+#include "icing/util/logging.h"
namespace icing {
namespace lib {
@@ -44,13 +52,14 @@ TEST(IcingSearchEngineMonkeyTest, MonkeyTest) {
{&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
{&IcingMonkeyTestRunner::DoDelete, 50},
{&IcingMonkeyTestRunner::DoDeleteByNamespace, 50},
- {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 50},
+ {&IcingMonkeyTestRunner::DoDeleteBySchemaType, 45},
{&IcingMonkeyTestRunner::DoDeleteByQuery, 20},
{&IcingMonkeyTestRunner::DoOptimize, 5},
+ {&IcingMonkeyTestRunner::DoUpdateSchema, 5},
{&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
uint32_t num_iterations = IsAndroidArm() ? 1000 : 5000;
- IcingMonkeyTestRunner runner(config);
- ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema());
+ IcingMonkeyTestRunner runner(std::move(config));
+ ASSERT_NO_FATAL_FAILURE(runner.Initialize());
ASSERT_NO_FATAL_FAILURE(runner.Run(num_iterations));
}
@@ -75,8 +84,8 @@ TEST(DISABLED_IcingSearchEngineMonkeyTest, MonkeyManyDocTest) {
{&IcingMonkeyTestRunner::DoGetAllNamespaces, 50},
{&IcingMonkeyTestRunner::DoOptimize, 5},
{&IcingMonkeyTestRunner::ReloadFromDisk, 5}};
- IcingMonkeyTestRunner runner(config);
- ASSERT_NO_FATAL_FAILURE(runner.CreateIcingSearchEngineWithSchema());
+ IcingMonkeyTestRunner runner(std::move(config));
+ ASSERT_NO_FATAL_FAILURE(runner.Initialize());
// Pre-fill with 4 million documents
SetLoggingLevel(LogSeverity::WARNING);
for (int i = 0; i < 4000000; i++) {
diff --git a/icing/monkey_test/in-memory-icing-search-engine.cc b/icing/monkey_test/in-memory-icing-search-engine.cc
index 405a7b0..7baa06e 100644
--- a/icing/monkey_test/in-memory-icing-search-engine.cc
+++ b/icing/monkey_test/in-memory-icing-search-engine.cc
@@ -14,15 +14,27 @@
#include "icing/monkey_test/in-memory-icing-search-engine.h"
+#include <algorithm>
#include <cstdint>
+#include <memory>
+#include <random>
+#include <string>
#include <string_view>
#include <unordered_set>
+#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/store/document-id.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -38,9 +50,80 @@ bool IsPrefix(std::string_view s1, std::string_view s2) {
return s1 == s2.substr(0, s1.length());
}
-bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
- const std::string &query,
- TermMatchType::Code term_match_type) {
+} // namespace
+
+// Looks up the property config registered under (`schema_type`,
+// `property_name`) in property_config_map_.
+//
+// Returns:
+//   A pointer to the matching PropertyConfigProto on success
+//   NOT_FOUND if the schema type, or the property within it, is unknown
+libtextclassifier3::StatusOr<const PropertyConfigProto *>
+InMemoryIcingSearchEngine::GetPropertyConfig(
+    const std::string &schema_type, const std::string &property_name) const {
+  const auto type_entry = property_config_map_.find(schema_type);
+  if (type_entry == property_config_map_.end()) {
+    return absl_ports::NotFoundError(
+        absl_ports::StrCat("Schema type: ", schema_type, " is not found."));
+  }
+  const auto &properties_by_name = type_entry->second;
+  const auto property_entry = properties_by_name.find(property_name);
+  if (property_entry == properties_by_name.end()) {
+    return absl_ports::NotFoundError(
+        absl_ports::StrCat("Property: ", property_name, " is not found."));
+  }
+  return &property_entry->second;
+}
+
+// Determines how the tokens of `section` are indexed for a document of type
+// `schema_type`, by walking the "."-separated `section.path` through the
+// schema one property at a time.
+//
+// Returns:
+//   The term match type configured on the first STRING property reached
+//   TermMatchType::UNKNOWN if the section is not indexable: the path hits a
+//     property that is neither STRING nor DOCUMENT, a nested property is
+//     excluded by an enclosing document indexing config, or the path ends on
+//     a DOCUMENT property
+//   INVALID_ARGUMENT if the path is empty
+//   NOT_FOUND (from GetPropertyConfig) if a path component is not in the
+//     schema
+libtextclassifier3::StatusOr<TermMatchType::Code>
+InMemoryIcingSearchEngine::GetTermMatchType(
+    const std::string &schema_type,
+    const MonkeyTokenizedSection &section) const {
+  bool in_indexable_properties_list = false;
+  bool all_indexable_from_top = true;
+  std::vector<std::string_view> properties_in_path =
+      absl_ports::StrSplit(section.path, ".");
+  if (properties_in_path.empty()) {
+    return absl_ports::InvalidArgumentError("Got empty path.");
+  }
+  std::string curr_schema_type = schema_type;
+  for (size_t i = 0; i < properties_in_path.size(); ++i) {
+    ICING_ASSIGN_OR_RETURN(
+        const PropertyConfigProto *prop,
+        GetPropertyConfig(curr_schema_type,
+                          std::string(properties_in_path[i])));
+    if (prop->data_type() == PropertyConfigProto::DataType::STRING) {
+      return prop->string_indexing_config().term_match_type();
+    }
+    if (prop->data_type() != PropertyConfigProto::DataType::DOCUMENT) {
+      return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+    }
+    bool old_all_indexable_from_top = all_indexable_from_top;
+    all_indexable_from_top &=
+        prop->document_indexing_config().index_nested_properties();
+    if (!all_indexable_from_top && !in_indexable_properties_list) {
+      // Only try to update in_indexable_properties_list if this is the first
+      // level with index_nested_properties=false.
+      if (old_all_indexable_from_top) {
+        const auto &indexable_properties =
+            prop->document_indexing_config().indexable_nested_properties_list();
+        // Rebuild the remainder of the path with its "." separators restored.
+        // Plain concatenation would turn "a.b" into "ab", which can never
+        // match a multi-level entry.
+        // NOTE(review): assumes list entries use the same "."-separated form
+        // as section paths - confirm against the schema proto.
+        std::string relative_path;
+        for (size_t j = i + 1; j < properties_in_path.size(); ++j) {
+          if (j > i + 1) {
+            relative_path.push_back('.');
+          }
+          relative_path.append(properties_in_path[j].data(),
+                               properties_in_path[j].size());
+        }
+        in_indexable_properties_list =
+            std::find(indexable_properties.begin(), indexable_properties.end(),
+                      relative_path) != indexable_properties.end();
+      }
+      // Check in_indexable_properties_list again.
+      if (!in_indexable_properties_list) {
+        return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+      }
+    }
+    // Descend into the nested document's own schema type. (GetTypeName() on
+    // the indexing config would yield the protobuf message name instead, and
+    // the next lookup would fail with NOT_FOUND.)
+    curr_schema_type = prop->schema_type();
+  }
+  return TermMatchType::Code::TermMatchType_Code_UNKNOWN;
+}
+
+libtextclassifier3::StatusOr<bool>
+InMemoryIcingSearchEngine::DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const {
std::vector<std::string_view> strs = absl_ports::StrSplit(query, ":");
std::string_view query_term;
std::string_view section_restrict;
@@ -54,8 +137,15 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
if (!section_restrict.empty() && section.path != section_restrict) {
continue;
}
+ ICING_ASSIGN_OR_RETURN(
+ TermMatchType::Code section_term_match_type,
+ GetTermMatchType(document.document.schema(), section));
+ if (section_term_match_type == TermMatchType::UNKNOWN) {
+ // Skip non-indexable property.
+ continue;
+ }
for (const std::string &token : section.token_sequence) {
- if (section.term_match_type == TermMatchType::EXACT_ONLY ||
+ if (section_term_match_type == TermMatchType::EXACT_ONLY ||
term_match_type == TermMatchType::EXACT_ONLY) {
if (token == query_term) {
return true;
@@ -68,7 +158,18 @@ bool DoesDocumentMatchQuery(const MonkeyTokenizedDocument &document,
return false;
}
-} // namespace
+// Takes ownership of `schema` and rebuilds property_config_map_ so that every
+// (schema type, property name) pair refers to the property config stored
+// inside the newly owned schema_.
+void InMemoryIcingSearchEngine::SetSchema(SchemaProto &&schema) {
+  schema_ = std::make_unique<SchemaProto>(std::move(schema));
+  // Entries refer into the previous schema object, so start from scratch.
+  property_config_map_.clear();
+  for (int type_idx = 0; type_idx < schema_->types_size(); ++type_idx) {
+    const SchemaTypeConfigProto &type = schema_->types(type_idx);
+    auto &properties_by_name = property_config_map_[type.schema_type()];
+    for (int prop_idx = 0; prop_idx < type.properties_size(); ++prop_idx) {
+      const PropertyConfigProto &property = type.properties(prop_idx);
+      properties_by_name.insert({property.property_name(), property});
+    }
+  }
+}
InMemoryIcingSearchEngine::PickDocumentResult
InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
@@ -121,7 +222,7 @@ InMemoryIcingSearchEngine::RandomPickDocument(float p_alive, float p_all,
void InMemoryIcingSearchEngine::Put(const MonkeyTokenizedDocument &document) {
// Delete the old one if existing.
- Delete(document.document.namespace_(), document.document.uri());
+ Delete(document.document.namespace_(), document.document.uri()).IgnoreError();
existing_doc_ids_.push_back(documents_.size());
namespace_uri_docid_map[document.document.namespace_()]
[document.document.uri()] = documents_.size();
@@ -192,7 +293,8 @@ InMemoryIcingSearchEngine::DeleteBySchemaType(const std::string &schema_type) {
libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
const SearchSpecProto &search_spec) {
- std::vector<DocumentId> doc_ids_to_delete = InternalSearch(search_spec);
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> doc_ids_to_delete,
+ InternalSearch(search_spec));
for (DocumentId doc_id : doc_ids_to_delete) {
const DocumentProto &document = documents_[doc_id].document;
if (!Delete(document.namespace_(), document.uri()).ok()) {
@@ -204,9 +306,10 @@ libtextclassifier3::StatusOr<uint32_t> InMemoryIcingSearchEngine::DeleteByQuery(
return doc_ids_to_delete.size();
}
-std::vector<DocumentProto> InMemoryIcingSearchEngine::Search(
- const SearchSpecProto &search_spec) const {
- std::vector<DocumentId> matched_doc_ids = InternalSearch(search_spec);
+libtextclassifier3::StatusOr<std::vector<DocumentProto>>
+InMemoryIcingSearchEngine::Search(const SearchSpecProto &search_spec) const {
+ ICING_ASSIGN_OR_RETURN(std::vector<DocumentId> matched_doc_ids,
+ InternalSearch(search_spec));
std::vector<DocumentProto> result;
result.reserve(matched_doc_ids.size());
for (DocumentId doc_id : matched_doc_ids) {
@@ -229,12 +332,16 @@ libtextclassifier3::StatusOr<DocumentId> InMemoryIcingSearchEngine::InternalGet(
" is not found by InMemoryIcingSearchEngine::InternalGet."));
}
-std::vector<DocumentId> InMemoryIcingSearchEngine::InternalSearch(
+libtextclassifier3::StatusOr<std::vector<DocumentId>>
+InMemoryIcingSearchEngine::InternalSearch(
const SearchSpecProto &search_spec) const {
std::vector<DocumentId> matched_doc_ids;
for (DocumentId doc_id : existing_doc_ids_) {
- if (DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
- search_spec.term_match_type())) {
+ ICING_ASSIGN_OR_RETURN(
+ bool match,
+ DoesDocumentMatchQuery(documents_[doc_id], search_spec.query(),
+ search_spec.term_match_type()));
+ if (match) {
matched_doc_ids.push_back(doc_id);
}
}
diff --git a/icing/monkey_test/in-memory-icing-search-engine.h b/icing/monkey_test/in-memory-icing-search-engine.h
index a5d8872..98e7e4c 100644
--- a/icing/monkey_test/in-memory-icing-search-engine.h
+++ b/icing/monkey_test/in-memory-icing-search-engine.h
@@ -16,18 +16,21 @@
#define ICING_MONKEY_TEST_IN_MEMORY_ICING_SEARCH_ENGINE_H_
#include <cstdint>
+#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/monkey_test/monkey-test-generators.h"
+#include "icing/monkey_test/monkey-test-util.h"
#include "icing/monkey_test/monkey-tokenized-document.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/store/document-id.h"
namespace icing {
@@ -43,15 +46,14 @@ class InMemoryIcingSearchEngine {
std::optional<DocumentProto> document;
};
- InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random,
- SchemaProto &&schema)
- : random_(random),
- schema_(std::make_unique<SchemaProto>(std::move(schema))) {}
+ InMemoryIcingSearchEngine(MonkeyTestRandomEngine *random) : random_(random) {}
uint32_t GetNumAliveDocuments() const { return existing_doc_ids_.size(); }
const SchemaProto *GetSchema() const { return schema_.get(); }
+ void SetSchema(SchemaProto &&schema);
+
// Randomly pick a document from the in-memory Icing for monkey testing.
//
// p_alive: chance of getting an alive document.
@@ -112,7 +114,8 @@ class InMemoryIcingSearchEngine {
// Currently, only the "query" and "term_match_type" fields are recognized by
// the in-memory Icing, and only single term queries with possible section
// restrictions are supported.
- std::vector<DocumentProto> Search(const SearchSpecProto &search_spec) const;
+ libtextclassifier3::StatusOr<std::vector<DocumentProto>> Search(
+ const SearchSpecProto &search_spec) const;
private:
// Does not own.
@@ -126,6 +129,11 @@ class InMemoryIcingSearchEngine {
namespace_uri_docid_map;
std::unique_ptr<SchemaProto> schema_;
+ // A map that maps from (schema_type, property_name) to the corresponding
+ // PropertyConfigProto.
+ std::unordered_map<
+ std::string, std::unordered_map<std::string, const PropertyConfigProto &>>
+ property_config_map_;
// Finds and returns the internal document id for the document identified by
// the given key (namespace, uri)
@@ -138,8 +146,19 @@ class InMemoryIcingSearchEngine {
// A helper method for DeleteByQuery and Search to get matched internal doc
// ids.
- std::vector<DocumentId> InternalSearch(
+ libtextclassifier3::StatusOr<std::vector<DocumentId>> InternalSearch(
const SearchSpecProto &search_spec) const;
+
+ libtextclassifier3::StatusOr<const PropertyConfigProto *> GetPropertyConfig(
+ const std::string &schema_type, const std::string &property_name) const;
+
+ libtextclassifier3::StatusOr<TermMatchType::Code> GetTermMatchType(
+ const std::string &schema_type,
+ const MonkeyTokenizedSection &section) const;
+
+ libtextclassifier3::StatusOr<bool> DoesDocumentMatchQuery(
+ const MonkeyTokenizedDocument &document, const std::string &query,
+ TermMatchType::Code term_match_type) const;
};
} // namespace lib
diff --git a/icing/monkey_test/monkey-test-generators.cc b/icing/monkey_test/monkey-test-generators.cc
index 7b2ff56..0d5ad73 100644
--- a/icing/monkey_test/monkey-test-generators.cc
+++ b/icing/monkey_test/monkey-test-generators.cc
@@ -14,79 +14,269 @@
#include "icing/monkey_test/monkey-test-generators.h"
+#include <array>
+#include <cstdint>
+#include <random>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/document-builder.h"
+#include "icing/monkey_test/monkey-test-util.h"
+#include "icing/monkey_test/monkey-tokenized-document.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+
namespace icing {
namespace lib {
-SchemaProto MonkeySchemaGenerator::GenerateSchema(
- int num_types, const std::vector<int>& possible_num_properties) const {
+namespace {
+
+// Cardinalities that GetRandomCardinality() chooses from.
+constexpr std::array<PropertyConfigProto::Cardinality::Code, 3> kCardinalities =
+    {PropertyConfigProto::Cardinality::REPEATED,
+     PropertyConfigProto::Cardinality::OPTIONAL,
+     PropertyConfigProto::Cardinality::REQUIRED};
+
+// Term match types that GetRandomTermMatchType() chooses from. UNKNOWN makes
+// SetStringIndexingConfig() clear the string indexing config.
+constexpr std::array<TermMatchType::Code, 3> kTermMatchTypes = {
+    TermMatchType::UNKNOWN, TermMatchType::EXACT_ONLY, TermMatchType::PREFIX};
+
+// Draws one entry of kCardinalities uniformly at random.
+PropertyConfigProto::Cardinality::Code GetRandomCardinality(
+    MonkeyTestRandomEngine* random) {
+  std::uniform_int_distribution<> index_dist(0, kCardinalities.size() - 1);
+  const int index = index_dist(*random);
+  return kCardinalities[index];
+}
+
+// Draws one entry of kTermMatchTypes uniformly at random.
+TermMatchType::Code GetRandomTermMatchType(MonkeyTestRandomEngine* random) {
+  std::uniform_int_distribution<> index_dist(0, kTermMatchTypes.size() - 1);
+  const int index = index_dist(*random);
+  return kTermMatchTypes[index];
+}
+
+// Returns true if `property` has a string indexing config whose term match
+// type is anything other than UNKNOWN.
+// TODO: Update this function when supporting document_indexing_config.
+bool IsIndexableProperty(const PropertyConfigProto& property) {
+  const TermMatchType::Code match_type =
+      property.string_indexing_config().term_match_type();
+  return match_type != TermMatchType::UNKNOWN;
+}
+
+// Applies `term_match_type` to `property`: UNKNOWN clears the string indexing
+// config entirely; any other value sets it together with the PLAIN tokenizer.
+void SetStringIndexingConfig(PropertyConfigProto& property,
+                             TermMatchType::Code term_match_type) {
+  if (term_match_type == TermMatchType::UNKNOWN) {
+    property.clear_string_indexing_config();
+    return;
+  }
+  StringIndexingConfig* indexing_config =
+      property.mutable_string_indexing_config();
+  indexing_config->set_term_match_type(term_match_type);
+  // TODO: Try to add different TokenizerTypes. VERBATIM, RFC822, and URL are
+  // the remaining candidates to consider.
+  indexing_config->set_tokenizer_type(
+      StringIndexingConfig::TokenizerType::PLAIN);
+}
+
+}  // namespace
+
+SchemaProto MonkeySchemaGenerator::GenerateSchema() {
SchemaProto schema;
- std::uniform_int_distribution<> dist(0, possible_num_properties.size() - 1);
- while (num_types > 0) {
- int num_properties = possible_num_properties[dist(*random_)];
- *schema.add_types() = GenerateType(
- "MonkeyTestType" + std::to_string(num_types), num_properties);
- --num_types;
+ for (int i = 0; i < config_->num_types; ++i) {
+ *schema.add_types() = GenerateType();
}
return schema;
}
+// Produces a randomly mutated copy of `schema` together with the bookkeeping
+// the caller needs to predict Icing's reaction:
+//   1. removes between 0 and 2 randomly chosen existing types, recording
+//      their names in result.schema_types_deleted;
+//   2. passes roughly one third of the remaining types to UpdateType(), which
+//      receives `result` and may record further changes in it;
+//   3. appends between 0 and 2 freshly generated types.
+// `schema` itself is not modified.
+MonkeySchemaGenerator::UpdateSchemaResult MonkeySchemaGenerator::UpdateSchema(
+    const SchemaProto& schema) {
+  // `schema` is a const reference, so this is (and always was) a copy; no
+  // std::move, which would only suggest otherwise.
+  UpdateSchemaResult result = {schema};
+  SchemaProto& new_schema = result.schema;
+  // Delete up to 2 existing types. The countdown stops at 0 (exclusive) so
+  // that a draw of N deletes exactly N types; it also stops early once no
+  // types are left.
+  std::uniform_int_distribution<> num_types_to_delete_dist(0, 2);
+  for (int num_types_to_delete = num_types_to_delete_dist(*random_);
+       num_types_to_delete > 0 && new_schema.types_size() > 0;
+       --num_types_to_delete) {
+    std::uniform_int_distribution<> dist(0, new_schema.types_size() - 1);
+    int index_to_delete = dist(*random_);
+    result.schema_types_deleted.insert(
+        new_schema.types(index_to_delete).schema_type());
+    // Swap-with-last + RemoveLast: removal without shifting the other types.
+    new_schema.mutable_types()->SwapElements(index_to_delete,
+                                             new_schema.types_size() - 1);
+    new_schema.mutable_types()->RemoveLast();
+  }
+  // Updating about 1/3 of existing types.
+  for (int i = 0; i < new_schema.types_size(); ++i) {
+    std::uniform_int_distribution<> dist(0, 2);
+    if (dist(*random_) == 0) {
+      UpdateType(*new_schema.mutable_types(i), result);
+    }
+  }
+  // Add up to 2 new types (a draw of N adds exactly N types).
+  std::uniform_int_distribution<> num_types_to_add_dist(0, 2);
+  for (int num_types_to_add = num_types_to_add_dist(*random_);
+       num_types_to_add > 0; --num_types_to_add) {
+    *new_schema.add_types() = GenerateType();
+  }
+  return result;
+}
+
PropertyConfigProto MonkeySchemaGenerator::GenerateProperty(
- std::string_view name, TermMatchType::Code term_match_type) const {
+ const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type) {
PropertyConfigProto prop;
- prop.set_property_name(name.data(), name.length());
+ prop.set_property_name(
+ "MonkeyTestProp" +
+ std::to_string(num_properties_generated_[type_config.schema_type()]++));
// TODO: Perhaps in future iterations we will want to generate more than just
// string properties.
prop.set_data_type(PropertyConfigProto::DataType::STRING);
+ prop.set_cardinality(cardinality);
+ SetStringIndexingConfig(prop, term_match_type);
+ return prop;
+}
- constexpr std::array<PropertyConfigProto::Cardinality::Code, 3>
- cardinalities = {PropertyConfigProto::Cardinality::REPEATED,
- PropertyConfigProto::Cardinality::OPTIONAL,
- PropertyConfigProto::Cardinality::REQUIRED};
- std::uniform_int_distribution<> dist(0, cardinalities.size() - 1);
- prop.set_cardinality(cardinalities[dist(*random_)]);
+void MonkeySchemaGenerator::UpdateProperty(
+ const SchemaTypeConfigProto& type_config, PropertyConfigProto& property,
+ UpdateSchemaResult& result) {
+ PropertyConfigProto::Cardinality::Code new_cardinality =
+ GetRandomCardinality(random_);
+ if (new_cardinality != property.cardinality()) {
+ // Only do compatible cardinality update for now, otherwise it would be hard
+ // to track which documents will be invalid after updating the schema.
+ //
+ // The following type of updates are not allowed:
+ // - optional -> required
+ // - repeated -> optional
+ // - repeated -> required
+ if (property.cardinality() == PropertyConfigProto::Cardinality::OPTIONAL &&
+ new_cardinality == PropertyConfigProto::Cardinality::REQUIRED) {
+ return;
+ }
+ if (property.cardinality() == PropertyConfigProto::Cardinality::REPEATED &&
+ (new_cardinality == PropertyConfigProto::Cardinality::OPTIONAL ||
+ new_cardinality == PropertyConfigProto::Cardinality::REQUIRED)) {
+ return;
+ }
+ property.set_cardinality(new_cardinality);
+ }
- if (term_match_type != TermMatchType::UNKNOWN) {
- StringIndexingConfig* string_indexing_config =
- prop.mutable_string_indexing_config();
- string_indexing_config->set_term_match_type(term_match_type);
- string_indexing_config->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ if (property.data_type() == PropertyConfigProto::DataType::STRING) {
+ TermMatchType::Code new_term_match_type = GetRandomTermMatchType(random_);
+ if (new_term_match_type !=
+ property.string_indexing_config().term_match_type()) {
+ SetStringIndexingConfig(property, new_term_match_type);
+ result.schema_types_index_incompatible.insert(type_config.schema_type());
+ }
}
- return prop;
}
-SchemaTypeConfigProto MonkeySchemaGenerator::GenerateType(
- std::string_view name, int num_properties) const {
+// Generates a new schema type named "MonkeyTestType<N>", where N is the number
+// of types generated so far by this generator. The number of properties is
+// picked uniformly from config_->possible_num_properties. Every property gets
+// a random cardinality. A random term match type is only drawn while fewer
+// than kTotalNumSections properties of the type have a term match type other
+// than UNKNOWN; the remaining properties are generated with UNKNOWN.
+SchemaTypeConfigProto MonkeySchemaGenerator::GenerateType() {
SchemaTypeConfigProto type_config;
- type_config.set_schema_type(name.data(), name.length());
+ type_config.set_schema_type("MonkeyTestType" +
+ std::to_string(num_types_generated_++));
+ // With an empty candidate list, size() - 1 would wrap around and the
+ // distribution would be constructed with an invalid range, so generate a type
+ // without properties in that case.
+ int total_num_properties = 0;
+ if (!config_->possible_num_properties.empty()) {
+ std::uniform_int_distribution<> possible_num_properties_dist(
+ 0, config_->possible_num_properties.size() - 1);
+ total_num_properties =
+ config_->possible_num_properties[possible_num_properties_dist(*random_)];
+ }
+
int num_indexed_properties = 0;
- constexpr std::array<TermMatchType::Code, 3> term_match_types = {
- TermMatchType::UNKNOWN, TermMatchType::EXACT_ONLY, TermMatchType::PREFIX};
- std::uniform_int_distribution<> dist(0, term_match_types.size() - 1);
- while (--num_properties >= 0) {
- std::string prop_name = "MonkeyTestProp" + std::to_string(num_properties);
+ for (int i = 0; i < total_num_properties; ++i) {
TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
if (num_indexed_properties < kTotalNumSections) {
- term_match_type = term_match_types[dist(*random_)];
+ term_match_type = GetRandomTermMatchType(random_);
}
if (term_match_type != TermMatchType::UNKNOWN) {
num_indexed_properties += 1;
}
- (*type_config.add_properties()) =
- GenerateProperty(prop_name, term_match_type);
+ (*type_config.add_properties()) = GenerateProperty(
+ type_config, GetRandomCardinality(random_), term_match_type);
}
return type_config;
}
+// Applies a random set of changes to `type_config` in place and records their
+// consequences in `result`:
+//   1. Makes up to 4 attempts to delete a randomly picked property. Only
+//      REQUIRED properties are actually deleted.
+//   2. Passes each remaining property to UpdateProperty with probability 1/3.
+//   3. Appends up to 4 newly generated properties.
+// Finally, result.is_invalid_schema is set if the type ends up with more than
+// kTotalNumSections indexable properties.
+void MonkeySchemaGenerator::UpdateType(SchemaTypeConfigProto& type_config,
+                                       UpdateSchemaResult& result) {
+  // Make up to 4 deletion attempts. The counter runs from the drawn value down
+  // to 1, so drawing 0 means no attempt at all.
+  std::uniform_int_distribution<> num_properties_to_delete_dist(0, 4);
+  for (int num_properties_to_delete = num_properties_to_delete_dist(*random_);
+       num_properties_to_delete > 0; --num_properties_to_delete) {
+    if (type_config.properties_size() == 0) {
+      // Nothing left to delete.
+      break;
+    }
+    std::uniform_int_distribution<> dist(0, type_config.properties_size() - 1);
+    int index_to_delete = dist(*random_);
+    // Only delete a required property for now, otherwise it would be hard
+    // to track which documents will be invalid after updating the schema.
+    if (type_config.properties(index_to_delete).cardinality() !=
+        PropertyConfigProto::Cardinality::REQUIRED) {
+      continue;
+    }
+    if (IsIndexableProperty(type_config.properties(index_to_delete))) {
+      result.schema_types_index_incompatible.insert(type_config.schema_type());
+    }
+    // Removing a property will cause the type to be considered as
+    // incompatible.
+    result.schema_types_incompatible.insert(type_config.schema_type());
+
+    // Swap the victim to the back and drop it; this does not preserve the
+    // order of the remaining properties.
+    type_config.mutable_properties()->SwapElements(
+        index_to_delete, type_config.properties_size() - 1);
+    type_config.mutable_properties()->RemoveLast();
+  }
+
+  // Update about 1/3 of the existing properties.
+  for (int i = 0; i < type_config.properties_size(); ++i) {
+    std::uniform_int_distribution<> dist(0, 2);
+    if (dist(*random_) == 0) {
+      UpdateProperty(type_config, *type_config.mutable_properties(i), result);
+    }
+  }
+
+  // Add up to 4 new properties. As above, drawing 0 adds nothing.
+  std::uniform_int_distribution<> num_properties_to_add_dist(0, 4);
+  for (int num_properties_to_add = num_properties_to_add_dist(*random_);
+       num_properties_to_add > 0; --num_properties_to_add) {
+    PropertyConfigProto::Cardinality::Code new_cardinality =
+        GetRandomCardinality(random_);
+    // Adding a required property will make all documents of this type invalid.
+    if (new_cardinality == PropertyConfigProto::Cardinality::REQUIRED) {
+      result.schema_types_incompatible.insert(type_config.schema_type());
+    }
+    PropertyConfigProto new_property = GenerateProperty(
+        type_config, new_cardinality, GetRandomTermMatchType(random_));
+    if (IsIndexableProperty(new_property)) {
+      result.schema_types_index_incompatible.insert(type_config.schema_type());
+    }
+    (*type_config.add_properties()) = std::move(new_property);
+  }
+
+  // Count the indexable properties of the updated type; exceeding
+  // kTotalNumSections marks the whole schema update as invalid.
+  int num_indexed_properties = 0;
+  for (int i = 0; i < type_config.properties_size(); ++i) {
+    if (IsIndexableProperty(type_config.properties(i))) {
+      ++num_indexed_properties;
+    }
+  }
+
+  if (num_indexed_properties > kTotalNumSections) {
+    result.is_invalid_schema = true;
+  }
+}
+
std::string MonkeyDocumentGenerator::GetNamespace() const {
uint32_t name_space;
// When num_namespaces is 0, all documents generated get different namespaces.
// Otherwise, namespaces will be randomly picked from a set with
// num_namespaces elements.
- if (num_namespaces_ == 0) {
+ if (config_->num_namespaces == 0) {  // Unbounded: use the number of documents generated so far.
name_space = num_docs_generated_;
} else {
- std::uniform_int_distribution<> dist(0, num_namespaces_ - 1);
+ std::uniform_int_distribution<> dist(0, config_->num_namespaces - 1);  // Both bounds are inclusive.
name_space = dist(*random_);
}
return absl_ports::StrCat("namespace", std::to_string(name_space));
@@ -96,18 +286,19 @@ std::string MonkeyDocumentGenerator::GetUri() const {
uint32_t uri;
// When num_uris is 0, all documents generated get different URIs. Otherwise,
// URIs will be randomly picked from a set with num_uris elements.
- if (num_uris_ == 0) {
+ if (config_->num_uris == 0) {  // Unbounded: use the number of documents generated so far.
uri = num_docs_generated_;
} else {
- std::uniform_int_distribution<> dist(0, num_uris_ - 1);
+ std::uniform_int_distribution<> dist(0, config_->num_uris - 1);  // Both bounds are inclusive.
uri = dist(*random_);
}
return absl_ports::StrCat("uri", std::to_string(uri));
}
int MonkeyDocumentGenerator::GetNumTokens() const {
- std::uniform_int_distribution<> dist(0, possible_num_tokens_.size() - 1);
- int n = possible_num_tokens_[dist(*random_)];
+ std::uniform_int_distribution<> dist(
+ 0, config_->possible_num_tokens_.size() - 1);
+ int n = config_->possible_num_tokens_[dist(*random_)];
// Add some noise
std::uniform_real_distribution<> real_dist(0.5, 1);
float p = real_dist(*random_);
@@ -138,15 +329,13 @@ MonkeyTokenizedDocument MonkeyDocumentGenerator::GenerateDocument() {
std::vector<std::string> prop_content = GetPropertyContent();
doc_builder.AddStringProperty(prop.property_name(),
absl_ports::StrJoin(prop_content, " "));
- // Create a tokenized section if the current property is indexable.
- if (prop.data_type() == PropertyConfigProto::DataType::STRING &&
- prop.string_indexing_config().term_match_type() !=
- TermMatchType::UNKNOWN) {
- MonkeyTokenizedSection section = {
- prop.property_name(), prop.string_indexing_config().term_match_type(),
- std::move(prop_content)};
- document.tokenized_sections.push_back(std::move(section));
- }
+ // No matter whether the property is indexable currently, we have to create
+ // a section for it since a non-indexable property can become indexable
+ // after a schema type change. The in-memory icing will automatically skip
+ // sections that are non-indexable at the time of search requests.
+ MonkeyTokenizedSection section = {prop.property_name(),
+ std::move(prop_content)};
+ document.tokenized_sections.push_back(std::move(section));
}
document.document = doc_builder.Build();
++num_docs_generated_;
diff --git a/icing/monkey_test/monkey-test-generators.h b/icing/monkey_test/monkey-test-generators.h
index 6349918..72a4723 100644
--- a/icing/monkey_test/monkey-test-generators.h
+++ b/icing/monkey_test/monkey-test-generators.h
@@ -15,51 +15,66 @@
#ifndef ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
#define ICING_MONKEY_TEST_MONKEY_TEST_GENERATORS_H_
-#include <algorithm>
#include <cstdint>
#include <random>
#include <string>
#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
#include <vector>
-#include "icing/absl_ports/str_cat.h"
-#include "icing/absl_ports/str_join.h"
-#include "icing/document-builder.h"
#include "icing/monkey_test/monkey-test-common-words.h"
+#include "icing/monkey_test/monkey-test-util.h"
#include "icing/monkey_test/monkey-tokenized-document.h"
-#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
-#include "icing/schema/section.h"
+#include "icing/proto/term.pb.h"
#include "icing/util/clock.h"
namespace icing {
namespace lib {
-using MonkeyTestRandomEngine = std::mt19937;
-
// A random schema generator used for monkey testing.
class MonkeySchemaGenerator {
public:
- explicit MonkeySchemaGenerator(MonkeyTestRandomEngine* random)
- : random_(random) {}
+ // Outcome of a random schema update.
+ struct UpdateSchemaResult {
+ // NOTE(review): presumably the schema after the update - confirm against
+ // UpdateSchema().
+ SchemaProto schema;
+ // Set when an updated type ends up with more than kTotalNumSections
+ // indexable properties. Defaults to false so that a default-initialized
+ // result never carries an indeterminate flag.
+ bool is_invalid_schema = false;
+ // NOTE(review): presumably the names of types removed by the update -
+ // confirm against UpdateSchema().
+ std::unordered_set<std::string> schema_types_deleted;
+ // Types that had a property removed or a REQUIRED property added.
+ std::unordered_set<std::string> schema_types_incompatible;
+ // Types for which an indexable property was removed or added, or for which
+ // the term match type of a string property changed.
+ std::unordered_set<std::string> schema_types_index_incompatible;
+ };
+
+ // Neither `random` nor `config` is owned by the generator.
+ explicit MonkeySchemaGenerator(
+ MonkeyTestRandomEngine* random,
+ const IcingMonkeyTestRunnerConfiguration* config)
+ : random_(random), config_(config) {}
- // To ensure that the random schema is generated with the best quality, the
- // number of properties for each type will only be randomly picked from the
- // list of possible_num_properties, instead of picking it from a range.
- // For example, a vector of [1, 2, 3, 4] means each generated types have a 25%
- // chance of getting 1 property, 2 properties, 3 properties and 4 properties.
- SchemaProto GenerateSchema(
- int num_types, const std::vector<int>& possible_num_properties) const;
+ SchemaProto GenerateSchema();
+
+ UpdateSchemaResult UpdateSchema(const SchemaProto& schema);
private:
PropertyConfigProto GenerateProperty(
- std::string_view name, TermMatchType::Code term_match_type) const;
+ const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type);
+
+ // May change the cardinality and the string indexing config of `property`,
+ // recording the consequences in `result`.
+ void UpdateProperty(const SchemaTypeConfigProto& type_config,
+ PropertyConfigProto& property,
+ UpdateSchemaResult& result);
- SchemaTypeConfigProto GenerateType(std::string_view name,
- int num_properties) const;
+ // Generates a new type named "MonkeyTestType<N>" with random properties.
+ SchemaTypeConfigProto GenerateType();
- // Does not own.
- MonkeyTestRandomEngine* random_;
+ // Randomly deletes, updates and adds properties of `type_config` in place,
+ // recording the consequences in `result`.
+ void UpdateType(SchemaTypeConfigProto& type_config,
+ UpdateSchemaResult& result);
+
+ // Number of types generated so far; used to build unique type names.
+ int num_types_generated_ = 0;
+ // A map from type name to the number of properties generated in the
+ // corresponding types.
+ std::unordered_map<std::string, int> num_properties_generated_;
+
+ MonkeyTestRandomEngine* random_; // Does not own.
+ const IcingMonkeyTestRunnerConfiguration* config_; // Does not own.
};
// A random document generator used for monkey testing.
@@ -68,16 +83,10 @@ class MonkeySchemaGenerator {
// Same for num_namespaces.
class MonkeyDocumentGenerator {
public:
- explicit MonkeyDocumentGenerator(MonkeyTestRandomEngine* random,
- const SchemaProto* schema,
- std::vector<int> possible_num_tokens,
- uint32_t num_namespaces,
- uint32_t num_uris = 0)
- : random_(random),
- schema_(schema),
- possible_num_tokens_(std::move(possible_num_tokens)),
- num_namespaces_(num_namespaces),
- num_uris_(num_uris) {}
+ explicit MonkeyDocumentGenerator(
+ MonkeyTestRandomEngine* random, const SchemaProto* schema,
+ const IcingMonkeyTestRunnerConfiguration* config)
+ : random_(random), schema_(schema), config_(config) {}
const SchemaTypeConfigProto& GetType() const {
std::uniform_int_distribution<> dist(0, schema_->types_size() - 1);
@@ -104,15 +113,10 @@ class MonkeyDocumentGenerator {
MonkeyTokenizedDocument GenerateDocument();
private:
- MonkeyTestRandomEngine* random_; // Does not own.
- const SchemaProto* schema_; // Does not own.
-
- // The possible number of tokens that may appear in generated documents, with
- // a noise factor from 0.5 to 1 applied.
- std::vector<int> possible_num_tokens_;
+ MonkeyTestRandomEngine* random_; // Does not own.
+ const SchemaProto* schema_; // Does not own.
+ const IcingMonkeyTestRunnerConfiguration* config_; // Does not own.
- uint32_t num_namespaces_;
- uint32_t num_uris_;
uint32_t num_docs_generated_ = 0;
Clock clock_;
};
diff --git a/icing/monkey_test/monkey-test-util.h b/icing/monkey_test/monkey-test-util.h
new file mode 100644
index 0000000..d6053d8
--- /dev/null
+++ b/icing/monkey_test/monkey-test-util.h
@@ -0,0 +1,68 @@
+// Copyright (C) 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
+#define ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
+
+#include <cstdint>
+#include <functional>
+#include <random>
+#include <utility>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+using MonkeyTestRandomEngine = std::mt19937;
+
+class IcingMonkeyTestRunner;
+
+// Configuration consumed by the monkey test generators.
+struct IcingMonkeyTestRunnerConfiguration {
+  explicit IcingMonkeyTestRunnerConfiguration(uint32_t seed, int num_types,
+                                              int num_namespaces, int num_uris,
+                                              int index_merge_size)
+      : seed{seed},
+        num_types{num_types},
+        num_namespaces{num_namespaces},
+        num_uris{num_uris},
+        index_merge_size{index_merge_size} {}
+
+  // NOTE(review): presumably seeds the MonkeyTestRandomEngine - confirm.
+  uint32_t seed;
+  int num_types;
+  // 0 means every generated document gets its own namespace; otherwise
+  // namespaces are drawn from a pool of this many.
+  int num_namespaces;
+  // 0 means every generated document gets its own URI; otherwise URIs are
+  // drawn from a pool of this many.
+  int num_uris;
+  int index_merge_size;
+
+  // Candidate property counts for generated types. The count for each type is
+  // drawn uniformly from this list rather than from a range, which keeps the
+  // quality of the random schema under control. For example, [1, 2, 3, 4]
+  // gives each generated type a 25% chance of having 1, 2, 3 or 4 properties.
+  std::vector<int> possible_num_properties;
+
+  // Candidate token counts for generated documents; a noise factor between
+  // 0.5 and 1 is applied to the picked value.
+  std::vector<int> possible_num_tokens_;
+
+  // Monkey test APIs paired with their frequencies. With f_sum being the sum
+  // of all frequencies, an operation with frequency f is expected to run f
+  // times in every f_sum iterations.
+  std::vector<std::pair<std::function<void(IcingMonkeyTestRunner*)>, uint32_t>>
+      monkey_api_schedules;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_MONKEY_TEST_MONKEY_TEST_UTIL_H_
diff --git a/icing/monkey_test/monkey-tokenized-document.h b/icing/monkey_test/monkey-tokenized-document.h
index a0b38c2..87b77bb 100644
--- a/icing/monkey_test/monkey-tokenized-document.h
+++ b/icing/monkey_test/monkey-tokenized-document.h
@@ -16,16 +16,15 @@
#define ICING_MONKEY_TEST_MONKEY_TOKENIZED_DOCUMENT_H_
#include <string>
+#include <vector>
#include "icing/proto/document.pb.h"
-#include "icing/proto/term.pb.h"
namespace icing {
namespace lib {
struct MonkeyTokenizedSection {
std::string path;
- TermMatchType::Code term_match_type;
std::vector<std::string> token_sequence;
};