aboutsummaryrefslogtreecommitdiff
path: root/icing/index/index-processor_benchmark.cc
diff options
context:
space:
mode:
author Cassie Wang <cassiewang@google.com> 2019-12-20 15:11:45 -0800
committer Cassie Wang <cassiewang@google.com> 2019-12-20 16:18:05 -0800
commit 128c9db88925c8425f2ad81e1d8985461d7ba21a (patch)
tree f97ee47cc99d2c162eb30a5e051c606823dfd1ec /icing/index/index-processor_benchmark.cc
parent 1897505cb34f3d53e848da13fafe7691c17417ea (diff)
download icing-128c9db88925c8425f2ad81e1d8985461d7ba21a.tar.gz
Port over Icing c++ code from upstream
Change-Id: Ia3981fed7e0e70589efc027d4123f306cdfbe990
Diffstat (limited to 'icing/index/index-processor_benchmark.cc')
-rw-r--r-- icing/index/index-processor_benchmark.cc 379
1 files changed, 379 insertions, 0 deletions
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
new file mode 100644
index 0000000..f22d2f2
--- /dev/null
+++ b/icing/index/index-processor_benchmark.cc
@@ -0,0 +1,379 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/index-processor.h"
+#include "icing/index/index.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/logging.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index:index-processor_benchmark
+//
+// $ blaze-bin/icing/index/index-processor_benchmark
+// --benchmarks=all
+//
+// Run on an Android device:
+// Make target //icing/tokenization:language-segmenter depend on
+// //third_party/icu
+//
+// Make target //icing/transform:normalizer depend on
+// //third_party/icu
+//
+// Download LangId model file from
+// //nlp/saft/components/lang_id/mobile/fb_model:models/latest_model.smfb and
+// put it into your device:
+// $ adb push [your model path] /data/local/tmp/
+//
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index:index-processor_benchmark
+//
+// $ adb push blaze-bin/icing/index/index-processor_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/index-processor_benchmark --benchmarks=all
+// --adb
+
+// Command-line flag telling the benchmark that it is being run on an Android
+// device via adb. When it is set, the language segmenter model is read from
+// /data/local/tmp/latest_model.smfb and the ICU data file setup is skipped.
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Populates `type_config` as the schema type "Fake_Type" and appends ten
+// properties named p0 through p9. Every property is a REQUIRED STRING whose
+// indexing config uses EXACT_ONLY term matching and the PLAIN tokenizer.
+void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
+  constexpr int kNumProperties = 10;
+
+  type_config->set_schema_type("Fake_Type");
+  for (int index = 0; index < kNumProperties; ++index) {
+    auto property = type_config->add_properties();
+    property->set_property_name(IcingStringUtil::StringPrintf("p%d", index));
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+    auto indexing_config = property->mutable_indexing_config();
+    indexing_config->set_term_match_type(TermMatchType::EXACT_ONLY);
+    indexing_config->set_tokenizer_type(IndexingConfig::TokenizerType::PLAIN);
+  }
+}
+
+// Builds a "Fake_Type" document keyed ("icing", "fake/1") whose only property,
+// p0, is the letter 'A' repeated `content_length` times.
+DocumentProto CreateDocumentWithOneProperty(int content_length) {
+  std::string content(content_length, 'A');
+  return DocumentBuilder()
+      .SetKey("icing", "fake/1")
+      .SetSchema("Fake_Type")
+      .AddStringProperty("p0", content)
+      .Build();
+}
+
+// Builds a "Fake_Type" document keyed ("icing", "fake/1") with the ten string
+// properties p0 through p9. Each property holds one letter ('A' for p0 up to
+// 'J' for p9) repeated `content_length / 10` times (integer division), so the
+// total content is at most `content_length` characters.
+DocumentProto CreateDocumentWithTenProperties(int content_length) {
+  const int per_property_length = content_length / 10;
+  // Produces the filler string for a single property.
+  const auto repeat = [per_property_length](char letter) {
+    return std::string(per_property_length, letter);
+  };
+
+  return DocumentBuilder()
+      .SetKey("icing", "fake/1")
+      .SetSchema("Fake_Type")
+      .AddStringProperty("p0", repeat('A'))
+      .AddStringProperty("p1", repeat('B'))
+      .AddStringProperty("p2", repeat('C'))
+      .AddStringProperty("p3", repeat('D'))
+      .AddStringProperty("p4", repeat('E'))
+      .AddStringProperty("p5", repeat('F'))
+      .AddStringProperty("p6", repeat('G'))
+      .AddStringProperty("p7", repeat('H'))
+      .AddStringProperty("p8", repeat('I'))
+      .AddStringProperty("p9", repeat('J'))
+      .Build();
+}
+
+// Builds a "Fake_Type" document keyed ("icing", "fake/1") whose only property,
+// p0, is the sequence "àáâãā" repeated until the string's length() (counted in
+// char units, not letters) reaches at least `content_length`. Only whole
+// repetitions are appended, so the result may be longer than requested. A
+// zero or negative `content_length` yields empty content.
+DocumentProto CreateDocumentWithDiacriticLetters(int content_length) {
+  // Clamp before comparing against std::string::length(): a negative int would
+  // otherwise be converted to a huge unsigned value and the loop would keep
+  // appending until memory ran out.
+  const std::string::size_type min_length =
+      content_length > 0 ? static_cast<std::string::size_type>(content_length)
+                         : 0;
+
+  std::string content;
+  while (content.length() < min_length) {
+    content.append("àáâãā");
+  }
+  return DocumentBuilder()
+      .SetKey("icing", "fake/1")
+      .SetSchema("Fake_Type")
+      .AddStringProperty("p0", content)
+      .Build();
+}
+
+// Builds a "Fake_Type" document keyed ("icing", "fake/1") whose only property,
+// p0, is the sequence "あいうえお" repeated until the string's length()
+// (counted in char units, not letters) reaches at least `content_length`. Only
+// whole repetitions are appended, so the result may be longer than requested.
+// A zero or negative `content_length` yields empty content.
+DocumentProto CreateDocumentWithHiragana(int content_length) {
+  // Clamp before comparing against std::string::length(): a negative int would
+  // otherwise be converted to a huge unsigned value and the loop would keep
+  // appending until memory ran out.
+  const std::string::size_type min_length =
+      content_length > 0 ? static_cast<std::string::size_type>(content_length)
+                         : 0;
+
+  std::string content;
+  while (content.length() < min_length) {
+    content.append("あいうえお");
+  }
+  return DocumentBuilder()
+      .SetKey("icing", "fake/1")
+      .SetSchema("Fake_Type")
+      .AddStringProperty("p0", content)
+      .Build();
+}
+
+// Creates an Index stored under `index_dir` on `filesystem`, configured with
+// an index merge size of 10 * 1024 * 1024. The result of Index::Create() is
+// unwrapped with ValueOrDie(), so creation errors are not handled here.
+std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
+                                   const std::string& index_dir) {
+  constexpr int kIndexMergeSize = 10 * 1024 * 1024;
+  Index::Options index_options(index_dir,
+                               /*index_merge_size=*/kIndexMergeSize);
+  return Index::Create(index_options, &filesystem).ValueOrDie();
+}
+
+// Creates a LanguageSegmenter. Without the --adb flag the model path comes
+// from GetLangIdModelPath(); with it, the model is expected at
+// /data/local/tmp/latest_model.smfb on the device. The result is unwrapped
+// with ValueOrDie(), so creation errors are not handled here.
+std::unique_ptr<LanguageSegmenter> CreateLanguageSegmenter() {
+  if (!absl::GetFlag(FLAGS_adb)) {
+    return LanguageSegmenter::Create(GetLangIdModelPath()).ValueOrDie();
+  }
+  return LanguageSegmenter::Create("/data/local/tmp/latest_model.smfb")
+      .ValueOrDie();
+}
+
+// Creates a Normalizer whose max term byte size is the largest int value. The
+// result is unwrapped with ValueOrDie(), so creation errors are not handled
+// here.
+std::unique_ptr<Normalizer> CreateNormalizer() {
+  constexpr int kMaxTermByteSize = std::numeric_limits<int>::max();
+  return Normalizer::Create(/*max_term_byte_size=*/kMaxTermByteSize)
+      .ValueOrDie();
+}
+
+// Creates a SchemaStore under GetTestTempDir() and sets a schema containing
+// the single type produced by CreateFakeTypeConfig(). If SetSchema() fails the
+// error message is logged and the store is still returned. The result of
+// SchemaStore::Create() is unwrapped with ValueOrDie().
+std::unique_ptr<SchemaStore> CreateSchemaStore() {
+  // The store receives a pointer to the filesystem and is then returned to the
+  // caller, so the filesystem has to outlive this function. A stack object
+  // would leave the returned store holding a dangling pointer.
+  // NOTE(review): presumably SchemaStore keeps the pointer after Create() and
+  // Filesystem has no per-instance state that makes sharing one instance
+  // across calls a problem -- confirm against their headers.
+  static Filesystem filesystem;
+
+  std::unique_ptr<SchemaStore> schema_store =
+      SchemaStore::Create(&filesystem, GetTestTempDir()).ValueOrDie();
+
+  SchemaProto schema;
+  CreateFakeTypeConfig(schema.add_types());
+
+  auto set_schema_status = schema_store->SetSchema(schema);
+  if (!set_schema_status.ok()) {
+    // Best effort: report the failure but let the benchmark carry on.
+    ICING_LOG(ERROR) << set_schema_status.status().error_message();
+  }
+
+  return schema_store;
+}
+
+// Recursively deletes `index_dir` through `filesystem`. The result of the
+// delete call is not checked.
+void CleanUp(const IcingFilesystem& filesystem, const std::string& index_dir) {
+  const char* const directory = index_dir.c_str();
+  filesystem.DeleteDirectoryRecursively(directory);
+}
+
+// Creates an IndexProcessor over the given schema store, language segmenter,
+// normalizer and index. The processor allows up to 10 * 1024 * 1024 tokens per
+// document and is configured with the kReturnError token limit behavior. None
+// of the pointers are owned by this function.
+std::unique_ptr<IndexProcessor> CreateIndexProcessor(
+    const SchemaStore* schema_store,
+    const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+    Index* index) {
+  constexpr int kMaxTokensPerDocument = 10 * 1024 * 1024;
+
+  IndexProcessor::Options options{};
+  options.token_limit_behavior =
+      IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+  options.max_tokens_per_document = kMaxTokensPerDocument;
+
+  return std::make_unique<IndexProcessor>(schema_store, language_segmenter,
+                                          normalizer, index, options);
+}
+
+// Benchmarks IndexProcessor::IndexDocument() on the document produced by
+// CreateDocumentWithOneProperty(state.range(0)). All setup and teardown happen
+// outside the `for (auto _ : state)` loop.
+void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
+  // The ICU data file is only set up here when not running via adb.
+  if (!absl::GetFlag(FLAGS_adb)) {
+    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+  }
+
+  IcingFilesystem filesystem;
+  const std::string index_dir = GetTestTempDir() + "/index_test/";
+
+  // Delete any existing index directory before creating the index.
+  CleanUp(filesystem, index_dir);
+
+  auto index = CreateIndex(filesystem, index_dir);
+  auto language_segmenter = CreateLanguageSegmenter();
+  auto normalizer = CreateNormalizer();
+  auto schema_store = CreateSchemaStore();
+  auto index_processor =
+      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+                           normalizer.get(), index.get());
+
+  DocumentProto document = CreateDocumentWithOneProperty(state.range(0));
+
+  // Each iteration indexes the same document under the next document id.
+  DocumentId next_document_id = 0;
+  for (auto _ : state) {
+    ICING_ASSERT_OK(
+        index_processor->IndexDocument(document, next_document_id++));
+  }
+
+  CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithOneProperty)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+// Benchmarks IndexProcessor::IndexDocument() on the document produced by
+// CreateDocumentWithTenProperties(state.range(0)). All setup and teardown
+// happen outside the `for (auto _ : state)` loop.
+void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
+  // The ICU data file is only set up here when not running via adb.
+  if (!absl::GetFlag(FLAGS_adb)) {
+    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+  }
+
+  IcingFilesystem filesystem;
+  const std::string index_dir = GetTestTempDir() + "/index_test/";
+
+  // Delete any existing index directory before creating the index.
+  CleanUp(filesystem, index_dir);
+
+  auto index = CreateIndex(filesystem, index_dir);
+  auto language_segmenter = CreateLanguageSegmenter();
+  auto normalizer = CreateNormalizer();
+  auto schema_store = CreateSchemaStore();
+  auto index_processor =
+      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+                           normalizer.get(), index.get());
+
+  DocumentProto document = CreateDocumentWithTenProperties(state.range(0));
+
+  // Each iteration indexes the same document under the next document id.
+  DocumentId next_document_id = 0;
+  for (auto _ : state) {
+    ICING_ASSERT_OK(
+        index_processor->IndexDocument(document, next_document_id++));
+  }
+
+  CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithTenProperties)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+// Benchmarks IndexProcessor::IndexDocument() on the document produced by
+// CreateDocumentWithDiacriticLetters(state.range(0)). All setup and teardown
+// happen outside the `for (auto _ : state)` loop.
+void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
+  // The ICU data file is only set up here when not running via adb.
+  if (!absl::GetFlag(FLAGS_adb)) {
+    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+  }
+
+  IcingFilesystem filesystem;
+  const std::string index_dir = GetTestTempDir() + "/index_test/";
+
+  // Delete any existing index directory before creating the index.
+  CleanUp(filesystem, index_dir);
+
+  auto index = CreateIndex(filesystem, index_dir);
+  auto language_segmenter = CreateLanguageSegmenter();
+  auto normalizer = CreateNormalizer();
+  auto schema_store = CreateSchemaStore();
+  auto index_processor =
+      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+                           normalizer.get(), index.get());
+
+  DocumentProto document = CreateDocumentWithDiacriticLetters(state.range(0));
+
+  // Each iteration indexes the same document under the next document id.
+  DocumentId next_document_id = 0;
+  for (auto _ : state) {
+    ICING_ASSERT_OK(
+        index_processor->IndexDocument(document, next_document_id++));
+  }
+
+  CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithDiacriticLetters)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+// Benchmarks IndexProcessor::IndexDocument() on the document produced by
+// CreateDocumentWithHiragana(state.range(0)). All setup and teardown happen
+// outside the `for (auto _ : state)` loop.
+void BM_IndexDocumentWithHiragana(benchmark::State& state) {
+  // The ICU data file is only set up here when not running via adb.
+  if (!absl::GetFlag(FLAGS_adb)) {
+    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+  }
+
+  IcingFilesystem filesystem;
+  const std::string index_dir = GetTestTempDir() + "/index_test/";
+
+  // Delete any existing index directory before creating the index.
+  CleanUp(filesystem, index_dir);
+
+  auto index = CreateIndex(filesystem, index_dir);
+  auto language_segmenter = CreateLanguageSegmenter();
+  auto normalizer = CreateNormalizer();
+  auto schema_store = CreateSchemaStore();
+  auto index_processor =
+      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+                           normalizer.get(), index.get());
+
+  DocumentProto document = CreateDocumentWithHiragana(state.range(0));
+
+  // Each iteration indexes the same document under the next document id.
+  DocumentId next_document_id = 0;
+  for (auto _ : state) {
+    ICING_ASSERT_OK(
+        index_processor->IndexDocument(document, next_document_id++));
+  }
+
+  CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithHiragana)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+} // namespace
+
+} // namespace lib
+} // namespace icing