aboutsummaryrefslogtreecommitdiff
path: root/icing/transform/icu/icu-normalizer_benchmark.cc
diff options
context:
space:
mode:
authorMy Name <dsaadati@google.com>2021-09-08 16:51:58 -0700
committerMy Name <dsaadati@google.com>2021-09-08 16:51:58 -0700
commit39f59853b980d94a55e9b0f76185b0d3fff88455 (patch)
treeaca1fcdee32fc7f6ae15093de27361f5226fda64 /icing/transform/icu/icu-normalizer_benchmark.cc
parent2d636a05e41cef9131b1696624436358aba363b2 (diff)
downloadicing-39f59853b980d94a55e9b0f76185b0d3fff88455.tar.gz
Sync from upstream.
Descriptions: ================ Remove no-longer-used write paths for file-backed-proto-log. ================ Modify segmentation rules to consider any segment that begins with a non-ASCII alphanumeric character as valid. ================= Implement CalculateNormalizedMatchLength for IcuNormalizer. ================ Add additional benchmark cases that were useful in developing submatching and CalculateNormalizedMatchLength for IcuNormalizer. ================= Switch NormalizationMap from static const std::unordered_map<char16_t, char16_t>& to static const std::unordered_map<char16_t, char16_t> *const. ================== Bug: 147509515 Bug: 149610413 Bug: 195720764 Bug: 196257995 Change-Id: Iabdb34a983b5d47daca808888a46c241767d93bf
Diffstat (limited to 'icing/transform/icu/icu-normalizer_benchmark.cc')
-rw-r--r--icing/transform/icu/icu-normalizer_benchmark.cc118
1 files changed, 118 insertions, 0 deletions
diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc
index b037538..8d09be2 100644
--- a/icing/transform/icu/icu-normalizer_benchmark.cc
+++ b/icing/transform/icu/icu-normalizer_benchmark.cc
@@ -161,6 +161,124 @@ BENCHMARK(BM_NormalizeHiragana)
->Arg(2048000)
->Arg(4096000);
+// Benchmarks Normalizer::FindNormalizedMatchEndPosition on an input made of
+// state.range(0) copies of 'A', matched against an equally long string of 'a'
+// characters (presumably the normalized form of the input).
+void BM_UppercaseSubTokenLength(benchmark::State& state) {
+  // The ICU data file is only set up from the test data path when the adb flag
+  // is not set.
+  bool run_via_adb = absl::GetFlag(FLAGS_adb);
+  if (!run_via_adb) {
+    ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+        GetTestFilePath("icing/icu.dat")));
+  }
+
+  // Use the largest possible term size limit for the normalizer.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Normalizer> normalizer,
+      normalizer_factory::Create(
+          /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+  const std::string input_string(state.range(0), 'A');
+  const std::string normalized_input_string(state.range(0), 'a');
+  for (auto _ : state) {
+    // Hand the result to the benchmark library so the compiler cannot discard
+    // the call being measured.
+    benchmark::DoNotOptimize(normalizer->FindNormalizedMatchEndPosition(
+        input_string, normalized_input_string));
+  }
+}
+BENCHMARK(BM_UppercaseSubTokenLength)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+// Benchmarks Normalizer::FindNormalizedMatchEndPosition on an input built by
+// repeating the accented sequence "àáâãā" until it is at least state.range(0)
+// bytes long. The string it is matched against receives "aaaaa" for every
+// repetition (presumably the normalized form of the input).
+void BM_AccentSubTokenLength(benchmark::State& state) {
+  // The ICU data file is only set up from the test data path when the adb flag
+  // is not set.
+  bool run_via_adb = absl::GetFlag(FLAGS_adb);
+  if (!run_via_adb) {
+    ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+        GetTestFilePath("icing/icu.dat")));
+  }
+
+  // Use the largest possible term size limit for the normalizer.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Normalizer> normalizer,
+      normalizer_factory::Create(
+          /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+  // Read the requested size once and convert it to the string's own size type
+  // so the loop condition does not mix signed and unsigned operands.
+  const std::string::size_type target_length =
+      static_cast<std::string::size_type>(state.range(0));
+  std::string input_string;
+  std::string normalized_input_string;
+  while (input_string.length() < target_length) {
+    input_string.append("àáâãā");
+    normalized_input_string.append("aaaaa");
+  }
+
+  for (auto _ : state) {
+    // Hand the result to the benchmark library so the compiler cannot discard
+    // the call being measured.
+    benchmark::DoNotOptimize(normalizer->FindNormalizedMatchEndPosition(
+        input_string, normalized_input_string));
+  }
+}
+BENCHMARK(BM_AccentSubTokenLength)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+// Benchmarks Normalizer::FindNormalizedMatchEndPosition on an input built by
+// repeating the hiragana sequence "あいうえお" until it is at least
+// state.range(0) bytes long. The string it is matched against receives the
+// katakana sequence "アイウエオ" for every repetition (presumably the
+// normalized form of the input).
+void BM_HiraganaSubTokenLength(benchmark::State& state) {
+  // The ICU data file is only set up from the test data path when the adb flag
+  // is not set.
+  bool run_via_adb = absl::GetFlag(FLAGS_adb);
+  if (!run_via_adb) {
+    ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(
+        GetTestFilePath("icing/icu.dat")));
+  }
+
+  // Use the largest possible term size limit for the normalizer.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Normalizer> normalizer,
+      normalizer_factory::Create(
+          /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+  // Read the requested size once and convert it to the string's own size type
+  // so the loop condition does not mix signed and unsigned operands.
+  const std::string::size_type target_length =
+      static_cast<std::string::size_type>(state.range(0));
+  std::string input_string;
+  std::string normalized_input_string;
+  while (input_string.length() < target_length) {
+    input_string.append("あいうえお");
+    normalized_input_string.append("アイウエオ");
+  }
+
+  for (auto _ : state) {
+    // Hand the result to the benchmark library so the compiler cannot discard
+    // the call being measured.
+    benchmark::DoNotOptimize(normalizer->FindNormalizedMatchEndPosition(
+        input_string, normalized_input_string));
+  }
+}
+BENCHMARK(BM_HiraganaSubTokenLength)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
} // namespace
} // namespace lib