diff options
author | My Name <dsaadati@google.com> | 2021-09-08 16:51:58 -0700 |
---|---|---|
committer | My Name <dsaadati@google.com> | 2021-09-08 16:51:58 -0700 |
commit | 39f59853b980d94a55e9b0f76185b0d3fff88455 (patch) | |
tree | aca1fcdee32fc7f6ae15093de27361f5226fda64 /icing/transform/icu/icu-normalizer_benchmark.cc | |
parent | 2d636a05e41cef9131b1696624436358aba363b2 (diff) | |
download | icing-39f59853b980d94a55e9b0f76185b0d3fff88455.tar.gz |
Sync from upstream.
Descriptions:
================
Remove no-longer-used write paths for file-backed-proto-log.
================
Modify segmentation rules to consider any segment that begins with a non-ASCII
alphanumeric character as valid.
=================
Implement CalculateNormalizedMatchLength for IcuNormalizer.
================
Add additional benchmark cases that were useful in developing
submatching and CalculateNormalizedMatchLength for IcuNormalizer.
=================
Switch NormalizationMap from
static const std::unordered_map<char16_t, char16_t>& to
static const std::unordered_map<char16_t, char16_t> *const.
==================
Bug: 147509515
Bug: 149610413
Bug: 195720764
Bug: 196257995
Change-Id: Iabdb34a983b5d47daca808888a46c241767d93bf
Diffstat (limited to 'icing/transform/icu/icu-normalizer_benchmark.cc')
-rw-r--r-- | icing/transform/icu/icu-normalizer_benchmark.cc | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc index b037538..8d09be2 100644 --- a/icing/transform/icu/icu-normalizer_benchmark.cc +++ b/icing/transform/icu/icu-normalizer_benchmark.cc @@ -161,6 +161,124 @@ BENCHMARK(BM_NormalizeHiragana) ->Arg(2048000) ->Arg(4096000); +void BM_UppercaseSubTokenLength(benchmark::State& state) { + bool run_via_adb = absl::GetFlag(FLAGS_adb); + if (!run_via_adb) { + ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Normalizer> normalizer, + normalizer_factory::Create( + + /*max_term_byte_size=*/std::numeric_limits<int>::max())); + + std::string input_string(state.range(0), 'A'); + std::string normalized_input_string(state.range(0), 'a'); + for (auto _ : state) { + normalizer->FindNormalizedMatchEndPosition(input_string, + normalized_input_string); + } +} +BENCHMARK(BM_UppercaseSubTokenLength) + ->Arg(1000) + ->Arg(2000) + ->Arg(4000) + ->Arg(8000) + ->Arg(16000) + ->Arg(32000) + ->Arg(64000) + ->Arg(128000) + ->Arg(256000) + ->Arg(384000) + ->Arg(512000) + ->Arg(1024000) + ->Arg(2048000) + ->Arg(4096000); + +void BM_AccentSubTokenLength(benchmark::State& state) { + bool run_via_adb = absl::GetFlag(FLAGS_adb); + if (!run_via_adb) { + ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Normalizer> normalizer, + normalizer_factory::Create( + + /*max_term_byte_size=*/std::numeric_limits<int>::max())); + + std::string input_string; + std::string normalized_input_string; + while (input_string.length() < state.range(0)) { + input_string.append("àáâãā"); + normalized_input_string.append("aaaaa"); + } + + for (auto _ : state) { + normalizer->FindNormalizedMatchEndPosition(input_string, + normalized_input_string); + } +} +BENCHMARK(BM_AccentSubTokenLength) + ->Arg(1000) + ->Arg(2000) + 
->Arg(4000) + ->Arg(8000) + ->Arg(16000) + ->Arg(32000) + ->Arg(64000) + ->Arg(128000) + ->Arg(256000) + ->Arg(384000) + ->Arg(512000) + ->Arg(1024000) + ->Arg(2048000) + ->Arg(4096000); + +void BM_HiraganaSubTokenLength(benchmark::State& state) { + bool run_via_adb = absl::GetFlag(FLAGS_adb); + if (!run_via_adb) { + ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<Normalizer> normalizer, + normalizer_factory::Create( + + /*max_term_byte_size=*/std::numeric_limits<int>::max())); + + std::string input_string; + std::string normalized_input_string; + while (input_string.length() < state.range(0)) { + input_string.append("あいうえお"); + normalized_input_string.append("アイウエオ"); + } + + for (auto _ : state) { + normalizer->FindNormalizedMatchEndPosition(input_string, + normalized_input_string); + } +} +BENCHMARK(BM_HiraganaSubTokenLength) + ->Arg(1000) + ->Arg(2000) + ->Arg(4000) + ->Arg(8000) + ->Arg(16000) + ->Arg(32000) + ->Arg(64000) + ->Arg(128000) + ->Arg(256000) + ->Arg(384000) + ->Arg(512000) + ->Arg(1024000) + ->Arg(2048000) + ->Arg(4096000); + } // namespace } // namespace lib |