diff options
author | Mircea Trofin <mtrofin@google.com> | 2021-04-28 01:25:29 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-28 09:25:29 +0100 |
commit | 376ebc26354ca2b79af94467133f3c35b539627e (patch) | |
tree | f6b9b93ba9a93945fd58a1dd948b58b77754af32 /src/benchmark_runner.cc | |
parent | 835951aa44c2f802b4d563d533eac34565848eb0 (diff) | |
download | google-benchmark-376ebc26354ca2b79af94467133f3c35b539627e.tar.gz |
Support optional, user-directed collection of performance counters (#1114)
* Support optional, user-directed collection of performance counters
The patch allows an engineer wishing to drill into the root causes
of a regression, for example. Currently, only single threaded runs
are supported. The feature is a build-time opt in, and then a runtime
opt in.
The engineer may run the benchmark executable, passing a list of
performance counter names (using libpfm's naming scheme) at the
command line. The counter values will then be collected and reported
back as UserCounters.
This is different from #240 in that it is a benchmark user opt-in, and
the counter collection is transparent to the benchmark.
Currently, this is only supported on platforms where libpfm is
supported.
libpfm: http://perfmon2.sourceforge.net/
* 'Use' values param in Snapshot when BENCHMARK_OS_WINDOWS
This is to avoid unused parameter warning-as-error
* Added missing include for <vector> in perf_counters.cc
* Moved doc to docs
* Added license blurbs
Diffstat (limited to 'src/benchmark_runner.cc')
-rw-r--r-- | src/benchmark_runner.cc | 29 |
1 files changed, 23 insertions, 6 deletions
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index d081aa8..083d184 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -45,6 +45,7 @@ #include "internal_macros.h" #include "log.h" #include "mutex.h" +#include "perf_counters.h" #include "re.h" #include "statistics.h" #include "string_util.h" @@ -111,12 +112,14 @@ BenchmarkReporter::Run CreateRunReport( // Execute one thread of benchmark b for the specified number of iterations. // Adds the stats collected for the thread into manager->results. void RunInThread(const BenchmarkInstance* b, IterationCount iters, - int thread_id, ThreadManager* manager) { + int thread_id, ThreadManager* manager, + PerfCountersMeasurement* perf_counters_measurement) { internal::ThreadTimer timer( b->measure_process_cpu_time ? internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); - State st = b->Run(iters, thread_id, &timer, manager); + State st = + b->Run(iters, thread_id, &timer, manager, perf_counters_measurement); CHECK(st.error_occurred() || st.iterations() >= st.max_iterations) << "Benchmark returned before State::KeepRunning() returned false!"; { @@ -143,7 +146,12 @@ class BenchmarkRunner { : FLAGS_benchmark_repetitions), has_explicit_iteration_count(b.iterations != 0), pool(b.threads - 1), - iters(has_explicit_iteration_count ? b.iterations : 1) { + iters(has_explicit_iteration_count ? b.iterations : 1), + perf_counters_measurement( + PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))), + perf_counters_measurement_ptr(perf_counters_measurement.IsValid() + ? &perf_counters_measurement + : nullptr) { run_results.display_report_aggregates_only = (FLAGS_benchmark_report_aggregates_only || FLAGS_benchmark_display_aggregates_only); @@ -155,6 +163,11 @@ class BenchmarkRunner { internal::ARM_DisplayReportAggregatesOnly); run_results.file_report_aggregates_only = (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); + CHECK(b.threads == 1 || !perf_counters_measurement.IsValid()) + << "Perf counters are not supported in multi-threaded cases.\n"; + CHECK(FLAGS_benchmark_perf_counters.empty() || + perf_counters_measurement.IsValid()) + << "Perf counters were requested but could not be set up."; } for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { @@ -192,6 +205,9 @@ class BenchmarkRunner { // So only the first repetition has to find/calculate it, // the other repetitions will just use that precomputed iteration count. + PerfCountersMeasurement perf_counters_measurement; + PerfCountersMeasurement* const perf_counters_measurement_ptr; + struct IterationResults { internal::ThreadManager::Result results; IterationCount iters; @@ -206,12 +222,12 @@ class BenchmarkRunner { // Run all but one thread in separate threads for (std::size_t ti = 0; ti < pool.size(); ++ti) { pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1), - manager.get()); + manager.get(), perf_counters_measurement_ptr); } // And run one thread here directly. // (If we were asked to run just one thread, we don't create new threads.) // Yes, we need to do this here *after* we start the separate threads. - RunInThread(&b, iters, 0, manager.get()); + RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr); // The main thread has finished. Now let's wait for the other threads. manager->WaitForAllThreads(); @@ -331,7 +347,8 @@ class BenchmarkRunner { memory_manager->Start(); std::unique_ptr<internal::ThreadManager> manager; manager.reset(new internal::ThreadManager(1)); - RunInThread(&b, memory_iterations, 0, manager.get()); + RunInThread(&b, memory_iterations, 0, manager.get(), + perf_counters_measurement_ptr); manager->WaitForAllThreads(); manager.reset(); |