diff options
author | Andy Christiansen <andy.christ79@gmail.com> | 2023-07-07 10:58:16 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-07 09:58:16 +0100 |
commit | 4931aefb51d1e5872b096a97f43e13fa0fc33c8c (patch) | |
tree | ec4e03779b3fbde21dca78a9478bc87dd0701532 | |
parent | 015d1a091af6937488242b70121858bce8fd40e9 (diff) | |
download | google-benchmark-4931aefb51d1e5872b096a97f43e13fa0fc33c8c.tar.gz |
Fix broken PFM-enabled tests (#1623)
* Add pfm CI actions for bazel
* Fix problems in unit test.
* Undo enabling the CI tests for pfm - GitHub CI machines seemingly do not support performance counters.
* Remove commented-out code - it can be revisited in the GitHub history when needed, and there's a comment explaining the rationale behind the new test code.
---------
Co-authored-by: Andy Christiansen <achristiansen@google.com>
Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
-rw-r--r-- | test/perf_counters_gtest.cc | 53 | ||||
-rw-r--r-- | test/perf_counters_test.cc | 9 |
2 files changed, 34 insertions, 28 deletions
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc index bb55aff..250ceef 100644 --- a/test/perf_counters_gtest.cc +++ b/test/perf_counters_gtest.cc @@ -2,6 +2,7 @@ #include <thread> #include "../src/perf_counters.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" #ifndef GTEST_SKIP @@ -14,6 +15,9 @@ struct MsgHandler { using benchmark::internal::PerfCounters; using benchmark::internal::PerfCountersMeasurement; using benchmark::internal::PerfCounterValues; +using ::testing::AllOf; +using ::testing::Gt; +using ::testing::Lt; namespace { const char kGenericPerfEvent1[] = "CYCLES"; @@ -72,8 +76,7 @@ TEST(PerfCountersTest, NegativeTest) { { // Add a bad apple in the end of the chain to check the edges auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2, - kGenericPerfEvent3, - "MISPREDICTED_BRANCH_RETIRED"}); + kGenericPerfEvent3, "bad event name"}); EXPECT_EQ(counter.num_counters(), 3); EXPECT_EQ(counter.names(), std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2, @@ -257,10 +260,14 @@ TEST(PerfCountersTest, MultiThreaded) { static_cast<double>(after[0] - before[0]), static_cast<double>(after[1] - before[1])}; - // Some extra work will happen on the main thread - like joining the threads - // - so the ratio won't be quite 2.0, but very close. - EXPECT_GE(Elapsed4Threads[0], 1.9 * Elapsed2Threads[0]); - EXPECT_GE(Elapsed4Threads[1], 1.9 * Elapsed2Threads[1]); + // The following expectations fail (at least on a beefy workstation with lots + // of cpus) - it seems that in some circumstances the runtime of 4 threads + // can even be better than with 2. + // So instead of expecting 4 threads to be slower, let's just make sure they + // do not differ too much in general (one is not more than 10x than the + // other). 
+ EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10))); + EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10))); } TEST(PerfCountersTest, HardwareLimits) { @@ -273,28 +280,18 @@ TEST(PerfCountersTest, HardwareLimits) { } EXPECT_TRUE(PerfCounters::Initialize()); - // Taken straight from `perf list` on x86-64 - // Got all hardware names since these are the problematic ones - std::vector<std::string> counter_names{"cycles", // leader - "instructions", - "branches", - "L1-dcache-loads", - "L1-dcache-load-misses", - "L1-dcache-prefetches", - "L1-icache-load-misses", // leader - "L1-icache-loads", - "branch-load-misses", - "branch-loads", - "dTLB-load-misses", - "dTLB-loads", - "iTLB-load-misses", // leader - "iTLB-loads", - "branch-instructions", - "branch-misses", - "cache-misses", - "cache-references", - "stalled-cycles-backend", // leader - "stalled-cycles-frontend"}; + // Taken from `perf list`, but focusses only on those HW events that actually + // were reported when running `sudo perf stat -a sleep 10`. All HW events + // listed in the first command not reported in the second seem to not work. + // This is sad as we don't really get to test the grouping here (groups can + // contain up to 6 members)... 
+ std::vector<std::string> counter_names{ + "cycles", // leader + "instructions", // + "branches", // + "branch-misses", // + "cache-misses", // + }; // In the off-chance that some of these values are not supported, // we filter them out so the test will complete without failure diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc index f0e9a17..98cadda 100644 --- a/test/perf_counters_test.cc +++ b/test/perf_counters_test.cc @@ -2,9 +2,16 @@ #include "../src/perf_counters.h" +#include "../src/commandlineflags.h" #include "benchmark/benchmark.h" #include "output_test.h" +namespace benchmark { + +BM_DECLARE_string(benchmark_perf_counters); + +} // namespace benchmark + static void BM_Simple(benchmark::State& state) { for (auto _ : state) { auto iterations = state.iterations(); @@ -24,5 +31,7 @@ int main(int argc, char* argv[]) { if (!benchmark::internal::PerfCounters::kSupported) { return 0; } + benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES"; + benchmark::internal::PerfCounters::Initialize(); RunOutputTests(argc, argv); } |