Fix broken PFM-enabled tests (#1623)

* Add pfm CI actions for bazel * Fix problems in unit test. * Undo enabling the CI tests for pfm - github CI machines seemingly do not support performance counters. * Remove commented code - can be revisited in github history when needed, and there's a comment explaining the rationale behind the new test code. --------- Co-authored-by: Andy Christiansen <achristiansen@google.com> Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
author: Andy Christiansen <andy.christ79@gmail.com> 2023-07-07 10:58:16 +0200
committer: GitHub <noreply@github.com> 2023-07-07 09:58:16 +0100
commit: 4931aefb51d1e5872b096a97f43e13fa0fc33c8c (patch)
tree: ec4e03779b3fbde21dca78a9478bc87dd0701532
parent: 015d1a091af6937488242b70121858bce8fd40e9 (diff)
download: google-benchmark-4931aefb51d1e5872b096a97f43e13fa0fc33c8c.tar.gz
2 files changed, 34 insertions, 28 deletions
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc
index bb55aff..250ceef 100644
--- a/test/perf_counters_gtest.cc
+++ b/test/perf_counters_gtest.cc
@@ -2,6 +2,7 @@
 #include <thread>
 
 #include "../src/perf_counters.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 #ifndef GTEST_SKIP
@@ -14,6 +15,9 @@ struct MsgHandler {
 using benchmark::internal::PerfCounters;
 using benchmark::internal::PerfCountersMeasurement;
 using benchmark::internal::PerfCounterValues;
+using ::testing::AllOf;
+using ::testing::Gt;
+using ::testing::Lt;
 
 namespace {
 const char kGenericPerfEvent1[] = "CYCLES";
@@ -72,8 +76,7 @@ TEST(PerfCountersTest, NegativeTest) {
   {
     // Add a bad apple in the end of the chain to check the edges
     auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
-                                         kGenericPerfEvent3,
-                                         "MISPREDICTED_BRANCH_RETIRED"});
+                                         kGenericPerfEvent3, "bad event name"});
     EXPECT_EQ(counter.num_counters(), 3);
     EXPECT_EQ(counter.names(),
               std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2,
@@ -257,10 +260,14 @@ TEST(PerfCountersTest, MultiThreaded) {
       static_cast<double>(after[0] - before[0]),
       static_cast<double>(after[1] - before[1])};
 
-  // Some extra work will happen on the main thread - like joining the threads
-  // - so the ratio won't be quite 2.0, but very close.
-  EXPECT_GE(Elapsed4Threads[0], 1.9 * Elapsed2Threads[0]);
-  EXPECT_GE(Elapsed4Threads[1], 1.9 * Elapsed2Threads[1]);
+  // The following expectations fail (at least on a beefy workstation with lots
+  // of cpus) - it seems that in some circumstances the runtime of 4 threads
+  // can even be better than with 2.
+  // So instead of expecting 4 threads to be slower, let's just make sure they
+  // do not differ too much in general (one is not more than 10x than the
+  // other).
+  EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
+  EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
 }
 
 TEST(PerfCountersTest, HardwareLimits) {
@@ -273,28 +280,18 @@ TEST(PerfCountersTest, HardwareLimits) {
   }
   EXPECT_TRUE(PerfCounters::Initialize());
 
-  // Taken straight from `perf list` on x86-64
-  // Got all hardware names since these are the problematic ones
-  std::vector<std::string> counter_names{"cycles",  // leader
-                                         "instructions",
-                                         "branches",
-                                         "L1-dcache-loads",
-                                         "L1-dcache-load-misses",
-                                         "L1-dcache-prefetches",
-                                         "L1-icache-load-misses",  // leader
-                                         "L1-icache-loads",
-                                         "branch-load-misses",
-                                         "branch-loads",
-                                         "dTLB-load-misses",
-                                         "dTLB-loads",
-                                         "iTLB-load-misses",  // leader
-                                         "iTLB-loads",
-                                         "branch-instructions",
-                                         "branch-misses",
-                                         "cache-misses",
-                                         "cache-references",
-                                         "stalled-cycles-backend",  // leader
-                                         "stalled-cycles-frontend"};
+  // Taken from `perf list`, but focusses only on those HW events that actually
+  // were reported when running `sudo perf stat -a sleep 10`. All HW events
+  // listed in the first command not reported in the second seem to not work.
+  // This is sad as we don't really get to test the grouping here (groups can
+  // contain up to 6 members)...
+  std::vector<std::string> counter_names{
+      "cycles",         // leader
+      "instructions",   //
+      "branches",       //
+      "branch-misses",  //
+      "cache-misses",   //
+  };
 
   // In the off-chance that some of these values are not supported,
   // we filter them out so the test will complete without failure
diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc
index f0e9a17..98cadda 100644
--- a/test/perf_counters_test.cc
+++ b/test/perf_counters_test.cc
@@ -2,9 +2,16 @@
 
 #include "../src/perf_counters.h"
 
+#include "../src/commandlineflags.h"
 #include "benchmark/benchmark.h"
 #include "output_test.h"
 
+namespace benchmark {
+
+BM_DECLARE_string(benchmark_perf_counters);
+
+}  // namespace benchmark
+
 static void BM_Simple(benchmark::State& state) {
   for (auto _ : state) {
     auto iterations = state.iterations();
@@ -24,5 +31,7 @@ int main(int argc, char* argv[]) {
   if (!benchmark::internal::PerfCounters::kSupported) {
     return 0;
   }
+  benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES";
+  benchmark::internal::PerfCounters::Initialize();
   RunOutputTests(argc, argv);
 }
author	Andy Christiansen <andy.christ79@gmail.com>	2023-07-07 10:58:16 +0200
committer	GitHub <noreply@github.com>	2023-07-07 09:58:16 +0100
commit	4931aefb51d1e5872b096a97f43e13fa0fc33c8c (patch)
tree	ec4e03779b3fbde21dca78a9478bc87dd0701532
parent	015d1a091af6937488242b70121858bce8fd40e9 (diff)
download	google-benchmark-4931aefb51d1e5872b096a97f43e13fa0fc33c8c.tar.gz