aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andy Christiansen <andy.christ79@gmail.com> 2023-07-07 10:58:16 +0200
committer GitHub <noreply@github.com> 2023-07-07 09:58:16 +0100
commit 4931aefb51d1e5872b096a97f43e13fa0fc33c8c (patch)
tree ec4e03779b3fbde21dca78a9478bc87dd0701532
parent 015d1a091af6937488242b70121858bce8fd40e9 (diff)
download google-benchmark-4931aefb51d1e5872b096a97f43e13fa0fc33c8c.tar.gz
Fix broken PFM-enabled tests (#1623)
* Add pfm CI actions for bazel
* Fix problems in unit test.
* Undo enabling the CI tests for pfm - github CI machines seemingly do not support performance counters.
* Remove commented code - can be revisited in github history when needed, and there's a comment explaining the rationale behind the new test code.

---------

Co-authored-by: Andy Christiansen <achristiansen@google.com>
Co-authored-by: dominic <510002+dmah42@users.noreply.github.com>
-rw-r--r-- test/perf_counters_gtest.cc 53
-rw-r--r-- test/perf_counters_test.cc 9
2 files changed, 34 insertions, 28 deletions
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc
index bb55aff..250ceef 100644
--- a/test/perf_counters_gtest.cc
+++ b/test/perf_counters_gtest.cc
@@ -2,6 +2,7 @@
#include <thread>
#include "../src/perf_counters.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
#ifndef GTEST_SKIP
@@ -14,6 +15,9 @@ struct MsgHandler {
using benchmark::internal::PerfCounters;
using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
+using ::testing::AllOf;
+using ::testing::Gt;
+using ::testing::Lt;
namespace {
const char kGenericPerfEvent1[] = "CYCLES";
@@ -72,8 +76,7 @@ TEST(PerfCountersTest, NegativeTest) {
{
// Add a bad apple in the end of the chain to check the edges
auto counter = PerfCounters::Create({kGenericPerfEvent1, kGenericPerfEvent2,
- kGenericPerfEvent3,
- "MISPREDICTED_BRANCH_RETIRED"});
+ kGenericPerfEvent3, "bad event name"});
EXPECT_EQ(counter.num_counters(), 3);
EXPECT_EQ(counter.names(),
std::vector<std::string>({kGenericPerfEvent1, kGenericPerfEvent2,
@@ -257,10 +260,14 @@ TEST(PerfCountersTest, MultiThreaded) {
static_cast<double>(after[0] - before[0]),
static_cast<double>(after[1] - before[1])};
- // Some extra work will happen on the main thread - like joining the threads
- // - so the ratio won't be quite 2.0, but very close.
- EXPECT_GE(Elapsed4Threads[0], 1.9 * Elapsed2Threads[0]);
- EXPECT_GE(Elapsed4Threads[1], 1.9 * Elapsed2Threads[1]);
+ // The following expectations fail (at least on a beefy workstation with lots
+ // of cpus) - it seems that in some circumstances the runtime of 4 threads
+ // can even be better than with 2.
+ // So instead of expecting 4 threads to be slower, let's just make sure they
+ // do not differ too much in general (one is not more than 10x than the
+ // other).
+ EXPECT_THAT(Elapsed4Threads[0] / Elapsed2Threads[0], AllOf(Gt(0.1), Lt(10)));
+ EXPECT_THAT(Elapsed4Threads[1] / Elapsed2Threads[1], AllOf(Gt(0.1), Lt(10)));
}
TEST(PerfCountersTest, HardwareLimits) {
@@ -273,28 +280,18 @@ TEST(PerfCountersTest, HardwareLimits) {
}
EXPECT_TRUE(PerfCounters::Initialize());
- // Taken straight from `perf list` on x86-64
- // Got all hardware names since these are the problematic ones
- std::vector<std::string> counter_names{"cycles", // leader
- "instructions",
- "branches",
- "L1-dcache-loads",
- "L1-dcache-load-misses",
- "L1-dcache-prefetches",
- "L1-icache-load-misses", // leader
- "L1-icache-loads",
- "branch-load-misses",
- "branch-loads",
- "dTLB-load-misses",
- "dTLB-loads",
- "iTLB-load-misses", // leader
- "iTLB-loads",
- "branch-instructions",
- "branch-misses",
- "cache-misses",
- "cache-references",
- "stalled-cycles-backend", // leader
- "stalled-cycles-frontend"};
+ // Taken from `perf list`, but focusses only on those HW events that actually
+ // were reported when running `sudo perf stat -a sleep 10`. All HW events
+ // listed in the first command not reported in the second seem to not work.
+ // This is sad as we don't really get to test the grouping here (groups can
+ // contain up to 6 members)...
+ std::vector<std::string> counter_names{
+ "cycles", // leader
+ "instructions", //
+ "branches", //
+ "branch-misses", //
+ "cache-misses", //
+ };
// In the off-chance that some of these values are not supported,
// we filter them out so the test will complete without failure
diff --git a/test/perf_counters_test.cc b/test/perf_counters_test.cc
index f0e9a17..98cadda 100644
--- a/test/perf_counters_test.cc
+++ b/test/perf_counters_test.cc
@@ -2,9 +2,16 @@
#include "../src/perf_counters.h"
+#include "../src/commandlineflags.h"
#include "benchmark/benchmark.h"
#include "output_test.h"
+namespace benchmark {
+
+BM_DECLARE_string(benchmark_perf_counters);
+
+} // namespace benchmark
+
static void BM_Simple(benchmark::State& state) {
for (auto _ : state) {
auto iterations = state.iterations();
@@ -24,5 +31,7 @@ int main(int argc, char* argv[]) {
if (!benchmark::internal::PerfCounters::kSupported) {
return 0;
}
+ benchmark::FLAGS_benchmark_perf_counters = "CYCLES,BRANCHES";
+ benchmark::internal::PerfCounters::Initialize();
RunOutputTests(argc, argv);
}