aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLiqiang TAO <taolq@outlook.com>2022-01-25 19:14:20 +0900
committerGitHub <noreply@github.com>2022-01-25 10:14:20 +0000
commitd0fbf8ac238d3bc3a111334ab3553173812d78e7 (patch)
tree589bdc6d72203e72080ad9b3e128f8d4ce9580b0
parent57b2bfa33bbc4f171a7fc2807de71583b1e2fd2a (diff)
downloadgoogle-benchmark-d0fbf8ac238d3bc3a111334ab3553173812d78e7.tar.gz
Cache PerfCounters instance in PerfCountersMeasurement (#1308)
This patch fixes #1306, by reducing the pinned instances of PerfCounters. The issue is caused by creating multiple pinned events in the same thread, doing so results in the Snapshot(PerfCounterValues* values) failing, and that's now discoverable. Creating multile pinned events is an unsupported behavior currently. The error would be detected at read() time, not perf_event_open() / iotcl() time. The unsupported benavior above is confirmed by Stephane Eranian @seranian, and he also pointed the dectection method. Finished this patch under the guidance of Mircea Trofin @mtrofin.
-rw-r--r--src/benchmark.cc5
-rw-r--r--src/benchmark_runner.cc3
-rw-r--r--src/perf_counters.cc42
-rw-r--r--src/perf_counters.h52
-rw-r--r--test/perf_counters_gtest.cc48
5 files changed, 124 insertions, 26 deletions
diff --git a/src/benchmark.cc b/src/benchmark.cc
index cedeee3..1a76b58 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -184,7 +184,10 @@ void State::PauseTiming() {
BM_CHECK(started_ && !finished_ && !error_occurred_);
timer_->StopTimer();
if (perf_counters_measurement_) {
- auto measurements = perf_counters_measurement_->StopAndGetMeasurements();
+ std::vector<std::pair<std::string, double>> measurements;
+ if (!perf_counters_measurement_->Stop(measurements)) {
+ BM_CHECK(false) << "Perf counters read the value failed.";
+ }
for (const auto& name_and_measurement : measurements) {
auto name = name_and_measurement.first;
auto measurement = name_and_measurement.second;
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index eac807b..ec7c127 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -152,8 +152,7 @@ BenchmarkRunner::BenchmarkRunner(
has_explicit_iteration_count(b.iterations() != 0),
pool(b.threads() - 1),
iters(has_explicit_iteration_count ? b.iterations() : 1),
- perf_counters_measurement(
- PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
+ perf_counters_measurement(StrSplit(FLAGS_benchmark_perf_counters, ',')),
perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
? &perf_counters_measurement
: nullptr) {
diff --git a/src/perf_counters.cc b/src/perf_counters.cc
index b2ac768..582475f 100644
--- a/src/perf_counters.cc
+++ b/src/perf_counters.cc
@@ -15,6 +15,7 @@
#include "perf_counters.h"
#include <cstring>
+#include <memory>
#include <vector>
#if defined HAVE_LIBPFM
@@ -104,7 +105,7 @@ PerfCounters PerfCounters::Create(
return PerfCounters(counter_names, std::move(counter_ids));
}
-PerfCounters::~PerfCounters() {
+void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
@@ -126,7 +127,44 @@ PerfCounters PerfCounters::Create(
return NoCounters();
}
-PerfCounters::~PerfCounters() = default;
+void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
+
+Mutex PerfCountersMeasurement::mutex_;
+int PerfCountersMeasurement::ref_count_ = 0;
+PerfCounters PerfCountersMeasurement::counters_ = PerfCounters::NoCounters();
+
+PerfCountersMeasurement::PerfCountersMeasurement(
+ const std::vector<std::string>& counter_names)
+ : start_values_(counter_names.size()), end_values_(counter_names.size()) {
+ MutexLock l(mutex_);
+ if (ref_count_ == 0) {
+ counters_ = PerfCounters::Create(counter_names);
+ }
+ // We chose to increment it even if `counters_` ends up invalid,
+ // so that we don't keep trying to create, and also since the dtor
+ // will decrement regardless of `counters_`'s validity
+ ++ref_count_;
+
+ BM_CHECK(!counters_.IsValid() || counters_.names() == counter_names);
+}
+
+PerfCountersMeasurement::~PerfCountersMeasurement() {
+ MutexLock l(mutex_);
+ --ref_count_;
+ if (ref_count_ == 0) {
+ counters_ = PerfCounters::NoCounters();
+ }
+}
+
+PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
+ if (this != &other) {
+ CloseCounters();
+
+ counter_ids_ = std::move(other.counter_ids_);
+ counter_names_ = std::move(other.counter_names_);
+ }
+ return *this;
+}
} // namespace internal
} // namespace benchmark
diff --git a/src/perf_counters.h b/src/perf_counters.h
index 47ca138..9a6e46c 100644
--- a/src/perf_counters.h
+++ b/src/perf_counters.h
@@ -17,11 +17,13 @@
#include <array>
#include <cstdint>
+#include <memory>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
+#include "mutex.h"
#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
@@ -71,12 +73,14 @@ class PerfCounters final {
// True iff this platform supports performance counters.
static const bool kSupported;
- bool IsValid() const { return is_valid_; }
+ bool IsValid() const { return !counter_names_.empty(); }
static PerfCounters NoCounters() { return PerfCounters(); }
- ~PerfCounters();
+ ~PerfCounters() { CloseCounters(); }
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
+ PerfCounters& operator=(PerfCounters&&) noexcept;
+ PerfCounters& operator=(const PerfCounters&) = delete;
// Platform-specific implementations may choose to do some library
// initialization here.
@@ -111,24 +115,27 @@ class PerfCounters final {
private:
PerfCounters(const std::vector<std::string>& counter_names,
std::vector<int>&& counter_ids)
- : counter_ids_(std::move(counter_ids)),
- counter_names_(counter_names),
- is_valid_(true) {}
- PerfCounters() : is_valid_(false) {}
+ : counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {}
+ PerfCounters() = default;
+
+ void CloseCounters() const;
std::vector<int> counter_ids_;
- const std::vector<std::string> counter_names_;
- const bool is_valid_;
+ std::vector<std::string> counter_names_;
};
// Typical usage of the above primitives.
class PerfCountersMeasurement final {
public:
- PerfCountersMeasurement(PerfCounters&& c)
- : counters_(std::move(c)),
- start_values_(counters_.IsValid() ? counters_.names().size() : 0),
- end_values_(counters_.IsValid() ? counters_.names().size() : 0) {}
-
+ PerfCountersMeasurement(const std::vector<std::string>& counter_names);
+ ~PerfCountersMeasurement();
+
+ // The only way to get to `counters_` is after ctor-ing a
+ // `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
+ // decided (either invalid or valid) and won't change again even if a ctor is
+ // concurrently running with this. This is preferring efficiency to
+ // maintainability, because the address of the static can be known at compile
+ // time.
bool IsValid() const { return counters_.IsValid(); }
BENCHMARK_ALWAYS_INLINE void Start() {
@@ -136,30 +143,33 @@ class PerfCountersMeasurement final {
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
- counters_.Snapshot(&start_values_);
+ valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
}
- BENCHMARK_ALWAYS_INLINE std::vector<std::pair<std::string, double>>
- StopAndGetMeasurements() {
+ BENCHMARK_ALWAYS_INLINE bool Stop(
+ std::vector<std::pair<std::string, double>>& measurements) {
assert(IsValid());
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
- counters_.Snapshot(&end_values_);
+ valid_read_ &= counters_.Snapshot(&end_values_);
ClobberMemory();
- std::vector<std::pair<std::string, double>> ret;
for (size_t i = 0; i < counters_.names().size(); ++i) {
double measurement = static_cast<double>(end_values_[i]) -
static_cast<double>(start_values_[i]);
- ret.push_back({counters_.names()[i], measurement});
+ measurements.push_back({counters_.names()[i], measurement});
}
- return ret;
+
+ return valid_read_;
}
private:
- PerfCounters counters_;
+ static Mutex mutex_;
+ GUARDED_BY(mutex_) static int ref_count_;
+ GUARDED_BY(mutex_) static PerfCounters counters_;
+ bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
};
diff --git a/test/perf_counters_gtest.cc b/test/perf_counters_gtest.cc
index 3eac624..f9e6a6f 100644
--- a/test/perf_counters_gtest.cc
+++ b/test/perf_counters_gtest.cc
@@ -11,6 +11,7 @@ struct MsgHandler {
#endif
using benchmark::internal::PerfCounters;
+using benchmark::internal::PerfCountersMeasurement;
using benchmark::internal::PerfCounterValues;
namespace {
@@ -95,6 +96,53 @@ TEST(PerfCountersTest, Read2Counters) {
EXPECT_GT(values2[1], 0);
}
+TEST(PerfCountersTest, ReopenExistingCounters) {
+ // The test works (i.e. causes read to fail) for the assumptions
+ // about hardware capabilities (i.e. small number (3-4) hardware
+ // counters) at this date.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ std::vector<PerfCounters> counters;
+ counters.reserve(6);
+ for (int i = 0; i < 6; i++)
+ counters.push_back(PerfCounters::Create({kGenericPerfEvent1}));
+ PerfCounterValues values(1);
+ EXPECT_TRUE(counters[0].Snapshot(&values));
+ EXPECT_FALSE(counters[4].Snapshot(&values));
+ EXPECT_FALSE(counters[5].Snapshot(&values));
+}
+
+TEST(PerfCountersTest, CreateExistingMeasurements) {
+ // The test works (i.e. causes read to fail) for the assumptions
+ // about hardware capabilities (i.e. small number (3-4) hardware
+ // counters) at this date,
+ // the same as previous test ReopenExistingCounters.
+ if (!PerfCounters::kSupported) {
+ GTEST_SKIP() << "Test skipped because libpfm is not supported.\n";
+ }
+ EXPECT_TRUE(PerfCounters::Initialize());
+ std::vector<PerfCountersMeasurement> perf_counter_measurements;
+ std::vector<std::pair<std::string, double>> measurements;
+
+ perf_counter_measurements.reserve(10);
+ for (int i = 0; i < 10; i++)
+ perf_counter_measurements.emplace_back(
+ std::vector<std::string>{kGenericPerfEvent1});
+
+ perf_counter_measurements[0].Start();
+ EXPECT_TRUE(perf_counter_measurements[0].Stop(measurements));
+
+ measurements.clear();
+ perf_counter_measurements[8].Start();
+ EXPECT_FALSE(perf_counter_measurements[8].Stop(measurements));
+
+ measurements.clear();
+ perf_counter_measurements[9].Start();
+ EXPECT_FALSE(perf_counter_measurements[9].Stop(measurements));
+}
+
size_t do_work() {
size_t res = 0;
for (size_t i = 0; i < 100000000; ++i) res += i * i;