diff options
Diffstat (limited to 'src/perf_counters.cc')
-rw-r--r-- | src/perf_counters.cc | 282 |
1 files changed, 282 insertions, 0 deletions
diff --git a/src/perf_counters.cc b/src/perf_counters.cc new file mode 100644 index 0000000..417acdb --- /dev/null +++ b/src/perf_counters.cc @@ -0,0 +1,282 @@ +// Copyright 2021 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "perf_counters.h" + +#include <cstring> +#include <memory> +#include <vector> + +#if defined HAVE_LIBPFM +#include "perfmon/pfmlib.h" +#include "perfmon/pfmlib_perf_event.h" +#endif + +namespace benchmark { +namespace internal { + +constexpr size_t PerfCounterValues::kMaxCounters; + +#if defined HAVE_LIBPFM + +size_t PerfCounterValues::Read(const std::vector<int>& leaders) { + // Create a pointer for multiple reads + const size_t bufsize = values_.size() * sizeof(values_[0]); + char* ptr = reinterpret_cast<char*>(values_.data()); + size_t size = bufsize; + for (int lead : leaders) { + auto read_bytes = ::read(lead, ptr, size); + if (read_bytes >= ssize_t(sizeof(uint64_t))) { + // Actual data bytes are all bytes minus initial padding + std::size_t data_bytes = read_bytes - sizeof(uint64_t); + // This should be very cheap since it's in hot cache + std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes); + // Increment our counters + ptr += data_bytes; + size -= data_bytes; + } else { + int err = errno; + GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err + << " " << ::strerror(err) << "\n"; + return 0; + } + } + return (bufsize - size) / sizeof(uint64_t); +} + +const bool PerfCounters::kSupported = true; + +// Initializes libpfm only on the first call. Returns whether that single +// initialization was successful. +bool PerfCounters::Initialize() { + // Function-scope static gets initialized only once on first call. + static const bool success = []() { + return pfm_initialize() == PFM_SUCCESS; + }(); + return success; +} + +bool PerfCounters::IsCounterSupported(const std::string& name) { + Initialize(); + perf_event_attr_t attr; + std::memset(&attr, 0, sizeof(attr)); + pfm_perf_encode_arg_t arg; + std::memset(&arg, 0, sizeof(arg)); + arg.attr = &attr; + const int mode = PFM_PLM3; // user mode only + int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT, + &arg); + return (ret == PFM_SUCCESS); +} + +PerfCounters PerfCounters::Create( + const std::vector<std::string>& counter_names) { + if (!counter_names.empty()) { + Initialize(); + } + + // Valid counters will populate these arrays but we start empty + std::vector<std::string> valid_names; + std::vector<int> counter_ids; + std::vector<int> leader_ids; + + // Resize to the maximum possible + valid_names.reserve(counter_names.size()); + counter_ids.reserve(counter_names.size()); + + const int kCounterMode = PFM_PLM3; // user mode only + + // Group leads will be assigned on demand. The idea is that once we cannot + // create a counter descriptor, the reason is that this group has maxed out + // so we set the group_id again to -1 and retry - giving the algorithm a + // chance to create a new group leader to hold the next set of counters. + int group_id = -1; + + // Loop through all performance counters + for (size_t i = 0; i < counter_names.size(); ++i) { + // we are about to push into the valid names vector + // check if we did not reach the maximum + if (valid_names.size() == PerfCounterValues::kMaxCounters) { + // Log a message if we maxed out and stop adding + GetErrorLogInstance() + << counter_names.size() << " counters were requested. The maximum is " + << PerfCounterValues::kMaxCounters << " and " << valid_names.size() + << " were already added. All remaining counters will be ignored\n"; + // stop the loop and return what we have already + break; + } + + // Check if this name is empty + const auto& name = counter_names[i]; + if (name.empty()) { + GetErrorLogInstance() + << "A performance counter name was the empty string\n"; + continue; + } + + // Here first means first in group, ie the group leader + const bool is_first = (group_id < 0); + + // This struct will be populated by libpfm from the counter string + // and then fed into the syscall perf_event_open + struct perf_event_attr attr {}; + attr.size = sizeof(attr); + + // This is the input struct to libpfm. + pfm_perf_encode_arg_t arg{}; + arg.attr = &attr; + const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode, + PFM_OS_PERF_EVENT, &arg); + if (pfm_get != PFM_SUCCESS) { + GetErrorLogInstance() + << "Unknown performance counter name: " << name << "\n"; + continue; + } + + // We then proceed to populate the remaining fields in our attribute struct + // Note: the man page for perf_event_create suggests inherit = true and + // read_format = PERF_FORMAT_GROUP don't work together, but that's not the + // case. + attr.disabled = is_first; + attr.inherit = true; + attr.pinned = is_first; + attr.exclude_kernel = true; + attr.exclude_user = false; + attr.exclude_hv = true; + + // Read all counters in a group in one read. + attr.read_format = PERF_FORMAT_GROUP; + + int id = -1; + while (id < 0) { + static constexpr size_t kNrOfSyscallRetries = 5; + // Retry syscall as it was interrupted often (b/64774091). + for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; + ++num_retries) { + id = perf_event_open(&attr, 0, -1, group_id, 0); + if (id >= 0 || errno != EINTR) { + break; + } + } + if (id < 0) { + // If the file descriptor is negative we might have reached a limit + // in the current group. Set the group_id to -1 and retry + if (group_id >= 0) { + // Create a new group + group_id = -1; + } else { + // At this point we have already retried to set a new group id and + // failed. We then give up. + break; + } + } + } + + // We failed to get a new file descriptor. We might have reached a hard + // hardware limit that cannot be resolved even with group multiplexing + if (id < 0) { + GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor " + "for performance counter " + << name << ". Ignoring\n"; + + // We give up on this counter but try to keep going + // as the others would be fine + continue; + } + if (group_id < 0) { + // This is a leader, store and assign it to the current file descriptor + leader_ids.push_back(id); + group_id = id; + } + // This is a valid counter, add it to our descriptor's list + counter_ids.push_back(id); + valid_names.push_back(name); + } + + // Loop through all group leaders activating them + // There is another option of starting ALL counters in a process but + // that would be far reaching an intrusion. If the user is using PMCs + // by themselves then this would have a side effect on them. It is + // friendlier to loop through all groups individually. + for (int lead : leader_ids) { + if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) { + // This should never happen but if it does, we give up on the + // entire batch as recovery would be a mess. + GetErrorLogInstance() << "***WARNING*** Failed to start counters. " + "Claring out all counters.\n"; + + // Close all peformance counters + for (int id : counter_ids) { + ::close(id); + } + + // Return an empty object so our internal state is still good and + // the process can continue normally without impact + return NoCounters(); + } + } + + return PerfCounters(std::move(valid_names), std::move(counter_ids), + std::move(leader_ids)); +} + +void PerfCounters::CloseCounters() const { + if (counter_ids_.empty()) { + return; + } + for (int lead : leader_ids_) { + ioctl(lead, PERF_EVENT_IOC_DISABLE); + } + for (int fd : counter_ids_) { + close(fd); + } +} +#else // defined HAVE_LIBPFM +size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; } + +const bool PerfCounters::kSupported = false; + +bool PerfCounters::Initialize() { return false; } + +bool PerfCounters::IsCounterSupported(const std::string&) { return false; } + +PerfCounters PerfCounters::Create( + const std::vector<std::string>& counter_names) { + if (!counter_names.empty()) { + GetErrorLogInstance() << "Performance counters not supported."; + } + return NoCounters(); +} + +void PerfCounters::CloseCounters() const {} +#endif // defined HAVE_LIBPFM + +PerfCountersMeasurement::PerfCountersMeasurement( + const std::vector<std::string>& counter_names) + : start_values_(counter_names.size()), end_values_(counter_names.size()) { + counters_ = PerfCounters::Create(counter_names); +} + +PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept { + if (this != &other) { + CloseCounters(); + + counter_ids_ = std::move(other.counter_ids_); + leader_ids_ = std::move(other.leader_ids_); + counter_names_ = std::move(other.counter_names_); + } + return *this; +} +} // namespace internal +} // namespace benchmark |