From ff1c578a0cb239dceb0f0c8ab333bd4d045ff2d8 Mon Sep 17 00:00:00 2001 From: Yabin Date: Thu, 18 Aug 2022 17:01:54 -0700 Subject: simpleperf: update --print-hw-counter to check each cpu. Little/Big cores may have different numbers of CPU PMU hardware counters. So check them separately. Bug: 243034569 Test: run simpleperf_unit_test Change-Id: I33527d69bc935b8523482feaf98378c5f6641762 (cherry picked from commit 6b771a2e12740f4bbcef515d772e62b364d87858) --- simpleperf/cmd_stat.cpp | 27 +++++++++++++++++++-------- simpleperf/workload.cpp | 13 +++++++++++++ simpleperf/workload.h | 1 + 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp index 40a6bb0b..c46bd548 100644 --- a/simpleperf/cmd_stat.cpp +++ b/simpleperf/cmd_stat.cpp @@ -739,23 +739,23 @@ bool StatCommand::ParseOptions(const std::vector& args, return true; } -bool StatCommand::PrintHardwareCounters() { +std::optional GetHardwareCountersOnCpu(int cpu) { size_t available_counters = 0; const EventType* event = FindEventTypeByName("cpu-cycles", true); if (event == nullptr) { - return false; + return std::nullopt; } perf_event_attr attr = CreateDefaultPerfEventAttr(*event); while (true) { auto workload = Workload::CreateWorkload({"sleep", "0.1"}); - if (!workload) { - return false; + if (!workload || !workload->SetCpuAffinity(cpu)) { + return std::nullopt; } std::vector> event_fds; for (size_t i = 0; i <= available_counters; i++) { EventFd* group_event_fd = event_fds.empty() ? 
nullptr : event_fds[0].get(); - auto event_fd = - EventFd::OpenEventFile(attr, workload->GetPid(), -1, group_event_fd, "cpu-cycles", false); + auto event_fd = EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd, + "cpu-cycles", false); if (!event_fd) { break; } @@ -771,7 +771,7 @@ bool StatCommand::PrintHardwareCounters() { for (auto& event_fd : event_fds) { PerfCounter counter; if (!event_fd->ReadCounter(&counter)) { - return false; + return std::nullopt; } if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) { always_running = false; @@ -783,7 +783,18 @@ bool StatCommand::PrintHardwareCounters() { } available_counters++; } - printf("There are %zu CPU PMU hardware counters available on this device.\n", available_counters); + return available_counters; +} + +bool StatCommand::PrintHardwareCounters() { + for (int cpu : GetOnlineCpus()) { + std::optional counters = GetHardwareCountersOnCpu(cpu); + if (!counters) { + LOG(ERROR) << "failed to get CPU PMU hardware counters on cpu " << cpu; + return false; + } + printf("There are %zu CPU PMU hardware counters available on cpu %d.\n", counters.value(), cpu); + } return true; } diff --git a/simpleperf/workload.cpp b/simpleperf/workload.cpp index 4bb7c540..142ed713 100644 --- a/simpleperf/workload.cpp +++ b/simpleperf/workload.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -160,6 +161,18 @@ void Workload::ChildProcessFn(int start_signal_fd, int exec_child_fd) { } } +bool Workload::SetCpuAffinity(int cpu) { + CHECK_EQ(work_state_, NotYetStartNewProcess); + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(GetPid(), sizeof(mask), &mask) != 0) { + PLOG(ERROR) << "sched_setaffinity failed"; + return false; + } + return true; +} + bool Workload::Start() { CHECK_EQ(work_state_, NotYetStartNewProcess); char start_signal = 1; diff --git a/simpleperf/workload.h b/simpleperf/workload.h index 2400ac8f..86b28924 100644 --- 
a/simpleperf/workload.h +++ b/simpleperf/workload.h @@ -43,6 +43,7 @@ class Workload { ~Workload(); + bool SetCpuAffinity(int cpu); bool Start(); bool IsStarted() { return work_state_ == Started; } pid_t GetPid() { return work_pid_; } -- cgit v1.2.3 From 94ab56bd749f5ed15cd4745bc143e3b01dbf2242 Mon Sep 17 00:00:00 2001 From: Yabin Date: Tue, 23 Aug 2022 11:43:01 -0700 Subject: simpleperf: stat: don't open event files across cpus. When monitoring a hardware event for a thread running on all cpus, the stat cmd opens one event file to monitor the thread across all cpus. This works because CPU PMUs on all cpus are managed by one pmu object in the perf event driver in the kernel. And the event file is bound to a pmu object. So it can transfer with the thread between cpus. But now big/little ARM cores may have different numbers of CPU PMU hardware counters. To fully use them, CPU PMUs on big/little cores need to be managed by different pmu objects. As a result, event files can't be transferred between cpus belonging to different pmu objects. To work with this, this patch changes the stat cmd to open one event file for each cpu. But after that, we can't tell if hardware counter multiplexing happens by checking if runtime equals enabled time. So we avoid showing the runtime / enabled_time percentage. Instead, we check if events used are more than hardware counters available on each cpu. 
Bug: 243065368 Test: simpleperf_unit_test Change-Id: I0eb6acbdb2c23bd726be90232a8d97285dac5482 (cherry picked from commit 4ed8e11cc2adf37eb95b922f51a029617c875874) --- simpleperf/cmd_stat.cpp | 146 +++++++++++++++++++++++++----------------------- simpleperf/event_type.h | 5 ++ 2 files changed, 82 insertions(+), 69 deletions(-) diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp index c46bd548..f4df9b67 100644 --- a/simpleperf/cmd_stat.cpp +++ b/simpleperf/cmd_stat.cpp @@ -157,7 +157,7 @@ void CounterSummaries::ShowText(FILE* fp, bool show_thread, bool show_cpu) { } titles.emplace_back("count"); titles.emplace_back("event_name"); - titles.emplace_back(" # count / runtime, runtime / enabled_time"); + titles.emplace_back(" # count / runtime"); std::vector width(titles.size(), 0); @@ -206,10 +206,9 @@ void CounterSummaries::ShowText(FILE* fp, bool show_thread, bool show_cpu) { if (show_cpu) { fprintf(fp, " %-*d", static_cast(width[i++]), s.cpu); } - fprintf(fp, " %*s %-*s # %-*s (%.0f%%)%s\n", static_cast(width[i]), - s.readable_count.c_str(), static_cast(width[i + 1]), s.Name().c_str(), - static_cast(width[i + 2]), s.comment.c_str(), 1.0 / s.scale * 100, - (s.auto_generated ? " (generated)" : "")); + fprintf(fp, " %*s %-*s # %-*s%s\n", static_cast(width[i]), s.readable_count.c_str(), + static_cast(width[i + 1]), s.Name().c_str(), static_cast(width[i + 2]), + s.comment.c_str(), (s.auto_generated ? " (generated)" : "")); } } @@ -440,6 +439,7 @@ class StatCommand : public Command { void MonitorEachThread(); void AdjustToIntervalOnlyValues(std::vector& counters); bool ShowCounters(const std::vector& counters, double duration_in_sec, FILE* fp); + void CheckHardwareCounterMultiplexing(); bool verbose_mode_; bool system_wide_collection_; @@ -536,9 +536,6 @@ bool StatCommand::Run(const std::vector& args) { } // 3. Open perf_event_files and output file if defined. 
- if (cpus_.empty() && !report_per_core_ && (report_per_thread_ || !system_wide_collection_)) { - cpus_.push_back(-1); // Get event count for each thread on all cpus. - } if (!event_selection_set_.OpenEventFiles(cpus_)) { return false; } @@ -617,8 +614,15 @@ bool StatCommand::Run(const std::vector& args) { // 6. Read and print counters. if (interval_in_ms_ == 0) { - return print_counters(); + if (!print_counters()) { + return false; + } } + + // 7. Print hardware counter multiplexing warning when needed. + event_selection_set_.CloseEventFiles(); + CheckHardwareCounterMultiplexing(); + return true; } @@ -739,46 +743,49 @@ bool StatCommand::ParseOptions(const std::vector& args, return true; } -std::optional GetHardwareCountersOnCpu(int cpu) { - size_t available_counters = 0; +std::optional CheckHardwareCountersOnCpu(int cpu, size_t counters) { const EventType* event = FindEventTypeByName("cpu-cycles", true); if (event == nullptr) { return std::nullopt; } perf_event_attr attr = CreateDefaultPerfEventAttr(*event); - while (true) { - auto workload = Workload::CreateWorkload({"sleep", "0.1"}); - if (!workload || !workload->SetCpuAffinity(cpu)) { - return std::nullopt; - } - std::vector> event_fds; - for (size_t i = 0; i <= available_counters; i++) { - EventFd* group_event_fd = event_fds.empty() ? nullptr : event_fds[0].get(); - auto event_fd = EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd, - "cpu-cycles", false); - if (!event_fd) { - break; - } - event_fds.emplace_back(std::move(event_fd)); + auto workload = Workload::CreateWorkload({"sleep", "0.1"}); + if (!workload || !workload->SetCpuAffinity(cpu)) { + return std::nullopt; + } + std::vector> event_fds; + for (size_t i = 0; i < counters; i++) { + EventFd* group_event_fd = event_fds.empty() ? 
nullptr : event_fds[0].get(); + auto event_fd = + EventFd::OpenEventFile(attr, workload->GetPid(), cpu, group_event_fd, "cpu-cycles", false); + if (!event_fd) { + return false; } - if (event_fds.size() != available_counters + 1) { - break; + event_fds.emplace_back(std::move(event_fd)); + } + if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) { + return std::nullopt; + } + for (auto& event_fd : event_fds) { + PerfCounter counter; + if (!event_fd->ReadCounter(&counter)) { + return std::nullopt; } - if (!workload->Start() || !workload->WaitChildProcess(true, nullptr)) { + if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) { return false; } - bool always_running = true; - for (auto& event_fd : event_fds) { - PerfCounter counter; - if (!event_fd->ReadCounter(&counter)) { - return std::nullopt; - } - if (counter.time_enabled == 0 || counter.time_enabled > counter.time_running) { - always_running = false; - break; - } + } + return true; +} + +std::optional GetHardwareCountersOnCpu(int cpu) { + size_t available_counters = 0; + while (true) { + std::optional result = CheckHardwareCountersOnCpu(cpu, available_counters + 1); + if (!result.has_value()) { + return std::nullopt; } - if (!always_running) { + if (!result.value()) { break; } available_counters++; @@ -907,42 +914,43 @@ bool StatCommand::ShowCounters(const std::vector& counters, double summaries.GenerateComments(duration_in_sec); summaries.Show(fp); - if (csv_) + if (csv_) { fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec); - else + } else { fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec); + } + return true; +} - const char* COUNTER_MULTIPLEX_INFO = - "probably caused by hardware counter multiplexing (less counters than events).\n" - "Try --use-devfreq-counters if on a rooted device."; - - if (cpus_ == std::vector(1, -1) || - event_selection_set_.GetMonitoredThreads() == std::set({-1})) { - // We either monitor a thread on all cpus, or 
monitor all threads on a cpu. In both cases, - // if percentages < 100%, probably it is caused by hardware counter multiplexing. - bool counters_always_available = true; - for (const auto& summary : summaries.Summaries()) { - if (!summary.IsMonitoredAllTheTime()) { - counters_always_available = false; - break; - } +void StatCommand::CheckHardwareCounterMultiplexing() { + size_t hardware_events = 0; + for (const EventType* event : event_selection_set_.GetEvents()) { + if (event->IsHardwareEvent()) { + hardware_events++; } - if (!counters_always_available) { - LOG(WARNING) << "Percentages < 100% means some events only run a subset of enabled time,\n" - << COUNTER_MULTIPLEX_INFO; + } + if (hardware_events == 0) { + return; + } + std::vector cpus = cpus_; + if (cpus.empty()) { + cpus = GetOnlineCpus(); + } + for (int cpu : cpus) { + std::optional result = CheckHardwareCountersOnCpu(cpu, hardware_events); + if (result.has_value() && !result.value()) { + LOG(WARNING) << "It seems the number of hardware events are more than the number of\n" + << "available CPU PMU hardware counters. That will trigger hardware counter\n" + << "multiplexing. As a result, events are not counted all the time processes\n" + << "running, and event counts are smaller than what really happen.\n" + << "Use --print-hw-counter to show available hardware counters.\n" +#if defined(__ANDROID__) + << "If on a rooted device, try --use-devfreq-counters to get more counters.\n" +#endif + ; + break; } - } else if (report_per_thread_) { - // We monitor each thread on each cpu. - LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for each thread.\n" - << "If percentage sum of a thread < 99%, or report for a running thread is missing,\n" - << COUNTER_MULTIPLEX_INFO; - } else { - // We monitor some threads on each cpu. - LOG(INFO) << "A percentage represents runtime_on_a_cpu / runtime_on_all_cpus for monitored\n" - << "threads. 
If percentage sum < 99%, or report for an event is missing,\n" - << COUNTER_MULTIPLEX_INFO; } - return true; } } // namespace diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h index d2cd0c15..14863caa 100644 --- a/simpleperf/event_type.h +++ b/simpleperf/event_type.h @@ -24,6 +24,8 @@ #include #include +#include "perf_event.h" + namespace simpleperf { inline const std::string kETMEventName = "cs-etm"; @@ -50,6 +52,9 @@ struct EventType { bool IsPmuEvent() const { return name.find('/') != std::string::npos; } bool IsEtmEvent() const { return name == kETMEventName; } + bool IsHardwareEvent() const { + return type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE || type == PERF_TYPE_RAW; + } std::vector GetPmuCpumask(); -- cgit v1.2.3 From 9f3c6f5ba4e45ea1564b8d8fd5b2f9b6f59a285c Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 25 Aug 2022 10:51:11 -0700 Subject: simpleperf: accept failures when getting hw counters on a cpu. When built as a 32-bit program, simpleperf can't set sched_affinity to a 64-bit only CPU. That can make simpleperf not be able to get hardware counters on that CPU. 
Bug: 243065368 Bug: 243479304 Test: run simpleperf_unit_test Change-Id: I0eb2841e191e88a96eeae53ec95b265db194cfd8 (cherry picked from commit 5aded99e9925531836b0e2b511b1448b4d64785e) --- simpleperf/cmd_stat.cpp | 14 ++++++++------ simpleperf/workload.cpp | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp index f4df9b67..bd2cc5e3 100644 --- a/simpleperf/cmd_stat.cpp +++ b/simpleperf/cmd_stat.cpp @@ -433,7 +433,7 @@ class StatCommand : public Command { private: bool ParseOptions(const std::vector& args, std::vector* non_option_args); - bool PrintHardwareCounters(); + void PrintHardwareCounters(); bool AddDefaultMeasuredEventTypes(); void SetEventSelectionFlags(); void MonitorEachThread(); @@ -480,7 +480,8 @@ bool StatCommand::Run(const std::vector& args) { return false; } if (print_hw_counter_) { - return PrintHardwareCounters(); + PrintHardwareCounters(); + return true; } if (!app_package_name_.empty() && !in_app_context_) { if (!IsRoot()) { @@ -793,16 +794,17 @@ std::optional GetHardwareCountersOnCpu(int cpu) { return available_counters; } -bool StatCommand::PrintHardwareCounters() { +void StatCommand::PrintHardwareCounters() { for (int cpu : GetOnlineCpus()) { std::optional counters = GetHardwareCountersOnCpu(cpu); if (!counters) { - LOG(ERROR) << "failed to get CPU PMU hardware counters on cpu " << cpu; - return false; + // When built as a 32-bit program, we can't set sched_affinity to a 64-bit only CPU. So we + // may not be able to get hardware counters on that CPU. 
+ LOG(WARNING) << "Failed to get CPU PMU hardware counters on cpu " << cpu; + continue; } printf("There are %zu CPU PMU hardware counters available on cpu %d.\n", counters.value(), cpu); } - return true; } bool StatCommand::AddDefaultMeasuredEventTypes() { diff --git a/simpleperf/workload.cpp b/simpleperf/workload.cpp index 142ed713..92c6537b 100644 --- a/simpleperf/workload.cpp +++ b/simpleperf/workload.cpp @@ -167,7 +167,7 @@ bool Workload::SetCpuAffinity(int cpu) { CPU_ZERO(&mask); CPU_SET(cpu, &mask); if (sched_setaffinity(GetPid(), sizeof(mask), &mask) != 0) { - PLOG(ERROR) << "sched_setaffinity failed"; + PLOG(WARNING) << "sched_setaffinity failed"; return false; } return true; -- cgit v1.2.3 From e275d1e943acdaeaaa644dc7cb1db5c4cf73f4e8 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Thu, 25 Aug 2022 11:06:39 -0700 Subject: simpleperf: stat: remove percentage in csv output. Because percentage isn't correct after we open an event for each cpu. Bug: 243065368 Bug: 243479304 Test: run simpleperf_unit_test Change-Id: I67bc23594e82afce040de7755deb9b274b8ad8b8 (cherry picked from commit abc8ee2e0f792d24562405159ae46be19a1c8acb) --- simpleperf/cmd_stat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp index bd2cc5e3..bd1bffba 100644 --- a/simpleperf/cmd_stat.cpp +++ b/simpleperf/cmd_stat.cpp @@ -141,8 +141,8 @@ void CounterSummaries::ShowCSV(FILE* fp, bool show_thread, bool show_cpu) { if (show_cpu) { fprintf(fp, "%d,", s.cpu); } - fprintf(fp, "%s,%s,%s,(%.0f%%)%s\n", s.readable_count.c_str(), s.Name().c_str(), - s.comment.c_str(), 1.0 / s.scale * 100, (s.auto_generated ? " (generated)," : ",")); + fprintf(fp, "%s,%s,%s,%s\n", s.readable_count.c_str(), s.Name().c_str(), s.comment.c_str(), + (s.auto_generated ? "(generated)," : "")); } } -- cgit v1.2.3