From 26968e6c48dea2eaa217991ade5a04e801f1be8f Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Mon, 30 Jan 2017 11:34:24 -0800 Subject: simpleperf: add inplace-sampler event type. Add inplace-sampler event type, so it can be used in record/list command. This cl doesn't add code for communicating with profiled process, and fake records in InplaceSamplerClient.cpp for testing purpose. Refactor runtest.py to test inplace-sampler profiling. Bug: http://b/30974760 Test: run runtest.py --inplace-sampler. Change-Id: I92d8b03583c58b3589207f5c655e03853899be3a --- simpleperf/Android.mk | 1 + simpleperf/IOEventLoop.cpp | 1 + simpleperf/InplaceSamplerClient.cpp | 93 ++++++++++++ simpleperf/InplaceSamplerClient.h | 53 +++++++ simpleperf/cmd_list.cpp | 3 +- simpleperf/cmd_record.cpp | 24 +-- simpleperf/cmd_stat.cpp | 21 ++- simpleperf/event_fd.cpp | 9 +- simpleperf/event_fd.h | 2 +- simpleperf/event_selection_set.cpp | 161 ++++++++++++++++----- simpleperf/event_selection_set.h | 18 ++- simpleperf/event_type.h | 8 + simpleperf/event_type_table.h | 2 + simpleperf/generate_event_type_table.py | 7 + simpleperf/record.cpp | 16 +- simpleperf/runtest/comm_change.cpp | 13 +- simpleperf/runtest/function_fork.cpp | 16 +- simpleperf/runtest/function_indirect_recursive.cpp | 4 +- simpleperf/runtest/function_pthread.cpp | 24 +-- simpleperf/runtest/function_recursive.cpp | 4 +- simpleperf/runtest/one_function.cpp | 4 +- simpleperf/runtest/runtest.py | 110 ++++++++++---- simpleperf/runtest/two_functions.cpp | 6 +- simpleperf/workload.h | 3 + 24 files changed, 485 insertions(+), 118 deletions(-) create mode 100644 simpleperf/InplaceSamplerClient.cpp create mode 100644 simpleperf/InplaceSamplerClient.h diff --git a/simpleperf/Android.mk b/simpleperf/Android.mk index 67eb0a04..a677de4a 100644 --- a/simpleperf/Android.mk +++ b/simpleperf/Android.mk @@ -110,6 +110,7 @@ libsimpleperf_src_files_linux := \ environment.cpp \ event_fd.cpp \ event_selection_set.cpp \ + InplaceSamplerClient.cpp \ 
IOEventLoop.cpp \ perf_clock.cpp \ record_file_writer.cpp \ diff --git a/simpleperf/IOEventLoop.cpp b/simpleperf/IOEventLoop.cpp index 44de2896..ce259280 100644 --- a/simpleperf/IOEventLoop.cpp +++ b/simpleperf/IOEventLoop.cpp @@ -40,6 +40,7 @@ struct IOEvent { IOEventLoop::IOEventLoop() : ebase_(nullptr), has_error_(false) {} IOEventLoop::~IOEventLoop() { + events_.clear(); if (ebase_ != nullptr) { event_base_free(ebase_); } diff --git a/simpleperf/InplaceSamplerClient.cpp b/simpleperf/InplaceSamplerClient.cpp new file mode 100644 index 00000000..5a788613 --- /dev/null +++ b/simpleperf/InplaceSamplerClient.cpp @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "InplaceSamplerClient.h" + +#include +#include +#include + +#include +#include + +#include "environment.h" +#include "utils.h" + +static constexpr uint64_t EVENT_ID_FOR_INPLACE_SAMPLER = ULONG_MAX; + +std::unique_ptr InplaceSamplerClient::Create(const perf_event_attr& attr, + pid_t pid, + const std::set& tids) { + if (pid == -1) { + LOG(ERROR) << "inplace-sampler can't monitor system wide events."; + return nullptr; + } + std::unique_ptr sampler(new InplaceSamplerClient(attr, pid, tids)); + if (!sampler->ConnectServer()) { + return nullptr; + } + if (!sampler->StartProfiling()) { + return nullptr; + } + return sampler; +} + +InplaceSamplerClient::InplaceSamplerClient(const perf_event_attr& attr, pid_t pid, + const std::set& tids) + : attr_(attr), pid_(pid), tids_(tids), closed_(false) { +} + +uint64_t InplaceSamplerClient::Id() const { + return EVENT_ID_FOR_INPLACE_SAMPLER; +} + +bool InplaceSamplerClient::ConnectServer() { + return true; +} + +bool InplaceSamplerClient::StartProfiling() { + return true; +} + +bool InplaceSamplerClient::StartPolling(IOEventLoop& loop, + const std::function& record_callback, + const std::function& close_callback) { + record_callback_ = record_callback; + close_callback_ = close_callback; + auto callback = [this]() { + // Fake records for testing. 
+ uint64_t time = GetSystemClock(); + CommRecord comm_r(attr_, pid_, pid_, "fake_comm", Id(), time); + if (!record_callback_(&comm_r)) { + return false; + } + MmapRecord mmap_r(attr_, false, pid_, pid_, 0x1000, 0x1000, 0x0, "fake_elf", Id(), time); + if (!record_callback_(&mmap_r)) { + return false; + } + std::vector ips(1, 0x1000); + SampleRecord r(attr_, Id(), ips[0], pid_, pid_, time, 0, 1, ips); + if (!record_callback_(&r)) { + return false; + } + closed_ = true; + return close_callback_(); + }; + timeval duration; + duration.tv_sec = 0; + duration.tv_usec = 1000; + return loop.AddPeriodicEvent(duration, callback); +} diff --git a/simpleperf/InplaceSamplerClient.h b/simpleperf/InplaceSamplerClient.h new file mode 100644 index 00000000..0c606bb3 --- /dev/null +++ b/simpleperf/InplaceSamplerClient.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SIMPLE_PERF_INPLACE_SAMPLER_CLIENT_H_ +#define SIMPLE_PERF_INPLACE_SAMPLER_CLIENT_H_ + +#include +#include +#include + +#include "event_attr.h" +#include "record.h" +#include "UnixSocket.h" + +class InplaceSamplerClient { + public: + static std::unique_ptr Create(const perf_event_attr& attr, pid_t pid, + const std::set& tids); + uint64_t Id() const; + bool IsClosed() { + return closed_; + } + bool StartPolling(IOEventLoop& loop, const std::function& record_callback, + const std::function& close_callback); + bool StopProfiling(); + + private: + InplaceSamplerClient(const perf_event_attr& attr, pid_t pid, const std::set& tids); + bool ConnectServer(); + bool StartProfiling(); + + const perf_event_attr attr_; + const pid_t pid_; + const std::set tids_; + std::function record_callback_; + std::function close_callback_; + bool closed_; +}; + +#endif // SIMPLE_PERF_INPLACE_SAMPLER_CLIENT_H_ diff --git a/simpleperf/cmd_list.cpp b/simpleperf/cmd_list.cpp index 273a8037..0248aa97 100644 --- a/simpleperf/cmd_list.cpp +++ b/simpleperf/cmd_list.cpp @@ -36,7 +36,7 @@ static void PrintEventTypesOfType(uint32_t type, const std::string& type_name, // Exclude kernel to list supported events even when // /proc/sys/kernel/perf_event_paranoid is 2. 
attr.exclude_kernel = 1; - if (IsEventAttrSupportedByKernel(attr)) { + if (IsEventAttrSupported(attr)) { printf(" %s\n", event_type.name.c_str()); } } @@ -65,6 +65,7 @@ bool ListCommand::Run(const std::vector& args) { {"sw", {PERF_TYPE_SOFTWARE, "software events"}}, {"cache", {PERF_TYPE_HW_CACHE, "hw-cache events"}}, {"tracepoint", {PERF_TYPE_TRACEPOINT, "tracepoint events"}}, + {"user-space-sampler", {SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS, "user-space samplers"}}, }; std::vector names; diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp index b9dfff02..1cbb86cd 100644 --- a/simpleperf/cmd_record.cpp +++ b/simpleperf/cmd_record.cpp @@ -259,6 +259,13 @@ bool RecordCommand::Run(const std::vector& args) { if (workload != nullptr) { event_selection_set_.AddMonitoredProcesses({workload->GetPid()}); event_selection_set_.SetEnableOnExec(true); + if (event_selection_set_.HasInplaceSampler()) { + // Start worker early, because the worker process has to setup inplace-sampler server + // before we try to connect it. + if (!workload->Start()) { + return false; + } + } } else { LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help"; @@ -282,9 +289,7 @@ bool RecordCommand::Run(const std::vector& args) { return false; } - // 5. Create IOEventLoop and add read/signal/periodic Events. - IOEventLoop loop; - event_selection_set_.SetIOEventLoop(loop); + // 5. Add read/signal/periodic Events. 
auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1); if (!event_selection_set_.PrepareToReadMmapEventData(callback)) { @@ -296,13 +301,14 @@ bool RecordCommand::Run(const std::vector& args) { if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) { return false; } - if (!loop.AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, - [&]() { return loop.ExitLoop(); })) { + IOEventLoop* loop = event_selection_set_.GetIOEventLoop(); + if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, + [&]() { return loop->ExitLoop(); })) { return false; } if (duration_in_sec_ != 0) { - if (!loop.AddPeriodicEvent(SecondToTimeval(duration_in_sec_), - [&]() { return loop.ExitLoop(); })) { + if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), + [&]() { return loop->ExitLoop(); })) { return false; } } @@ -312,10 +318,10 @@ bool RecordCommand::Run(const std::vector& args) { start_sampling_time_in_ns_ = GetPerfClock(); LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns"; - if (workload != nullptr && !workload->Start()) { + if (workload != nullptr && !workload->IsStarted() && !workload->Start()) { return false; } - if (!loop.RunLoop()) { + if (!loop->RunLoop()) { return false; } if (!event_selection_set_.FinishReadMmapEventData()) { diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp index 423fbffb..cdd5593a 100644 --- a/simpleperf/cmd_stat.cpp +++ b/simpleperf/cmd_stat.cpp @@ -383,9 +383,7 @@ bool StatCommand::Run(const std::vector& args) { fp = fp_holder.get(); } - // 4. Create IOEventLoop and add signal/periodic Events. - IOEventLoop loop; - event_selection_set_.SetIOEventLoop(loop); + // 4. Add signal/periodic Events. 
std::chrono::time_point start_time; std::vector counters; if (system_wide_collection_ || (!cpus_.empty() && cpus_[0] != -1)) { @@ -396,13 +394,14 @@ bool StatCommand::Run(const std::vector& args) { if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) { return false; } - if (!loop.AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, - [&]() { return loop.ExitLoop(); })) { + IOEventLoop* loop = event_selection_set_.GetIOEventLoop(); + if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, + [&]() { return loop->ExitLoop(); })) { return false; } if (duration_in_sec_ != 0) { - if (!loop.AddPeriodicEvent(SecondToTimeval(duration_in_sec_), - [&]() { return loop.ExitLoop(); })) { + if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), + [&]() { return loop->ExitLoop(); })) { return false; } } @@ -422,8 +421,8 @@ bool StatCommand::Run(const std::vector& args) { }; if (interval_in_ms_ != 0) { - if (!loop.AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0), - print_counters)) { + if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0), + print_counters)) { return false; } } @@ -433,7 +432,7 @@ bool StatCommand::Run(const std::vector& args) { if (workload != nullptr && !workload->Start()) { return false; } - if (!loop.RunLoop()) { + if (!loop->RunLoop()) { return false; } @@ -548,7 +547,7 @@ bool StatCommand::AddDefaultMeasuredEventTypes() { // supported by the kernel. 
const EventType* type = FindEventTypeByName(name); if (type != nullptr && - IsEventAttrSupportedByKernel(CreateDefaultPerfEventAttr(*type))) { + IsEventAttrSupported(CreateDefaultPerfEventAttr(*type))) { if (!event_selection_set_.AddEventType(name)) { return false; } diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp index 08cf98ff..b78da4c0 100644 --- a/simpleperf/event_fd.cpp +++ b/simpleperf/event_fd.cpp @@ -260,7 +260,12 @@ bool EventFd::StartPolling(IOEventLoop& loop, bool EventFd::StopPolling() { return IOEventLoop::DelEvent(ioevent_ref_); } -bool IsEventAttrSupportedByKernel(perf_event_attr attr) { - auto event_fd = EventFd::OpenEventFile(attr, getpid(), -1, nullptr, false); +bool IsEventAttrSupported(const perf_event_attr& attr) { + if (attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS && + attr.config == SIMPLEPERF_CONFIG_INPLACE_SAMPLER) { + // User space samplers don't need kernel support. + return true; + } + std::unique_ptr event_fd = EventFd::OpenEventFile(attr, getpid(), -1, nullptr, false); return event_fd != nullptr; } diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h index aaba0ef2..f1ddb551 100644 --- a/simpleperf/event_fd.h +++ b/simpleperf/event_fd.h @@ -126,6 +126,6 @@ class EventFd { DISALLOW_COPY_AND_ASSIGN(EventFd); }; -bool IsEventAttrSupportedByKernel(perf_event_attr attr); +bool IsEventAttrSupported(const perf_event_attr& attr); #endif // SIMPLE_PERF_EVENT_FD_H_ diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp index 038997fb..71c6c723 100644 --- a/simpleperf/event_selection_set.cpp +++ b/simpleperf/event_selection_set.cpp @@ -36,7 +36,7 @@ bool IsBranchSamplingSupported() { perf_event_attr attr = CreateDefaultPerfEventAttr(*type); attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; - return IsEventAttrSupportedByKernel(attr); + return IsEventAttrSupported(attr); } bool IsDwarfCallChainSamplingSupported() { @@ -50,7 +50,7 @@ bool 
IsDwarfCallChainSamplingSupported() { attr.exclude_callchain_user = 1; attr.sample_regs_user = GetSupportedRegMask(GetBuildArch()); attr.sample_stack_user = 8192; - return IsEventAttrSupportedByKernel(attr); + return IsEventAttrSupported(attr); } bool EventSelectionSet::BuildAndCheckEventSelection( @@ -78,9 +78,9 @@ bool EventSelectionSet::BuildAndCheckEventSelection( selection->event_attr.exclude_host = event_type->exclude_host; selection->event_attr.exclude_guest = event_type->exclude_guest; selection->event_attr.precise_ip = event_type->precise_ip; - if (!IsEventAttrSupportedByKernel(selection->event_attr)) { + if (!IsEventAttrSupported(selection->event_attr)) { LOG(ERROR) << "Event type '" << event_type->name - << "' is not supported by the kernel"; + << "' is not supported on the device"; return false; } selection->event_fds.clear(); @@ -129,6 +129,18 @@ std::vector EventSelectionSet::GetTracepointEvents() const { return result; } +bool EventSelectionSet::HasInplaceSampler() const { + for (const auto& group : groups_) { + for (const auto& sel : group) { + if (sel.event_attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS && + sel.event_attr.config == SIMPLEPERF_CONFIG_INPLACE_SAMPLER) { + return true; + } + } + } + return false; +} + std::vector EventSelectionSet::GetEventAttrWithId() const { std::vector result; for (const auto& group : groups_) { @@ -138,6 +150,9 @@ std::vector EventSelectionSet::GetEventAttrWithId() const { for (const auto& fd : selection.event_fds) { attr_id.ids.push_back(fd->Id()); } + if (!selection.inplace_samplers.empty()) { + attr_id.ids.push_back(selection.inplace_samplers[0]->Id()); + } result.push_back(attr_id); } } @@ -347,12 +362,24 @@ bool EventSelectionSet::OpenEventFilesOnGroup(EventSelectionGroup& group, return true; } -static std::set PrepareThreads(const std::set& processes, - const std::set& threads) { - std::set result = threads; - for (const auto& pid : processes) { +static std::map> PrepareThreads(const std::set& processes, 
+ const std::set& threads) { + std::map> result; + for (auto& pid : processes) { std::vector tids = GetThreadsInProcess(pid); - result.insert(tids.begin(), tids.end()); + std::set& threads_in_process = result[pid]; + threads_in_process.insert(tids.begin(), tids.end()); + } + for (auto& tid : threads) { + // tid = -1 means monitoring all threads. + if (tid == -1) { + result[-1].insert(-1); + } else { + pid_t pid; + if (GetProcessForThread(tid, &pid)) { + result[pid].insert(tid); + } + } } return result; } @@ -367,26 +394,56 @@ bool EventSelectionSet::OpenEventFiles(const std::vector& on_cpus) { } else { cpus = GetOnlineCpus(); } - std::set threads = PrepareThreads(processes_, threads_); + std::map> process_map = PrepareThreads(processes_, threads_); for (auto& group : groups_) { - for (const auto& tid : threads) { - size_t success_cpu_count = 0; - std::string failed_event_type; - for (const auto& cpu : cpus) { - if (OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) { - success_cpu_count++; + if (IsUserSpaceSamplerGroup(group)) { + if (!OpenUserSpaceSamplersOnGroup(group, process_map)) { + return false; + } + } else { + for (const auto& pair : process_map) { + for (const auto& tid : pair.second) { + size_t success_cpu_count = 0; + std::string failed_event_type; + for (const auto& cpu : cpus) { + if (OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) { + success_cpu_count++; + } + } + // As the online cpus can be enabled or disabled at runtime, we may not + // open event file for all cpus successfully. But we should open at + // least one cpu successfully. + if (success_cpu_count == 0) { + PLOG(ERROR) << "failed to open perf event file for event_type " + << failed_event_type << " for " + << (tid == -1 ? "all threads" : "thread " + std::to_string(tid)) + << " on all cpus"; + return false; + } } } - // As the online cpus can be enabled or disabled at runtime, we may not - // open event file for all cpus successfully. 
But we should open at - // least one cpu successfully. - if (success_cpu_count == 0) { - PLOG(ERROR) << "failed to open perf event file for event_type " - << failed_event_type << " for " - << (tid == -1 ? "all threads" - : "thread " + std::to_string(tid)) - << " on all cpus"; - return false; + } + } + return true; +} + +bool EventSelectionSet::IsUserSpaceSamplerGroup(EventSelectionGroup& group) { + return group.size() == 1 && group[0].event_attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS; +} + +bool EventSelectionSet::OpenUserSpaceSamplersOnGroup(EventSelectionGroup& group, + const std::map>& process_map) { + CHECK_EQ(group.size(), 1u); + for (auto& selection : group) { + if (selection.event_attr.type == SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS && + selection.event_attr.config == SIMPLEPERF_CONFIG_INPLACE_SAMPLER) { + for (auto& pair : process_map) { + std::unique_ptr sampler = InplaceSamplerClient::Create( + selection.event_attr, pair.first, pair.second); + if (sampler == nullptr) { + return false; + } + selection.inplace_samplers.push_back(std::move(sampler)); } } } @@ -479,6 +536,12 @@ bool EventSelectionSet::PrepareToReadMmapEventData(const std::functionStartPolling(*loop_, callback, + [&] { return CheckMonitoredTargets(); })) { + return false; + } + } } } @@ -518,6 +581,9 @@ bool EventSelectionSet::ReadMmapEventData() { } } + if (head_size == 0) { + return true; + } if (head_size == 1) { // Only one buffer has data, process it directly. std::vector> records = @@ -645,17 +711,21 @@ bool EventSelectionSet::HandleCpuOfflineEvent(int cpu) { bool EventSelectionSet::HandleCpuOnlineEvent(int cpu) { // We need to start profiling when opening new event files. 
SetEnableOnExec(false); - std::set threads = PrepareThreads(processes_, threads_); + std::map> process_map = PrepareThreads(processes_, threads_); for (auto& group : groups_) { - for (const auto& tid : threads) { - std::string failed_event_type; - if (!OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) { - // If failed to open event files, maybe the cpu has been offlined. - PLOG(WARNING) << "failed to open perf event file for event_type " - << failed_event_type << " for " - << (tid == -1 ? "all threads" - : "thread " + std::to_string(tid)) - << " on cpu " << cpu; + if (IsUserSpaceSamplerGroup(group)) { + continue; + } + for (const auto& pair : process_map) { + for (const auto& tid : pair.second) { + std::string failed_event_type; + if (!OpenEventFilesOnGroup(group, tid, cpu, &failed_event_type)) { + // If failed to open event files, maybe the cpu has been offlined. + PLOG(WARNING) << "failed to open perf event file for event_type " + << failed_event_type << " for " + << (tid == -1 ? 
"all threads" : "thread " + std::to_string(tid)) + << " on cpu " << cpu; + } + } } } } @@ -723,6 +793,9 @@ bool EventSelectionSet::StopWhenNoMoreTargets(double check_interval_in_sec) { } bool EventSelectionSet::CheckMonitoredTargets() { + if (!HasSampler()) { + return loop_->ExitLoop(); + } for (const auto& tid : threads_) { if (IsThreadAlive(tid)) { return true; @@ -735,3 +808,19 @@ bool EventSelectionSet::CheckMonitoredTargets() { } return loop_->ExitLoop(); } + +bool EventSelectionSet::HasSampler() { + for (auto& group : groups_) { + for (auto& sel : group) { + if (!sel.event_fds.empty()) { + return true; + } + for (auto& sampler : sel.inplace_samplers) { + if (!sampler->IsClosed()) { + return true; + } + } + } + } + return false; +} diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h index 8aca7840..97ad7e59 100644 --- a/simpleperf/event_selection_set.h +++ b/simpleperf/event_selection_set.h @@ -28,6 +28,8 @@ #include "event_attr.h" #include "event_fd.h" #include "event_type.h" +#include "InplaceSamplerClient.h" +#include "IOEventLoop.h" #include "perf_event.h" #include "record.h" @@ -47,8 +49,6 @@ struct CountersInfo { std::vector counters; }; -class IOEventLoop; - // EventSelectionSet helps to monitor events. It is used in following steps: // 1. Create an EventSelectionSet, and add event types to monitor by calling // AddEventType() or AddEventGroup(). 
@@ -67,13 +67,14 @@ class IOEventLoop; class EventSelectionSet { public: EventSelectionSet(bool for_stat_cmd) - : for_stat_cmd_(for_stat_cmd), mmap_pages_(0), loop_(nullptr) {} + : for_stat_cmd_(for_stat_cmd), mmap_pages_(0), loop_(new IOEventLoop) {} bool empty() const { return groups_.empty(); } bool AddEventType(const std::string& event_name); bool AddEventGroup(const std::vector& event_names); std::vector GetTracepointEvents() const; + bool HasInplaceSampler() const; std::vector GetEventAttrWithId() const; void SetEnableOnExec(bool enable); @@ -104,8 +105,8 @@ class EventSelectionSet { return !processes_.empty() || !threads_.empty(); } - void SetIOEventLoop(IOEventLoop& loop) { - loop_ = &loop; + IOEventLoop* GetIOEventLoop() { + return loop_.get(); } bool OpenEventFiles(const std::vector& on_cpus); @@ -128,6 +129,7 @@ class EventSelectionSet { EventTypeAndModifier event_type_modifier; perf_event_attr event_attr; std::vector> event_fds; + std::vector> inplace_samplers; // counters for event files closed for cpu hotplug events std::vector hotplugged_counters; }; @@ -136,6 +138,9 @@ class EventSelectionSet { bool BuildAndCheckEventSelection(const std::string& event_name, EventSelection* selection); void UnionSampleType(); + bool IsUserSpaceSamplerGroup(EventSelectionGroup& group); + bool OpenUserSpaceSamplersOnGroup(EventSelectionGroup& group, + const std::map>& process_map); bool OpenEventFilesOnGroup(EventSelectionGroup& group, pid_t tid, int cpu, std::string* failed_event_type); @@ -147,6 +152,7 @@ class EventSelectionSet { bool HandleCpuOfflineEvent(int cpu); bool CreateMappedBufferForCpu(int cpu); bool CheckMonitoredTargets(); + bool HasSampler(); const bool for_stat_cmd_; @@ -155,7 +161,7 @@ class EventSelectionSet { std::set threads_; size_t mmap_pages_; - IOEventLoop* loop_; + std::unique_ptr loop_; std::function record_callback_; std::set monitored_cpus_; diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h index 12d83b3c..a1e401f4 100644 --- 
a/simpleperf/event_type.h +++ b/simpleperf/event_type.h @@ -22,6 +22,14 @@ #include #include +// A uint32_t value far from 0 is picked, so it is unlikely to conflict with further +// PERF_TYPE_* events. +static constexpr uint32_t SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS = 32768; + +enum { + SIMPLEPERF_CONFIG_INPLACE_SAMPLER, +}; + // EventType represents one type of event, like cpu_cycle_event, cache_misses_event. // The user knows one event type by its name, and the kernel knows one event type by its // (type, config) pair. EventType connects the two representations, and tells the user if diff --git a/simpleperf/event_type_table.h b/simpleperf/event_type_table.h index a77be0af..123216c9 100644 --- a/simpleperf/event_type_table.h +++ b/simpleperf/event_type_table.h @@ -63,3 +63,5 @@ {"node-prefetches", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16))}, {"node-prefetch-misses", PERF_TYPE_HW_CACHE, ((PERF_COUNT_HW_CACHE_NODE) | (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))}, +{"inplace-sampler", SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS, SIMPLEPERF_CONFIG_INPLACE_SAMPLER}, + diff --git a/simpleperf/generate_event_type_table.py b/simpleperf/generate_event_type_table.py index ff60c236..eaffd60d 100755 --- a/simpleperf/generate_event_type_table.py +++ b/simpleperf/generate_event_type_table.py @@ -106,11 +106,18 @@ def gen_hw_cache_events(): return generated_str +def gen_user_space_events(): + generated_str = gen_event_type_entry_str("inplace-sampler", + "SIMPLEPERF_TYPE_USER_SPACE_SAMPLERS", + "SIMPLEPERF_CONFIG_INPLACE_SAMPLER") + return generated_str + def gen_events(): generated_str = "// This file is auto-generated by generate-event_table.py.\n\n" generated_str += gen_hardware_events() + '\n' generated_str += gen_software_events() + '\n' generated_str += gen_hw_cache_events() + '\n' + generated_str += gen_user_space_events() + '\n' return generated_str 
generated_str = gen_events() diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp index 86da0657..ba04daff 100644 --- a/simpleperf/record.cpp +++ b/simpleperf/record.cpp @@ -464,7 +464,8 @@ SampleRecord::SampleRecord(const perf_event_attr& attr, uint64_t id, sample_type = attr.sample_type; CHECK_EQ(0u, sample_type & ~(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID | PERF_SAMPLE_CPU - | PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN)); + | PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER + | PERF_SAMPLE_STACK_USER)); ip_data.ip = ip; tid_data.pid = pid; tid_data.tid = tid; @@ -502,6 +503,13 @@ SampleRecord::SampleRecord(const perf_event_attr& attr, uint64_t id, if (sample_type & PERF_SAMPLE_CALLCHAIN) { size += sizeof(uint64_t) * (ips.size() + 1); } + if (sample_type & PERF_SAMPLE_REGS_USER) { + size += sizeof(uint64_t); + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + size += sizeof(uint64_t); + } + SetSize(size); char* new_binary = new char[size]; char* p = new_binary; @@ -529,6 +537,12 @@ SampleRecord::SampleRecord(const perf_event_attr& attr, uint64_t id, callchain_data.ips = reinterpret_cast(p); MoveToBinaryFormat(ips.data(), ips.size(), p); } + if (sample_type & PERF_SAMPLE_REGS_USER) { + MoveToBinaryFormat(regs_user_data.abi, p); + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + MoveToBinaryFormat(stack_user_data.size, p); + } CHECK_EQ(p, new_binary + size); UpdateBinary(new_binary); } diff --git a/simpleperf/runtest/comm_change.cpp b/simpleperf/runtest/comm_change.cpp index 12d64fa2..cdcb2bf4 100644 --- a/simpleperf/runtest/comm_change.cpp +++ b/simpleperf/runtest/comm_change.cpp @@ -8,9 +8,14 @@ void Function1() { } int main() { - prctl(PR_SET_NAME, reinterpret_cast("RUN_COMM1"), 0, 0, 0); // NOLINT - Function1(); - prctl(PR_SET_NAME, reinterpret_cast("RUN_COMM2"), 0, 0, 0); // NOLINT - Function1(); + // Run the test in an infinite loop, so if we profile the test manually, the process + // doesn't exit before 
we attach to it. This scheme also allows simpleperf to control + // how long to profile. + while (true) { + prctl(PR_SET_NAME, reinterpret_cast("RUN_COMM1"), 0, 0, 0); // NOLINT + Function1(); + prctl(PR_SET_NAME, reinterpret_cast("RUN_COMM2"), 0, 0, 0); // NOLINT + Function1(); + } return 0; } diff --git a/simpleperf/runtest/function_fork.cpp b/simpleperf/runtest/function_fork.cpp index b1477a6a..8551927d 100644 --- a/simpleperf/runtest/function_fork.cpp +++ b/simpleperf/runtest/function_fork.cpp @@ -1,4 +1,5 @@ #include +#include #include constexpr int LOOP_COUNT = 100000000; @@ -19,12 +20,15 @@ void ChildFunction() { } int main() { - pid_t pid = fork(); - if (pid == 0) { - ChildFunction(); - return 0; - } else { - ParentFunction(); + while (true) { + pid_t pid = fork(); + if (pid == 0) { + ChildFunction(); + return 0; + } else { + ParentFunction(); + waitpid(pid, nullptr, 0); + } } return 0; } diff --git a/simpleperf/runtest/function_indirect_recursive.cpp b/simpleperf/runtest/function_indirect_recursive.cpp index 5e70fd32..70645a1b 100644 --- a/simpleperf/runtest/function_indirect_recursive.cpp +++ b/simpleperf/runtest/function_indirect_recursive.cpp @@ -19,6 +19,8 @@ void FunctionRecursiveTwo(int loop) { } int main() { - FunctionRecursiveOne(10); + while (true) { + FunctionRecursiveOne(10); + } return 0; } diff --git a/simpleperf/runtest/function_pthread.cpp b/simpleperf/runtest/function_pthread.cpp index 02fc0a5f..c80fb3f2 100644 --- a/simpleperf/runtest/function_pthread.cpp +++ b/simpleperf/runtest/function_pthread.cpp @@ -17,17 +17,19 @@ void MainThreadFunction() { } int main() { - pthread_t thread; - int ret = pthread_create(&thread, nullptr, ChildThreadFunction, nullptr); - if (ret != 0) { - fprintf(stderr, "pthread_create failed: %s\n", strerror(ret)); - exit(1); - } - MainThreadFunction(); - ret = pthread_join(thread, nullptr); - if (ret != 0) { - fprintf(stderr, "pthread_join failed: %s\n", strerror(ret)); - exit(1); + while (true) { + pthread_t 
thread; + int ret = pthread_create(&thread, nullptr, ChildThreadFunction, nullptr); + if (ret != 0) { + fprintf(stderr, "pthread_create failed: %s\n", strerror(ret)); + exit(1); + } + MainThreadFunction(); + ret = pthread_join(thread, nullptr); + if (ret != 0) { + fprintf(stderr, "pthread_join failed: %s\n", strerror(ret)); + exit(1); + } } return 0; } diff --git a/simpleperf/runtest/function_recursive.cpp b/simpleperf/runtest/function_recursive.cpp index d8d28bcc..bf60668b 100644 --- a/simpleperf/runtest/function_recursive.cpp +++ b/simpleperf/runtest/function_recursive.cpp @@ -11,6 +11,8 @@ void FunctionRecursive(int loop) { } int main() { - FunctionRecursive(10); + while (true) { + FunctionRecursive(10); + } return 0; } diff --git a/simpleperf/runtest/one_function.cpp b/simpleperf/runtest/one_function.cpp index 49090aca..561bb5a5 100644 --- a/simpleperf/runtest/one_function.cpp +++ b/simpleperf/runtest/one_function.cpp @@ -6,6 +6,8 @@ void Function1() { } int main() { - Function1(); + while (true) { + Function1(); + } return 0; } diff --git a/simpleperf/runtest/runtest.py b/simpleperf/runtest/runtest.py index bbfdc48a..77fc5669 100644 --- a/simpleperf/runtest/runtest.py +++ b/simpleperf/runtest/runtest.py @@ -277,22 +277,29 @@ def load_symbol_relation_requirement(symbol_item): class Runner(object): - def __init__(self, perf_path): + def __init__(self, target, perf_path): + self.target = target self.perf_path = perf_path + self.use_callgraph = False + self.sampler = 'cpu-cycles' def record(self, test_executable_name, record_file, additional_options=[]): - call_args = [self.perf_path, - 'record'] + additional_options + ['-e', - 'cpu-cycles:u', - '-o', - record_file, - test_executable_name] + call_args = [self.perf_path, 'record'] + call_args += ['--duration', '1'] + call_args += ['-e', '%s:u' % self.sampler] + if self.use_callgraph: + call_args += ['-f', '1000', '-g'] + call_args += ['-o', record_file] + call_args += additional_options + call_args += 
[test_executable_name] self._call(call_args) def report(self, record_file, report_file, additional_options=[]): - call_args = [self.perf_path, - 'report'] + additional_options + ['-i', - record_file] + call_args = [self.perf_path, 'report'] + call_args += ['-i', record_file] + if self.use_callgraph: + call_args += ['-g', 'callee'] + call_args += additional_options self._call(call_args, report_file) def _call(self, args, output_file=None): @@ -303,6 +310,9 @@ class HostRunner(Runner): """Run perf test on host.""" + def __init__(self, perf_path): + super(HostRunner, self).__init__('host', perf_path) + def _call(self, args, output_file=None): output_fh = None if output_file is not None: @@ -318,8 +328,8 @@ class DeviceRunner(Runner): def __init__(self, perf_path): self.tmpdir = '/data/local/tmp/' + super(DeviceRunner, self).__init__('device', self.tmpdir + perf_path) self._download(os.environ['OUT'] + '/system/xbin/' + perf_path, self.tmpdir) - self.perf_path = self.tmpdir + perf_path def _call(self, args, output_file=None): output_fh = None @@ -518,7 +528,7 @@ class ReportAnalyzer(object): return result -def runtest(host, device, normal, callgraph, selected_tests): +def runtest(host, device, normal, callgraph, use_inplace_sampler, selected_tests): tests = load_config_file(os.path.dirname(os.path.realpath(__file__)) + \ '/runtest.conf') host_runner = HostRunner('simpleperf') @@ -581,26 +591,76 @@ def runtest(host, device, normal, callgraph, selected_tests): if not result: exit(1) + +def build_runner(target, use_callgraph, sampler): + if target == 'host': + runner = HostRunner('simpleperf') + else: + runner = DeviceRunner('simpleperf') + runner.use_callgraph = use_callgraph + runner.sampler = sampler + return runner + + +def test_with_runner(runner, tests): + report_analyzer = ReportAnalyzer() + for test in tests: + runner.record(test.executable_name, 'perf.data') + if runner.sampler == 'inplace-sampler': + # TODO: fix this when inplace-sampler actually works. 
+ runner.report('perf.data', 'perf.report') + symbols = report_analyzer._read_report_file('perf.report', runner.use_callgraph) + result = False + if len(symbols) == 1 and symbols[0].name == 'fake_elf[+0]': + result = True + else: + runner.report('perf.data', 'perf.report', additional_options = test.report_options) + result = report_analyzer.check_report_file(test, 'perf.report', runner.use_callgraph) + str = 'test %s on %s ' % (test.test_name, runner.target) + if runner.use_callgraph: + str += 'with call graph ' + str += 'using %s ' % runner.sampler + str += ' Succeeded' if result else 'Failed' + print str + if not result: + exit(1) + + +def runtest(target_options, use_callgraph_options, sampler_options, selected_tests): + tests = load_config_file(os.path.dirname(os.path.realpath(__file__)) + \ + '/runtest.conf') + if selected_tests is not None: + new_tests = [] + for test in tests: + if test.test_name in selected_tests: + new_tests.append(test) + tests = new_tests + for target in target_options: + for use_callgraph in use_callgraph_options: + for sampler in sampler_options: + runner = build_runner(target, use_callgraph, sampler) + test_with_runner(runner, tests) + + def main(): - host = True - device = True - normal = True - callgraph = True + target_options = ['host', 'target'] + use_callgraph_options = [False, True] + sampler_options = ['cpu-cycles', 'inplace-sampler'] selected_tests = None i = 1 while i < len(sys.argv): if sys.argv[i] == '--host': - host = True - device = False + use_callgraph_options = ['host'] elif sys.argv[i] == '--device': - host = False - device = True + use_callgraph_options = ['device'] elif sys.argv[i] == '--normal': - normal = True - callgraph = False + use_callgraph_options = [False] elif sys.argv[i] == '--callgraph': - normal = False - callgraph = True + use_callgraph_options = [True] + elif sys.argv[i] == '--no-inplace-sampler': + sampler_options = ['cpu-cycles'] + elif sys.argv[i] == '--inplace-sampler': + sampler_options = 
['inplace-sampler'] elif sys.argv[i] == '--test': if i < len(sys.argv): i += 1 @@ -609,7 +669,7 @@ def main(): selected_tests = {} selected_tests[test] = True i += 1 - runtest(host, device, normal, callgraph, selected_tests) + runtest(target_options, use_callgraph_options, sampler_options, selected_tests) if __name__ == '__main__': main() diff --git a/simpleperf/runtest/two_functions.cpp b/simpleperf/runtest/two_functions.cpp index 1d3e3893..b74c1538 100644 --- a/simpleperf/runtest/two_functions.cpp +++ b/simpleperf/runtest/two_functions.cpp @@ -18,7 +18,9 @@ void Function2() { } int main() { - Function1(); - Function2(); + while (true) { + Function1(); + Function2(); + } return 0; } diff --git a/simpleperf/workload.h b/simpleperf/workload.h index 2141830f..9d9d5952 100644 --- a/simpleperf/workload.h +++ b/simpleperf/workload.h @@ -40,6 +40,9 @@ class Workload { ~Workload(); bool Start(); + bool IsStarted() { + return work_state_ == Started; + } pid_t GetPid() { return work_pid_; } -- cgit v1.2.3