summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYabin Cui <yabinc@google.com>2017-07-19 17:54:57 -0700
committerYabin Cui <yabinc@google.com>2017-07-21 15:36:56 -0700
commit68b8383d76a7a18bdaee13949cc66ca012cc3dee (patch)
treef6953d1e94e45b61cdfbf3805169cc223b6c7cbb
parent87fcb8c6c18fa934f34b0af5f2f812076c90fabf (diff)
downloadextras-68b8383d76a7a18bdaee13949cc66ca012cc3dee.tar.gz
simpleperf: support tracing offcpu time.
1. Add --trace-offcpu option in record command, to record the callstack when a thread is scheduled off cpu. 2. Change the report command to report time spent not running on cpu when --trace-offcpu option is used for recording. 3. Add related unittests and runtests. Bug: http://b/37572306 Test: run simpleperf_unit_test. Change-Id: Ia50cc39c8dde0c8fb1b1facbcb26bbd0a7ab1351
-rw-r--r--simpleperf/cmd_dumprecord.cpp1
-rw-r--r--simpleperf/cmd_record.cpp49
-rw-r--r--simpleperf/cmd_record_test.cpp35
-rw-r--r--simpleperf/cmd_report.cpp184
-rw-r--r--simpleperf/cmd_report_test.cpp15
-rw-r--r--simpleperf/event_fd.cpp6
-rw-r--r--simpleperf/event_selection_set.cpp60
-rw-r--r--simpleperf/event_selection_set.h2
-rw-r--r--simpleperf/event_type.cpp34
-rw-r--r--simpleperf/event_type.h13
-rw-r--r--simpleperf/get_test_data.h3
-rw-r--r--simpleperf/runtest/Android.build.mk6
-rw-r--r--simpleperf/runtest/Android.mk4
-rw-r--r--simpleperf/runtest/run_and_sleep.cpp46
-rw-r--r--simpleperf/runtest/runtest.conf20
-rw-r--r--simpleperf/runtest/runtest.py39
-rw-r--r--simpleperf/sample_tree.h2
-rw-r--r--simpleperf/test_util.h6
-rw-r--r--simpleperf/testdata/perf_with_trace_offcpu.databin0 -> 197611 bytes
19 files changed, 455 insertions, 70 deletions
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp
index 2ce5295d..dcd7bf47 100644
--- a/simpleperf/cmd_dumprecord.cpp
+++ b/simpleperf/cmd_dumprecord.cpp
@@ -151,6 +151,7 @@ static const std::string GetFeatureName(int feature) {
{FEAT_PMU_MAPPINGS, "pmu_mappings"},
{FEAT_GROUP_DESC, "group_desc"},
{FEAT_FILE, "file"},
+ {FEAT_META_INFO, "meta_info"},
};
auto it = feature_name_map.find(feature);
if (it != feature_name_map.end()) {
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index 8bd7962f..390535c9 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include <inttypes.h>
#include <libgen.h>
#include <signal.h>
#include <sys/prctl.h>
@@ -150,6 +151,7 @@ class RecordCommand : public Command {
" This option is used to provide files with symbol table and\n"
" debug information, which are used for unwinding and dumping symbols.\n"
"-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n"
+"--trace-offcpu Generate samples when threads are scheduled off cpu.\n"
#if 0
// Below options are only used internally and shouldn't be visible to the public.
"--in-app We are already running in the app's context.\n"
@@ -179,7 +181,8 @@ class RecordCommand : public Command {
sample_record_count_(0),
lost_record_count_(0),
start_profiling_fd_(-1),
- in_app_context_(false) {
+ in_app_context_(false),
+ trace_offcpu_(false) {
// Stop profiling if parent exits.
prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
app_package_name_ = GetDefaultAppPackageName();
@@ -190,6 +193,7 @@ class RecordCommand : public Command {
private:
bool ParseOptions(const std::vector<std::string>& args,
std::vector<std::string>* non_option_args);
+ bool TraceOffCpu();
bool SetEventSelectionFlags();
bool CreateAndInitRecordFile();
std::unique_ptr<RecordFileWriter> CreateRecordFile(
@@ -205,6 +209,7 @@ class RecordCommand : public Command {
bool DumpAdditionalFeatures(const std::vector<std::string>& args);
bool DumpBuildIdFeature();
bool DumpFileFeature();
+ bool DumpMetaInfoFeature();
void CollectHitFileInfo(const SampleRecord& r);
bool use_sample_freq_;
@@ -239,6 +244,7 @@ class RecordCommand : public Command {
int start_profiling_fd_;
std::string app_package_name_;
bool in_app_context_;
+ bool trace_offcpu_;
};
bool RecordCommand::Run(const std::vector<std::string>& args) {
@@ -257,7 +263,7 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
// root.
if (!IsRoot()) {
return RunInAppContext(app_package_name_, "record", args, workload_args.size(),
- record_filename_, !event_selection_set_.GetTracepointEvents().empty());
+ record_filename_, true);
}
}
if (event_selection_set_.empty()) {
@@ -265,6 +271,9 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
return false;
}
}
+ if (trace_offcpu_ && !TraceOffCpu()) {
+ return false;
+ }
if (!SetEventSelectionFlags()) {
return false;
}
@@ -586,6 +595,8 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
return false;
}
event_selection_set_.AddMonitoredThreads(tids);
+ } else if (args[i] == "--trace-offcpu") {
+ trace_offcpu_ = true;
} else if (args[i] == "--tracepoint-events") {
if (!NextArgumentOrError(args, &i)) {
return false;
@@ -654,6 +665,24 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
return true;
}
+bool RecordCommand::TraceOffCpu() {
+ if (FindEventTypeByName("sched:sched_switch") == nullptr) {
+ LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available";
+ return false;
+ }
+ for (auto& event_type : event_selection_set_.GetTracepointEvents()) {
+ if (event_type->name == "sched:sched_switch") {
+ LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event";
+ return false;
+ }
+ }
+ if (!IsDumpingRegsForTracepointEventsSupported()) {
+ LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel";
+ return false;
+ }
+ return event_selection_set_.AddEventType("sched:sched_switch");
+}
+
bool RecordCommand::SetEventSelectionFlags() {
if (use_sample_freq_) {
event_selection_set_.SetSampleFreq(sample_freq_);
@@ -1033,10 +1062,7 @@ bool RecordCommand::DumpAdditionalFeatures(
!record_file_writer_->WriteBranchStackFeature()) {
return false;
}
-
- std::unordered_map<std::string, std::string> info_map;
- info_map["simpleperf_version"] = GetSimpleperfVersion();
- if (!record_file_writer_->WriteMetaInfoFeature(info_map)) {
+ if (!DumpMetaInfoFeature()) {
return false;
}
@@ -1130,6 +1156,17 @@ bool RecordCommand::DumpFileFeature() {
return true;
}
+bool RecordCommand::DumpMetaInfoFeature() {
+ std::unordered_map<std::string, std::string> info_map;
+ info_map["simpleperf_version"] = GetSimpleperfVersion();
+ info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false";
+ info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false";
+ // By storing event types information in perf.data, the readers of perf.data have the same
+ // understanding of event types, even if they are on another machine.
+ info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents());
+ return record_file_writer_->WriteMetaInfoFeature(info_map);
+}
+
void RecordCommand::CollectHitFileInfo(const SampleRecord& r) {
const ThreadEntry* thread =
thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid);
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 8b70fcd2..d888a58a 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -431,32 +431,21 @@ TEST(record_cmd, cpu_clock_for_a_long_time) {
}
TEST(record_cmd, dump_regs_for_tracepoint_events) {
+ TEST_REQUIRE_HOST_ROOT();
// Check if the kernel can dump registers for tracepoint events.
// If not, probably a kernel patch below is missing:
// "5b09a094f2 arm64: perf: Fix callchain parse error with kernel tracepoint events"
- std::vector<std::unique_ptr<Workload>> workloads;
- CreateProcesses(1, &workloads);
- std::string pid = std::to_string(workloads[0]->GetPid());
- TemporaryFile tmpfile;
- ASSERT_TRUE(RecordCmd()->Run({"-o", tmpfile.path, "-p", pid, "-e", "sched:sched_switch",
- "-g", "--no-unwind", "--duration", "1"}));
+ ASSERT_TRUE(IsDumpingRegsForTracepointEventsSupported());
+}
- // If the kernel patch is missing, all regs dumped in sample records are zero.
+TEST(record_cmd, trace_offcpu_option) {
+ // On linux host, we need root privilege to read tracepoint events.
+ TEST_REQUIRE_HOST_ROOT();
+ TemporaryFile tmpfile;
+ ASSERT_TRUE(RunRecordCmd({"--trace-offcpu"}, tmpfile.path));
std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile.path);
- CHECK(reader != nullptr);
- std::unique_ptr<Record> r;
- bool regs_all_zero = true;
- while (reader->ReadRecord(r) && r && regs_all_zero) {
- if (r->type() != PERF_RECORD_SAMPLE) {
- continue;
- }
- SampleRecord* s = static_cast<SampleRecord*>(r.get());
- for (size_t i = 0; i < s->regs_user_data.reg_nr; ++i) {
- if (s->regs_user_data.regs[i] != 0u) {
- regs_all_zero = false;
- break;
- }
- }
- }
- ASSERT_FALSE(regs_all_zero);
+ ASSERT_TRUE(reader != nullptr);
+ std::unordered_map<std::string, std::string> info_map;
+ ASSERT_TRUE(reader->ReadMetaInfoFeature(&info_map));
+ ASSERT_EQ(info_map["trace_offcpu"], "true");
}
diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp
index 29cf743a..5965e635 100644
--- a/simpleperf/cmd_report.cpp
+++ b/simpleperf/cmd_report.cpp
@@ -105,10 +105,9 @@ struct SampleTree {
BUILD_COMPARE_VALUE_FUNCTION(CompareVaddrInFile, vaddr_in_file);
BUILD_DISPLAY_HEX64_FUNCTION(DisplayVaddrInFile, vaddr_in_file);
-class ReportCmdSampleTreeBuilder
- : public SampleTreeBuilder<SampleEntry, uint64_t> {
+class ReportCmdSampleTreeBuilder : public SampleTreeBuilder<SampleEntry, uint64_t> {
public:
- ReportCmdSampleTreeBuilder(SampleComparator<SampleEntry> sample_comparator,
+ ReportCmdSampleTreeBuilder(const SampleComparator<SampleEntry>& sample_comparator,
ThreadTree* thread_tree)
: SampleTreeBuilder(sample_comparator),
thread_tree_(thread_tree),
@@ -138,7 +137,17 @@ class ReportCmdSampleTreeBuilder
return sample_tree;
}
+ virtual void ReportCmdProcessSampleRecord(std::shared_ptr<SampleRecord>& r) {
+ return ProcessSampleRecord(*r);
+ }
+
+ virtual void ReportCmdProcessSampleRecord(const SampleRecord& r) {
+ return ProcessSampleRecord(r);
+ }
+
protected:
+ virtual uint64_t GetPeriod(const SampleRecord& r) = 0;
+
SampleEntry* CreateSample(const SampleRecord& r, bool in_kernel,
uint64_t* acc_info) override {
const ThreadEntry* thread =
@@ -148,10 +157,10 @@ class ReportCmdSampleTreeBuilder
uint64_t vaddr_in_file;
const Symbol* symbol =
thread_tree_->FindSymbol(map, r.ip_data.ip, &vaddr_in_file);
- *acc_info = r.period_data.period;
+ uint64_t period = GetPeriod(r);
+ *acc_info = period;
return InsertSample(std::unique_ptr<SampleEntry>(
- new SampleEntry(r.time_data.time, r.period_data.period, 0, 1, thread,
- map, symbol, vaddr_in_file)));
+ new SampleEntry(r.time_data.time, period, 0, 1, thread, map, symbol, vaddr_in_file)));
}
SampleEntry* CreateBranchSample(const SampleRecord& r,
@@ -253,6 +262,53 @@ class ReportCmdSampleTreeBuilder
uint64_t total_error_callchains_;
};
+// Build sample tree based on event count in each sample.
+class EventCountSampleTreeBuilder : public ReportCmdSampleTreeBuilder {
+ public:
+ EventCountSampleTreeBuilder(const SampleComparator<SampleEntry>& sample_comparator,
+ ThreadTree* thread_tree)
+ : ReportCmdSampleTreeBuilder(sample_comparator, thread_tree) { }
+
+ protected:
+ uint64_t GetPeriod(const SampleRecord& r) override {
+ return r.period_data.period;
+ }
+};
+
+// Build sample tree based on the time difference between current sample and next sample.
+class TimestampSampleTreeBuilder : public ReportCmdSampleTreeBuilder {
+ public:
+ TimestampSampleTreeBuilder(const SampleComparator<SampleEntry>& sample_comparator,
+ ThreadTree* thread_tree)
+ : ReportCmdSampleTreeBuilder(sample_comparator, thread_tree) { }
+
+ void ReportCmdProcessSampleRecord(std::shared_ptr<SampleRecord>& r) override {
+ pid_t tid = static_cast<pid_t>(r->tid_data.tid);
+ auto it = next_sample_cache_.find(tid);
+ if (it == next_sample_cache_.end()) {
+ next_sample_cache_[tid] = r;
+ } else {
+ std::shared_ptr<SampleRecord> cur = it->second;
+ it->second = r;
+ ProcessSampleRecord(*cur);
+ }
+ }
+
+ protected:
+ uint64_t GetPeriod(const SampleRecord& r) override {
+ auto it = next_sample_cache_.find(r.tid_data.tid);
+ CHECK(it != next_sample_cache_.end());
+ // Normally the samples are sorted by time, but check here for safety.
+ if (it->second->time_data.time > r.time_data.time) {
+ return it->second->time_data.time - r.time_data.time;
+ }
+ return 1u;
+ }
+
+ private:
+ std::unordered_map<pid_t, std::shared_ptr<SampleRecord>> next_sample_cache_;
+};
+
struct SampleTreeBuilderOptions {
SampleComparator<SampleEntry> comparator;
ThreadTree* thread_tree;
@@ -266,10 +322,15 @@ struct SampleTreeBuilderOptions {
bool build_callchain;
bool use_caller_as_callchain_root;
bool strict_unwind_arch_check;
+ bool trace_offcpu;
std::unique_ptr<ReportCmdSampleTreeBuilder> CreateSampleTreeBuilder() {
- std::unique_ptr<ReportCmdSampleTreeBuilder> builder(
- new ReportCmdSampleTreeBuilder(comparator, thread_tree));
+ std::unique_ptr<ReportCmdSampleTreeBuilder> builder;
+ if (trace_offcpu) {
+ builder.reset(new TimestampSampleTreeBuilder(comparator, thread_tree));
+ } else {
+ builder.reset(new EventCountSampleTreeBuilder(comparator, thread_tree));
+ }
builder->SetFilters(pid_filter, tid_filter, comm_filter, dso_filter, symbol_filter);
builder->SetBranchSampleOption(use_branch_address);
builder->SetCallChainSampleOptions(accumulate_callchain, build_callchain,
@@ -365,16 +426,20 @@ class ReportCommand : public Command {
callgraph_max_stack_(UINT32_MAX),
callgraph_percent_limit_(0),
raw_period_(false),
- brief_callgraph_(true) {}
+ brief_callgraph_(true),
+ trace_offcpu_(false),
+ sched_switch_attr_id_(0u) {}
bool Run(const std::vector<std::string>& args);
private:
bool ParseOptions(const std::vector<std::string>& args);
+ bool ReadMetaInfoFromRecordFile();
bool ReadEventAttrFromRecordFile();
bool ReadFeaturesFromRecordFile();
bool ReadSampleTreeFromRecordFile();
bool ProcessRecord(std::unique_ptr<Record> record);
+ void ProcessSampleRecordInTraceOffCpuMode(std::unique_ptr<Record> record, size_t attr_id);
bool ProcessTracingData(const std::vector<char>& data);
bool PrintReport();
void PrintReportContext(FILE* fp);
@@ -401,8 +466,12 @@ class ReportCommand : public Command {
double callgraph_percent_limit_;
bool raw_period_;
bool brief_callgraph_;
+ bool trace_offcpu_;
+ size_t sched_switch_attr_id_;
std::string report_filename_;
+ std::unordered_map<std::string, std::string> meta_info_;
+ std::unique_ptr<ScopedEventTypes> scoped_event_types_;
};
bool ReportCommand::Run(const std::vector<std::string>& args) {
@@ -416,6 +485,9 @@ bool ReportCommand::Run(const std::vector<std::string>& args) {
if (record_file_reader_ == nullptr) {
return false;
}
+ if (!ReadMetaInfoFromRecordFile()) {
+ return false;
+ }
if (!ReadEventAttrFromRecordFile()) {
return false;
}
@@ -676,8 +748,30 @@ bool ReportCommand::ParseOptions(const std::vector<std::string>& args) {
return true;
}
+bool ReportCommand::ReadMetaInfoFromRecordFile() {
+ if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) {
+ if (!record_file_reader_->ReadMetaInfoFeature(&meta_info_)) {
+ return false;
+ }
+ auto it = meta_info_.find("system_wide_collection");
+ if (it != meta_info_.end()) {
+ system_wide_collection_ = it->second == "true";
+ }
+ it = meta_info_.find("trace_offcpu");
+ if (it != meta_info_.end()) {
+ trace_offcpu_ = it->second == "true";
+ }
+ it = meta_info_.find("event_type_info");
+ if (it != meta_info_.end()) {
+ scoped_event_types_.reset(new ScopedEventTypes(it->second));
+ }
+ }
+ return true;
+}
+
bool ReportCommand::ReadEventAttrFromRecordFile() {
std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
+ LOG(ERROR) << "attrs.size() = " << attrs.size();
for (const auto& attr_with_id : attrs) {
EventAttrWithName attr;
attr.attr = *attr_with_id.attr;
@@ -698,6 +792,17 @@ bool ReportCommand::ReadEventAttrFromRecordFile() {
return false;
}
}
+ if (trace_offcpu_) {
+ size_t i;
+ for (i = 0; i < event_attrs_.size(); ++i) {
+ LOG(ERROR) << "event_attrs[" << i << "].name = " << event_attrs_[i].name;
+ if (event_attrs_[i].name == "sched:sched_switch") {
+ break;
+ }
+ }
+ CHECK_NE(i, event_attrs_.size());
+ sched_switch_attr_id_ = i;
+ }
return true;
}
@@ -716,19 +821,21 @@ bool ReportCommand::ReadFeaturesFromRecordFile() {
std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature();
if (!cmdline.empty()) {
record_cmdline_ = android::base::Join(cmdline, ' ');
- // TODO: the code to detect system wide collection option is fragile, remove
- // it once we can do cross unwinding.
- for (size_t i = 0; i < cmdline.size(); i++) {
- std::string& s = cmdline[i];
- if (s == "-a") {
- system_wide_collection_ = true;
- break;
- } else if (s == "--call-graph" || s == "--cpu" || s == "-e" ||
- s == "-f" || s == "-F" || s == "-j" || s == "-m" ||
- s == "-o" || s == "-p" || s == "-t") {
- i++;
- } else if (!s.empty() && s[0] != '-') {
- break;
+ if (meta_info_.find("system_wide_collection") == meta_info_.end()) {
+ // TODO: the code to detect system wide collection option is fragile, remove
+ // it once we can do cross unwinding.
+ for (size_t i = 0; i < cmdline.size(); i++) {
+ std::string& s = cmdline[i];
+ if (s == "-a") {
+ system_wide_collection_ = true;
+ break;
+ } else if (s == "--call-graph" || s == "--cpu" || s == "-e" ||
+ s == "-f" || s == "-F" || s == "-j" || s == "-m" ||
+ s == "-o" || s == "-p" || s == "-t") {
+ i++;
+ } else if (!s.empty() && s[0] != '-') {
+ break;
+ }
}
}
}
@@ -753,6 +860,7 @@ bool ReportCommand::ReadSampleTreeFromRecordFile() {
sample_tree_builder_options_.accumulate_callchain = accumulate_callchain_;
sample_tree_builder_options_.build_callchain = print_callgraph_;
sample_tree_builder_options_.use_caller_as_callchain_root = !callgraph_show_callee_;
+ sample_tree_builder_options_.trace_offcpu = trace_offcpu_;
for (size_t i = 0; i < event_attrs_.size(); ++i) {
sample_tree_builder_.push_back(sample_tree_builder_options_.CreateSampleTreeBuilder());
@@ -775,8 +883,12 @@ bool ReportCommand::ProcessRecord(std::unique_ptr<Record> record) {
thread_tree_.Update(*record);
if (record->type() == PERF_RECORD_SAMPLE) {
size_t attr_id = record_file_reader_->GetAttrIndexOfRecord(record.get());
- sample_tree_builder_[attr_id]->ProcessSampleRecord(
- *static_cast<const SampleRecord*>(record.get()));
+ if (!trace_offcpu_) {
+ sample_tree_builder_[attr_id]->ReportCmdProcessSampleRecord(
+ *static_cast<SampleRecord*>(record.get()));
+ } else {
+ ProcessSampleRecordInTraceOffCpuMode(std::move(record), attr_id);
+ }
} else if (record->type() == PERF_RECORD_TRACING_DATA) {
const auto& r = *static_cast<TracingDataRecord*>(record.get());
if (!ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size))) {
@@ -786,6 +898,24 @@ bool ReportCommand::ProcessRecord(std::unique_ptr<Record> record) {
return true;
}
+
+void ReportCommand::ProcessSampleRecordInTraceOffCpuMode(std::unique_ptr<Record> record,
+ size_t attr_id) {
+ std::shared_ptr<SampleRecord> r(static_cast<SampleRecord*>(record.release()));
+ if (attr_id == sched_switch_attr_id_) {
+ // If this sample belongs to sched_switch event, we should broadcast the offcpu info
+ // to other event types.
+ for (size_t i = 0; i < event_attrs_.size(); ++i) {
+ if (i == sched_switch_attr_id_) {
+ continue;
+ }
+ sample_tree_builder_[i]->ReportCmdProcessSampleRecord(r);
+ }
+ } else {
+ sample_tree_builder_[attr_id]->ReportCmdProcessSampleRecord(r);
+ }
+}
+
bool ReportCommand::ProcessTracingData(const std::vector<char>& data) {
Tracing tracing(data);
for (auto& attr : event_attrs_) {
@@ -810,6 +940,9 @@ bool ReportCommand::PrintReport() {
}
PrintReportContext(report_fp);
for (size_t i = 0; i < event_attrs_.size(); ++i) {
+ if (trace_offcpu_ && i == sched_switch_attr_id_) {
+ continue;
+ }
if (i != 0) {
fprintf(report_fp, "\n");
}
@@ -823,7 +956,8 @@ bool ReportCommand::PrintReport() {
sample_tree.total_error_callchains,
sample_tree.total_error_callchains * 100.0 / sample_tree.total_samples);
}
- fprintf(report_fp, "Event count: %" PRIu64 "\n\n", sample_tree.total_period);
+ const char* period_prefix = trace_offcpu_ ? "Time in ns" : "Event count";
+ fprintf(report_fp, "%s: %" PRIu64 "\n\n", period_prefix, sample_tree.total_period);
sample_tree_displayer_->DisplaySamples(report_fp, sample_tree.samples, &sample_tree);
}
fflush(report_fp);
diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp
index f1f5b1fc..f1a9eb66 100644
--- a/simpleperf/cmd_report_test.cpp
+++ b/simpleperf/cmd_report_test.cpp
@@ -471,6 +471,21 @@ TEST_F(ReportCommandTest, full_callgraph_option) {
ASSERT_EQ(content.find("skipped in brief callgraph mode"), std::string::npos);
}
+TEST_F(ReportCommandTest, report_offcpu_time) {
+ Report(PERF_DATA_WITH_TRACE_OFFCPU, {"--children"});
+ ASSERT_TRUE(success);
+ ASSERT_NE(content.find("Time in ns"), std::string::npos);
+ bool found = false;
+ for (auto& line : lines) {
+ if (line.find("SleepFunction") != std::string::npos) {
+ ASSERT_NE(line.find("46.29%"), std::string::npos);
+ found = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(found);
+}
+
#if defined(__linux__)
#include "event_selection_set.h"
diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp
index c6edb246..26f542f8 100644
--- a/simpleperf/event_fd.cpp
+++ b/simpleperf/event_fd.cpp
@@ -56,7 +56,11 @@ std::unique_ptr<EventFd> EventFd::OpenEventFile(const perf_event_attr& attr,
if (attr.freq) {
uint64_t max_sample_freq;
if (GetMaxSampleFrequency(&max_sample_freq) && max_sample_freq < attr.sample_freq) {
- PLOG(INFO) << "Adjust sample freq to max allowed sample freq " << max_sample_freq;
+ static bool warned = false;
+ if (!warned) {
+ warned = true;
+ PLOG(INFO) << "Adjust sample freq to max allowed sample freq " << max_sample_freq;
+ }
real_attr.sample_freq = max_sample_freq;
}
}
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index 86f57b9d..5f8cf5cb 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -16,6 +16,9 @@
#include "event_selection_set.h"
+#include <atomic>
+#include <thread>
+
#include <android-base/logging.h>
#include "environment.h"
@@ -53,6 +56,53 @@ bool IsDwarfCallChainSamplingSupported() {
return IsEventAttrSupported(attr);
}
+bool IsDumpingRegsForTracepointEventsSupported() {
+ const EventType* event_type = FindEventTypeByName("sched:sched_switch");
+ if (event_type == nullptr) {
+ return false;
+ }
+ std::atomic<bool> done(false);
+ std::atomic<pid_t> thread_id(0);
+ std::thread thread([&]() {
+ thread_id = gettid();
+ while (!done) {
+ usleep(1);
+ }
+ usleep(1); // Make a sched out to generate one sample.
+ });
+ while (thread_id == 0) {
+ usleep(1);
+ }
+ perf_event_attr attr = CreateDefaultPerfEventAttr(*event_type);
+ attr.freq = 0;
+ attr.sample_period = 1;
+ std::unique_ptr<EventFd> event_fd =
+ EventFd::OpenEventFile(attr, thread_id, -1, nullptr);
+ if (event_fd == nullptr) {
+ return false;
+ }
+ if (!event_fd->CreateMappedBuffer(4, true)) {
+ return false;
+ }
+ done = true;
+ thread.join();
+
+ std::vector<char> buffer;
+ size_t buffer_pos = 0;
+ size_t size = event_fd->GetAvailableMmapData(buffer, buffer_pos);
+ std::vector<std::unique_ptr<Record>> records =
+ ReadRecordsFromBuffer(attr, buffer.data(), size);
+ for (auto& r : records) {
+ if (r->type() == PERF_RECORD_SAMPLE) {
+ auto& record = *static_cast<SampleRecord*>(r.get());
+ if (record.ip_data.ip != 0) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
bool EventSelectionSet::BuildAndCheckEventSelection(
const std::string& event_name, EventSelection* selection) {
std::unique_ptr<EventTypeAndModifier> event_type = ParseEventType(event_name);
@@ -116,6 +166,16 @@ bool EventSelectionSet::AddEventGroup(
return true;
}
+std::vector<const EventType*> EventSelectionSet::GetEvents() const {
+ std::vector<const EventType*> result;
+ for (const auto& group : groups_) {
+ for (const auto& selection : group) {
+ result.push_back(&selection.event_type_modifier.event_type);
+ }
+ }
+ return result;
+}
+
std::vector<const EventType*> EventSelectionSet::GetTracepointEvents() const {
std::vector<const EventType*> result;
for (const auto& group : groups_) {
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index 97ad7e59..f5cb53ab 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -73,6 +73,7 @@ class EventSelectionSet {
bool AddEventType(const std::string& event_name);
bool AddEventGroup(const std::vector<std::string>& event_names);
+ std::vector<const EventType*> GetEvents() const;
std::vector<const EventType*> GetTracepointEvents() const;
bool HasInplaceSampler() const;
std::vector<EventAttrWithId> GetEventAttrWithId() const;
@@ -186,5 +187,6 @@ class EventSelectionSet {
bool IsBranchSamplingSupported();
bool IsDwarfCallChainSamplingSupported();
+bool IsDumpingRegsForTracepointEventsSupported();
#endif // SIMPLE_PERF_EVENT_SELECTION_SET_H_
diff --git a/simpleperf/event_type.cpp b/simpleperf/event_type.cpp
index bc639d12..21da17c9 100644
--- a/simpleperf/event_type.cpp
+++ b/simpleperf/event_type.cpp
@@ -51,6 +51,9 @@ bool SetTracepointEventsFilePath(const std::string& filepath) {
std::string GetTracepointEvents() {
std::string result;
for (const EventType& event : GetAllEventTypes()) {
+ if (event.type != PERF_TYPE_TRACEPOINT) {
+ continue;
+ }
if (!result.empty()) {
result.push_back('\n');
}
@@ -107,8 +110,37 @@ static std::vector<EventType> GetTracepointEventTypes() {
return result;
}
+static std::vector<EventType> event_type_array;
+
+std::string ScopedEventTypes::BuildString(const std::vector<const EventType*>& event_types) {
+ std::string result;
+ for (auto type : event_types) {
+ if (!result.empty()) {
+ result.push_back('\n');
+ }
+ result += android::base::StringPrintf("%s,%u,%" PRIu64, type->name.c_str(), type->type,
+ type->config);
+ }
+ return result;
+}
+
+ScopedEventTypes::ScopedEventTypes(const std::string& event_type_str) {
+ saved_event_types_ = std::move(event_type_array);
+ event_type_array.clear();
+ for (auto& s : android::base::Split(event_type_str, "\n")) {
+ std::string name = s.substr(0, s.find(','));
+ uint32_t type;
+ uint64_t config;
+ sscanf(s.c_str() + name.size(), ",%u,%" PRIu64, &type, &config);
+ event_type_array.emplace_back(name, type, config, "", "");
+ }
+}
+
+ScopedEventTypes::~ScopedEventTypes() {
+ event_type_array = std::move(saved_event_types_);
+}
+
const std::vector<EventType>& GetAllEventTypes() {
- static std::vector<EventType> event_type_array;
if (event_type_array.empty()) {
event_type_array.insert(event_type_array.end(), static_event_type_array.begin(),
static_event_type_array.end());
diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h
index 6ec544d8..1dd7f39f 100644
--- a/simpleperf/event_type.h
+++ b/simpleperf/event_type.h
@@ -54,6 +54,19 @@ struct EventType {
bool SetTracepointEventsFilePath(const std::string& filepath);
std::string GetTracepointEvents();
+
+// Used to temporarily change event types returned by GetAllEventTypes().
+class ScopedEventTypes {
+ public:
+ static std::string BuildString(const std::vector<const EventType*>& event_types);
+
+ ScopedEventTypes(const std::string& event_type_str);
+ ~ScopedEventTypes();
+
+ private:
+ std::vector<EventType> saved_event_types_;
+};
+
const std::vector<EventType>& GetAllEventTypes();
const EventType* FindEventTypeByName(const std::string& name);
diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h
index 963fe006..3b9121c0 100644
--- a/simpleperf/get_test_data.h
+++ b/simpleperf/get_test_data.h
@@ -108,4 +108,7 @@ static const std::string INVALID_PERF_DATA = "invalid_perf.data";
// generated by recording an app.
static const std::string PERF_DATA_WITH_WRONG_IP_IN_CALLCHAIN = "wrong_ip_callchain_perf.data";
+// generated by `simpleperf record --trace-offcpu --duration 2 -g ./simpleperf_runtest_run_and_sleep64`.
+static const std::string PERF_DATA_WITH_TRACE_OFFCPU = "perf_with_trace_offcpu.data";
+
#endif // SIMPLE_PERF_GET_TEST_DATA_H_
diff --git a/simpleperf/runtest/Android.build.mk b/simpleperf/runtest/Android.build.mk
index 5a8f92d8..65f5fdb0 100644
--- a/simpleperf/runtest/Android.build.mk
+++ b/simpleperf/runtest/Android.build.mk
@@ -31,16 +31,16 @@ LOCAL_STRIP_MODULE := false
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.build.mk
include $(BUILD_EXECUTABLE)
-ifeq ($(HOST_OS),linux)
include $(CLEAR_VARS)
LOCAL_CLANG := true
LOCAL_CPPFLAGS := $(simpleperf_runtest_cppflags)
LOCAL_SRC_FILES := $(module_src_files)
LOCAL_SHARED_LIBRARIES := libsimpleperf_inplace_sampler
LOCAL_MODULE := $(module)
+LOCAL_MODULE_HOST_OS := linux
LOCAL_MULTILIB := both
LOCAL_MODULE_STEM_32 := $(module)32
LOCAL_MODULE_STEM_64 := $(module)64
+LOCAL_LDLIBS_linux := -lrt
LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.build.mk
-include $(BUILD_HOST_EXECUTABLE)
-endif \ No newline at end of file
+include $(BUILD_HOST_EXECUTABLE) \ No newline at end of file
diff --git a/simpleperf/runtest/Android.mk b/simpleperf/runtest/Android.mk
index 55bf3b79..ad3f5c7f 100644
--- a/simpleperf/runtest/Android.mk
+++ b/simpleperf/runtest/Android.mk
@@ -42,4 +42,8 @@ include $(LOCAL_PATH)/Android.build.mk
module := simpleperf_runtest_function_indirect_recursive
module_src_files := function_indirect_recursive.cpp
+include $(LOCAL_PATH)/Android.build.mk
+
+module := simpleperf_runtest_run_and_sleep
+module_src_files := run_and_sleep.cpp
include $(LOCAL_PATH)/Android.build.mk \ No newline at end of file
diff --git a/simpleperf/runtest/run_and_sleep.cpp b/simpleperf/runtest/run_and_sleep.cpp
new file mode 100644
index 00000000..5ed09ac8
--- /dev/null
+++ b/simpleperf/runtest/run_and_sleep.cpp
@@ -0,0 +1,46 @@
+#include <inttypes.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#define noinline __attribute__((__noinline__))
+
+static inline uint64_t GetSystemClock() {
+ timespec ts;
+ // Assume clock_gettime() doesn't fail.
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
+constexpr int LOOP_COUNT = 100000000;
+uint64_t noinline RunFunction() {
+ uint64_t start_time_in_ns = GetSystemClock();
+ for (volatile int i = 0; i < LOOP_COUNT; ++i) {
+ }
+ return GetSystemClock() - start_time_in_ns;
+}
+
+uint64_t noinline SleepFunction(unsigned long long sleep_time_in_ns) {
+ uint64_t start_time_in_ns = GetSystemClock();
+ struct timespec req;
+ req.tv_sec = sleep_time_in_ns / 1000000000;
+ req.tv_nsec = sleep_time_in_ns % 1000000000;
+ nanosleep(&req, nullptr);
+ return GetSystemClock() - start_time_in_ns;
+}
+
+void noinline GlobalFunction() {
+ uint64_t total_sleep_time_in_ns = 0;
+ uint64_t total_run_time_in_ns = 0;
+ while (true) {
+ total_run_time_in_ns += RunFunction();
+ if (total_sleep_time_in_ns < total_run_time_in_ns) {
+ total_sleep_time_in_ns += SleepFunction(total_run_time_in_ns - total_sleep_time_in_ns);
+ }
+ }
+}
+
+int main() {
+ GlobalFunction();
+ return 0;
+}
diff --git a/simpleperf/runtest/runtest.conf b/simpleperf/runtest/runtest.conf
index 2fbbd985..22babf9b 100644
--- a/simpleperf/runtest/runtest.conf
+++ b/simpleperf/runtest/runtest.conf
@@ -203,4 +203,24 @@
</symbol_overhead>
</test>
+<test name="run_and_sleep">
+ <executable name="simpleperf_runtest_run_and_sleep"/>
+
+ <symbol_overhead>
+ <symbol name="RunFunction()" min="80"/>
+ </symbol_overhead>
+
+</test>
+
+<test name="run_and_sleep_trace_offcpu">
+ <executable name="simpleperf_runtest_run_and_sleep"/>
+ <disable_host/>
+ <record option="--trace-offcpu"/>
+
+ <symbol_children_overhead>
+ <symbol name="RunFunction()" min="20" max="80"/>
+ <symbol name="SleepFunction(unsigned long long)" min="20" max="80"/>
+ </symbol_children_overhead>
+</test>
+
</runtests>
diff --git a/simpleperf/runtest/runtest.py b/simpleperf/runtest/runtest.py
index a1b4520b..e4bfe16d 100644
--- a/simpleperf/runtest/runtest.py
+++ b/simpleperf/runtest/runtest.py
@@ -168,12 +168,16 @@ class Test(object):
self,
test_name,
executable_name,
+ disable_host,
+ record_options,
report_options,
symbol_overhead_requirements,
symbol_children_overhead_requirements,
symbol_relation_requirements):
self.test_name = test_name
self.executable_name = executable_name
+ self.disable_host = disable_host
+ self.record_options = record_options
self.report_options = report_options
self.symbol_overhead_requirements = symbol_overhead_requirements
self.symbol_children_overhead_requirements = (
@@ -184,6 +188,8 @@ class Test(object):
strs = []
strs.append('Test test_name=%s' % self.test_name)
strs.append('\texecutable_name=%s' % self.executable_name)
+ strs.append('\tdisable_host=%s' % self.disable_host)
+ strs.append('\trecord_options=%s' % (' '.join(self.record_options)))
strs.append('\treport_options=%s' % (' '.join(self.report_options)))
strs.append('\tsymbol_overhead_requirements:')
for req in self.symbol_overhead_requirements:
@@ -206,6 +212,8 @@ def load_config_file(config_file):
assert test.tag == 'test'
test_name = test.attrib['name']
executable_name = None
+ disable_host = False
+ record_options = []
report_options = []
symbol_overhead_requirements = []
symbol_children_overhead_requirements = []
@@ -213,6 +221,10 @@ def load_config_file(config_file):
for test_item in test:
if test_item.tag == 'executable':
executable_name = test_item.attrib['name']
+ elif test_item.tag == 'disable_host':
+ disable_host = True
+ elif test_item.tag == 'record':
+ record_options = test_item.attrib['option'].split()
elif test_item.tag == 'report':
report_options = test_item.attrib['option'].split()
elif (test_item.tag == 'symbol_overhead' or
@@ -256,6 +268,8 @@ def load_config_file(config_file):
Test(
test_name,
executable_name,
+ disable_host,
+ record_options,
report_options,
symbol_overhead_requirements,
symbol_children_overhead_requirements,
@@ -403,18 +417,20 @@ class ReportAnalyzer(object):
continue
if not line[0].isspace():
if has_callgraph:
- m = re.search(r'^([\d\.]+)%\s+([\d\.]+)%\s+(\S+).*\s+(\S+)$', line)
- children_overhead = float(m.group(1))
- overhead = float(m.group(2))
- comm = m.group(3)
- symbol_name = m.group(4)
+ items = line.split(None, 6)
+ assert len(items) == 7
+ children_overhead = float(items[0][:-1])
+ overhead = float(items[1][:-1])
+ comm = items[2]
+ symbol_name = items[6]
cur_symbol = Symbol(symbol_name, comm, overhead, children_overhead)
symbols.append(cur_symbol)
else:
- m = re.search(r'^([\d\.]+)%\s+(\S+).*\s+(\S+)$', line)
- overhead = float(m.group(1))
- comm = m.group(2)
- symbol_name = m.group(3)
+ items = line.split(None, 5)
+ assert len(items) == 6
+ overhead = float(items[0][:-1])
+ comm = items[1]
+ symbol_name = items[5]
cur_symbol = Symbol(symbol_name, comm, overhead, 0)
symbols.append(cur_symbol)
# Each report item can have different column depths.
@@ -551,7 +567,10 @@ def build_runner(target, use_callgraph, sampler):
def test_with_runner(runner, tests):
report_analyzer = ReportAnalyzer()
for test in tests:
- runner.record(test.executable_name, 'perf.data')
+ if test.disable_host and runner.target.startswith('host'):
+ print('Skip test %s on %s' % (test.test_name, runner.target))
+ continue
+ runner.record(test.executable_name, 'perf.data', additional_options = test.record_options)
if runner.sampler == 'inplace-sampler':
# TODO: fix this when inplace-sampler actually works.
runner.report('perf.data', 'perf.report')
diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h
index c7a0ac84..67f28dac 100644
--- a/simpleperf/sample_tree.h
+++ b/simpleperf/sample_tree.h
@@ -53,7 +53,7 @@
template <typename EntryT, typename AccumulateInfoT>
class SampleTreeBuilder {
public:
- explicit SampleTreeBuilder(SampleComparator<EntryT> comparator)
+ explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator)
: sample_set_(comparator),
accumulate_callchain_(false),
sample_comparator_(comparator),
diff --git a/simpleperf/test_util.h b/simpleperf/test_util.h
index f242c896..bb061ba2 100644
--- a/simpleperf/test_util.h
+++ b/simpleperf/test_util.h
@@ -40,3 +40,9 @@ bool IsRoot();
GTEST_LOG_(INFO) << "Didn't test \"" << #TestStatement << "\" requires root privileges"; \
} \
} while (0)
+
+#if defined(__ANDROID__)
+#define TEST_REQUIRE_HOST_ROOT()
+#else
+#define TEST_REQUIRE_HOST_ROOT() if (!IsRoot()) return
+#endif
diff --git a/simpleperf/testdata/perf_with_trace_offcpu.data b/simpleperf/testdata/perf_with_trace_offcpu.data
new file mode 100644
index 00000000..6d0c5e0a
--- /dev/null
+++ b/simpleperf/testdata/perf_with_trace_offcpu.data
Binary files differ