diff options
author | Yabin Cui <yabinc@google.com> | 2017-07-19 17:54:57 -0700 |
---|---|---|
committer | Yabin Cui <yabinc@google.com> | 2017-07-21 15:36:56 -0700 |
commit | 68b8383d76a7a18bdaee13949cc66ca012cc3dee (patch) | |
tree | f6953d1e94e45b61cdfbf3805169cc223b6c7cbb | |
parent | 87fcb8c6c18fa934f34b0af5f2f812076c90fabf (diff) | |
download | extras-68b8383d76a7a18bdaee13949cc66ca012cc3dee.tar.gz |
simpleperf: support tracing offcpu time.
1. Add --trace-offcpu option in record command, to record the callstack
when a thread is scheduled off cpu.
2. Change the report command to report time spent not running on cpu
when --trace-offcpu option is used for recording.
3. Add related unittests and runtests.
Bug: http://b/37572306
Test: run simpleperf_unit_test.
Change-Id: Ia50cc39c8dde0c8fb1b1facbcb26bbd0a7ab1351
-rw-r--r-- | simpleperf/cmd_dumprecord.cpp | 1 | ||||
-rw-r--r-- | simpleperf/cmd_record.cpp | 49 | ||||
-rw-r--r-- | simpleperf/cmd_record_test.cpp | 35 | ||||
-rw-r--r-- | simpleperf/cmd_report.cpp | 184 | ||||
-rw-r--r-- | simpleperf/cmd_report_test.cpp | 15 | ||||
-rw-r--r-- | simpleperf/event_fd.cpp | 6 | ||||
-rw-r--r-- | simpleperf/event_selection_set.cpp | 60 | ||||
-rw-r--r-- | simpleperf/event_selection_set.h | 2 | ||||
-rw-r--r-- | simpleperf/event_type.cpp | 34 | ||||
-rw-r--r-- | simpleperf/event_type.h | 13 | ||||
-rw-r--r-- | simpleperf/get_test_data.h | 3 | ||||
-rw-r--r-- | simpleperf/runtest/Android.build.mk | 6 | ||||
-rw-r--r-- | simpleperf/runtest/Android.mk | 4 | ||||
-rw-r--r-- | simpleperf/runtest/run_and_sleep.cpp | 46 | ||||
-rw-r--r-- | simpleperf/runtest/runtest.conf | 20 | ||||
-rw-r--r-- | simpleperf/runtest/runtest.py | 39 | ||||
-rw-r--r-- | simpleperf/sample_tree.h | 2 | ||||
-rw-r--r-- | simpleperf/test_util.h | 6 | ||||
-rw-r--r-- | simpleperf/testdata/perf_with_trace_offcpu.data | bin | 0 -> 197611 bytes |
19 files changed, 455 insertions, 70 deletions
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp index 2ce5295d..dcd7bf47 100644 --- a/simpleperf/cmd_dumprecord.cpp +++ b/simpleperf/cmd_dumprecord.cpp @@ -151,6 +151,7 @@ static const std::string GetFeatureName(int feature) { {FEAT_PMU_MAPPINGS, "pmu_mappings"}, {FEAT_GROUP_DESC, "group_desc"}, {FEAT_FILE, "file"}, + {FEAT_META_INFO, "meta_info"}, }; auto it = feature_name_map.find(feature); if (it != feature_name_map.end()) { diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp index 8bd7962f..390535c9 100644 --- a/simpleperf/cmd_record.cpp +++ b/simpleperf/cmd_record.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include <inttypes.h> #include <libgen.h> #include <signal.h> #include <sys/prctl.h> @@ -150,6 +151,7 @@ class RecordCommand : public Command { " This option is used to provide files with symbol table and\n" " debug information, which are used for unwinding and dumping symbols.\n" "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n" +"--trace-offcpu Generate samples when threads are scheduled off cpu.\n" #if 0 // Below options are only used internally and shouldn't be visible to the public. "--in-app We are already running in the app's context.\n" @@ -179,7 +181,8 @@ class RecordCommand : public Command { sample_record_count_(0), lost_record_count_(0), start_profiling_fd_(-1), - in_app_context_(false) { + in_app_context_(false), + trace_offcpu_(false) { // Stop profiling if parent exits. prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0); app_package_name_ = GetDefaultAppPackageName(); @@ -190,6 +193,7 @@ class RecordCommand : public Command { private: bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args); + bool TraceOffCpu(); bool SetEventSelectionFlags(); bool CreateAndInitRecordFile(); std::unique_ptr<RecordFileWriter> CreateRecordFile( @@ -205,6 +209,7 @@ class RecordCommand : public Command { bool DumpAdditionalFeatures(const std::vector<std::string>& args); bool DumpBuildIdFeature(); bool DumpFileFeature(); + bool DumpMetaInfoFeature(); void CollectHitFileInfo(const SampleRecord& r); bool use_sample_freq_; @@ -239,6 +244,7 @@ class RecordCommand : public Command { int start_profiling_fd_; std::string app_package_name_; bool in_app_context_; + bool trace_offcpu_; }; bool RecordCommand::Run(const std::vector<std::string>& args) { @@ -257,7 +263,7 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { // root. if (!IsRoot()) { return RunInAppContext(app_package_name_, "record", args, workload_args.size(), - record_filename_, !event_selection_set_.GetTracepointEvents().empty()); + record_filename_, true); } } if (event_selection_set_.empty()) { @@ -265,6 +271,9 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { return false; } } + if (trace_offcpu_ && !TraceOffCpu()) { + return false; + } if (!SetEventSelectionFlags()) { return false; } @@ -586,6 +595,8 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, return false; } event_selection_set_.AddMonitoredThreads(tids); + } else if (args[i] == "--trace-offcpu") { + trace_offcpu_ = true; } else if (args[i] == "--tracepoint-events") { if (!NextArgumentOrError(args, &i)) { return false; @@ -654,6 +665,24 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, return true; } +bool RecordCommand::TraceOffCpu() { + if (FindEventTypeByName("sched:sched_switch") == nullptr) { + LOG(ERROR) << "Can't trace off cpu because sched:sched_switch event is not available"; + return false; + } + for (auto& event_type : event_selection_set_.GetTracepointEvents()) { + if (event_type->name == "sched:sched_switch") { + LOG(ERROR) << "Trace offcpu can't be used together with sched:sched_switch event"; + return false; + } + } + if (!IsDumpingRegsForTracepointEventsSupported()) { + LOG(ERROR) << "Dumping regs for tracepoint events is not supported by the kernel"; + return false; + } + return event_selection_set_.AddEventType("sched:sched_switch"); +} + bool RecordCommand::SetEventSelectionFlags() { if (use_sample_freq_) { event_selection_set_.SetSampleFreq(sample_freq_); @@ -1033,10 +1062,7 @@ bool RecordCommand::DumpAdditionalFeatures( !record_file_writer_->WriteBranchStackFeature()) { return false; } - - std::unordered_map<std::string, std::string> info_map; - info_map["simpleperf_version"] = GetSimpleperfVersion(); - if (!record_file_writer_->WriteMetaInfoFeature(info_map)) { + if (!DumpMetaInfoFeature()) { return false; } @@ -1130,6 +1156,17 @@ bool RecordCommand::DumpFileFeature() { return true; } +bool RecordCommand::DumpMetaInfoFeature() { + std::unordered_map<std::string, std::string> info_map; + info_map["simpleperf_version"] = GetSimpleperfVersion(); + info_map["system_wide_collection"] = system_wide_collection_ ? "true" : "false"; + info_map["trace_offcpu"] = trace_offcpu_ ? "true" : "false"; + // By storing event types information in perf.data, the readers of perf.data have the same + // understanding of event types, even if they are on another machine. + info_map["event_type_info"] = ScopedEventTypes::BuildString(event_selection_set_.GetEvents()); + return record_file_writer_->WriteMetaInfoFeature(info_map); +} + void RecordCommand::CollectHitFileInfo(const SampleRecord& r) { const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid); diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp index 8b70fcd2..d888a58a 100644 --- a/simpleperf/cmd_record_test.cpp +++ b/simpleperf/cmd_record_test.cpp @@ -431,32 +431,21 @@ TEST(record_cmd, cpu_clock_for_a_long_time) { } TEST(record_cmd, dump_regs_for_tracepoint_events) { + TEST_REQUIRE_HOST_ROOT(); // Check if the kernel can dump registers for tracepoint events. // If not, probably a kernel patch below is missing: // "5b09a094f2 arm64: perf: Fix callchain parse error with kernel tracepoint events" - std::vector<std::unique_ptr<Workload>> workloads; - CreateProcesses(1, &workloads); - std::string pid = std::to_string(workloads[0]->GetPid()); - TemporaryFile tmpfile; - ASSERT_TRUE(RecordCmd()->Run({"-o", tmpfile.path, "-p", pid, "-e", "sched:sched_switch", - "-g", "--no-unwind", "--duration", "1"})); + ASSERT_TRUE(IsDumpingRegsForTracepointEventsSupported()); +} - // If the kernel patch is missing, all regs dumped in sample records are zero. +TEST(record_cmd, trace_offcpu_option) { + // On linux host, we need root privilege to read tracepoint events. + TEST_REQUIRE_HOST_ROOT(); + TemporaryFile tmpfile; + ASSERT_TRUE(RunRecordCmd({"--trace-offcpu"}, tmpfile.path)); std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile.path); - CHECK(reader != nullptr); - std::unique_ptr<Record> r; - bool regs_all_zero = true; - while (reader->ReadRecord(r) && r && regs_all_zero) { - if (r->type() != PERF_RECORD_SAMPLE) { - continue; - } - SampleRecord* s = static_cast<SampleRecord*>(r.get()); - for (size_t i = 0; i < s->regs_user_data.reg_nr; ++i) { - if (s->regs_user_data.regs[i] != 0u) { - regs_all_zero = false; - break; - } - } - } - ASSERT_FALSE(regs_all_zero); + ASSERT_TRUE(reader != nullptr); + std::unordered_map<std::string, std::string> info_map; + ASSERT_TRUE(reader->ReadMetaInfoFeature(&info_map)); + ASSERT_EQ(info_map["trace_offcpu"], "true"); } diff --git a/simpleperf/cmd_report.cpp b/simpleperf/cmd_report.cpp index 29cf743a..5965e635 100644 --- a/simpleperf/cmd_report.cpp +++ b/simpleperf/cmd_report.cpp @@ -105,10 +105,9 @@ struct SampleTree { BUILD_COMPARE_VALUE_FUNCTION(CompareVaddrInFile, vaddr_in_file); BUILD_DISPLAY_HEX64_FUNCTION(DisplayVaddrInFile, vaddr_in_file); -class ReportCmdSampleTreeBuilder - : public SampleTreeBuilder<SampleEntry, uint64_t> { +class ReportCmdSampleTreeBuilder : public SampleTreeBuilder<SampleEntry, uint64_t> { public: - ReportCmdSampleTreeBuilder(SampleComparator<SampleEntry> sample_comparator, + ReportCmdSampleTreeBuilder(const SampleComparator<SampleEntry>& sample_comparator, ThreadTree* thread_tree) : SampleTreeBuilder(sample_comparator), thread_tree_(thread_tree), @@ -138,7 +137,17 @@ class ReportCmdSampleTreeBuilder return sample_tree; } + virtual void ReportCmdProcessSampleRecord(std::shared_ptr<SampleRecord>& r) { + return ProcessSampleRecord(*r); + } + + virtual void ReportCmdProcessSampleRecord(const SampleRecord& r) { + return ProcessSampleRecord(r); + } + protected: + virtual uint64_t GetPeriod(const SampleRecord& r) = 0; + SampleEntry* CreateSample(const SampleRecord& r, bool in_kernel, uint64_t* acc_info) override { const ThreadEntry* thread = @@ -148,10 +157,10 @@ class ReportCmdSampleTreeBuilder uint64_t vaddr_in_file; const Symbol* symbol = thread_tree_->FindSymbol(map, r.ip_data.ip, &vaddr_in_file); - *acc_info = r.period_data.period; + uint64_t period = GetPeriod(r); + *acc_info = period; return InsertSample(std::unique_ptr<SampleEntry>( - new SampleEntry(r.time_data.time, r.period_data.period, 0, 1, thread, - map, symbol, vaddr_in_file))); + new SampleEntry(r.time_data.time, period, 0, 1, thread, map, symbol, vaddr_in_file))); } SampleEntry* CreateBranchSample(const SampleRecord& r, @@ -253,6 +262,53 @@ class ReportCmdSampleTreeBuilder uint64_t total_error_callchains_; }; +// Build sample tree based on event count in each sample. +class EventCountSampleTreeBuilder : public ReportCmdSampleTreeBuilder { + public: + EventCountSampleTreeBuilder(const SampleComparator<SampleEntry>& sample_comparator, + ThreadTree* thread_tree) + : ReportCmdSampleTreeBuilder(sample_comparator, thread_tree) { } + + protected: + uint64_t GetPeriod(const SampleRecord& r) override { + return r.period_data.period; + } +}; + +// Build sample tree based on the time difference between current sample and next sample. +class TimestampSampleTreeBuilder : public ReportCmdSampleTreeBuilder { + public: + TimestampSampleTreeBuilder(const SampleComparator<SampleEntry>& sample_comparator, + ThreadTree* thread_tree) + : ReportCmdSampleTreeBuilder(sample_comparator, thread_tree) { } + + void ReportCmdProcessSampleRecord(std::shared_ptr<SampleRecord>& r) override { + pid_t tid = static_cast<pid_t>(r->tid_data.tid); + auto it = next_sample_cache_.find(tid); + if (it == next_sample_cache_.end()) { + next_sample_cache_[tid] = r; + } else { + std::shared_ptr<SampleRecord> cur = it->second; + it->second = r; + ProcessSampleRecord(*cur); + } + } + + protected: + uint64_t GetPeriod(const SampleRecord& r) override { + auto it = next_sample_cache_.find(r.tid_data.tid); + CHECK(it != next_sample_cache_.end()); + // Normally the samples are sorted by time, but check here for safety. + if (it->second->time_data.time > r.time_data.time) { + return it->second->time_data.time - r.time_data.time; + } + return 1u; + } + + private: + std::unordered_map<pid_t, std::shared_ptr<SampleRecord>> next_sample_cache_; +}; + struct SampleTreeBuilderOptions { SampleComparator<SampleEntry> comparator; ThreadTree* thread_tree; @@ -266,10 +322,15 @@ struct SampleTreeBuilderOptions { bool build_callchain; bool use_caller_as_callchain_root; bool strict_unwind_arch_check; + bool trace_offcpu; std::unique_ptr<ReportCmdSampleTreeBuilder> CreateSampleTreeBuilder() { - std::unique_ptr<ReportCmdSampleTreeBuilder> builder( - new ReportCmdSampleTreeBuilder(comparator, thread_tree)); + std::unique_ptr<ReportCmdSampleTreeBuilder> builder; + if (trace_offcpu) { + builder.reset(new TimestampSampleTreeBuilder(comparator, thread_tree)); + } else { + builder.reset(new EventCountSampleTreeBuilder(comparator, thread_tree)); + } builder->SetFilters(pid_filter, tid_filter, comm_filter, dso_filter, symbol_filter); builder->SetBranchSampleOption(use_branch_address); builder->SetCallChainSampleOptions(accumulate_callchain, build_callchain, @@ -365,16 +426,20 @@ class ReportCommand : public Command { callgraph_max_stack_(UINT32_MAX), callgraph_percent_limit_(0), raw_period_(false), - brief_callgraph_(true) {} + brief_callgraph_(true), + trace_offcpu_(false), + sched_switch_attr_id_(0u) {} bool Run(const std::vector<std::string>& args); private: bool ParseOptions(const std::vector<std::string>& args); + bool ReadMetaInfoFromRecordFile(); bool ReadEventAttrFromRecordFile(); bool ReadFeaturesFromRecordFile(); bool ReadSampleTreeFromRecordFile(); bool ProcessRecord(std::unique_ptr<Record> record); + void ProcessSampleRecordInTraceOffCpuMode(std::unique_ptr<Record> record, size_t attr_id); bool ProcessTracingData(const std::vector<char>& data); bool PrintReport(); void PrintReportContext(FILE* fp); @@ -401,8 +466,12 @@ class ReportCommand : public Command { double callgraph_percent_limit_; bool raw_period_; bool brief_callgraph_; + bool trace_offcpu_; + size_t sched_switch_attr_id_; std::string report_filename_; + std::unordered_map<std::string, std::string> meta_info_; + std::unique_ptr<ScopedEventTypes> scoped_event_types_; }; bool ReportCommand::Run(const std::vector<std::string>& args) { @@ -416,6 +485,9 @@ bool ReportCommand::Run(const std::vector<std::string>& args) { if (record_file_reader_ == nullptr) { return false; } + if (!ReadMetaInfoFromRecordFile()) { + return false; + } if (!ReadEventAttrFromRecordFile()) { return false; } @@ -676,8 +748,30 @@ bool ReportCommand::ParseOptions(const std::vector<std::string>& args) { return true; } +bool ReportCommand::ReadMetaInfoFromRecordFile() { + if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) { + if (!record_file_reader_->ReadMetaInfoFeature(&meta_info_)) { + return false; + } + auto it = meta_info_.find("system_wide_collection"); + if (it != meta_info_.end()) { + system_wide_collection_ = it->second == "true"; + } + it = meta_info_.find("trace_offcpu"); + if (it != meta_info_.end()) { + trace_offcpu_ = it->second == "true"; + } + it = meta_info_.find("event_type_info"); + if (it != meta_info_.end()) { + scoped_event_types_.reset(new ScopedEventTypes(it->second)); + } + } + return true; +} + bool ReportCommand::ReadEventAttrFromRecordFile() { std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection(); + LOG(ERROR) << "attrs.size() = " << attrs.size(); for (const auto& attr_with_id : attrs) { EventAttrWithName attr; attr.attr = *attr_with_id.attr; @@ -698,6 +792,17 @@ bool ReportCommand::ReadEventAttrFromRecordFile() { return false; } } + if (trace_offcpu_) { + size_t i; + for (i = 0; i < event_attrs_.size(); ++i) { + LOG(ERROR) << "event_attrs[" << i << "].name = " << event_attrs_[i].name; + if (event_attrs_[i].name == "sched:sched_switch") { + break; + } + } + CHECK_NE(i, event_attrs_.size()); + sched_switch_attr_id_ = i; + } return true; } @@ -716,19 +821,21 @@ bool ReportCommand::ReadFeaturesFromRecordFile() { std::vector<std::string> cmdline = record_file_reader_->ReadCmdlineFeature(); if (!cmdline.empty()) { record_cmdline_ = android::base::Join(cmdline, ' '); - // TODO: the code to detect system wide collection option is fragile, remove - // it once we can do cross unwinding. - for (size_t i = 0; i < cmdline.size(); i++) { - std::string& s = cmdline[i]; - if (s == "-a") { - system_wide_collection_ = true; - break; - } else if (s == "--call-graph" || s == "--cpu" || s == "-e" || - s == "-f" || s == "-F" || s == "-j" || s == "-m" || - s == "-o" || s == "-p" || s == "-t") { - i++; - } else if (!s.empty() && s[0] != '-') { - break; + if (meta_info_.find("system_wide_collection") == meta_info_.end()) { + // TODO: the code to detect system wide collection option is fragile, remove + // it once we can do cross unwinding. + for (size_t i = 0; i < cmdline.size(); i++) { + std::string& s = cmdline[i]; + if (s == "-a") { + system_wide_collection_ = true; + break; + } else if (s == "--call-graph" || s == "--cpu" || s == "-e" || + s == "-f" || s == "-F" || s == "-j" || s == "-m" || + s == "-o" || s == "-p" || s == "-t") { + i++; + } else if (!s.empty() && s[0] != '-') { + break; + } } } } @@ -753,6 +860,7 @@ bool ReportCommand::ReadSampleTreeFromRecordFile() { sample_tree_builder_options_.accumulate_callchain = accumulate_callchain_; sample_tree_builder_options_.build_callchain = print_callgraph_; sample_tree_builder_options_.use_caller_as_callchain_root = !callgraph_show_callee_; + sample_tree_builder_options_.trace_offcpu = trace_offcpu_; for (size_t i = 0; i < event_attrs_.size(); ++i) { sample_tree_builder_.push_back(sample_tree_builder_options_.CreateSampleTreeBuilder()); @@ -775,8 +883,12 @@ bool ReportCommand::ProcessRecord(std::unique_ptr<Record> record) { thread_tree_.Update(*record); if (record->type() == PERF_RECORD_SAMPLE) { size_t attr_id = record_file_reader_->GetAttrIndexOfRecord(record.get()); - sample_tree_builder_[attr_id]->ProcessSampleRecord( - *static_cast<const SampleRecord*>(record.get())); + if (!trace_offcpu_) { + sample_tree_builder_[attr_id]->ReportCmdProcessSampleRecord( + *static_cast<SampleRecord*>(record.get())); + } else { + ProcessSampleRecordInTraceOffCpuMode(std::move(record), attr_id); + } } else if (record->type() == PERF_RECORD_TRACING_DATA) { const auto& r = *static_cast<TracingDataRecord*>(record.get()); if (!ProcessTracingData(std::vector<char>(r.data, r.data + r.data_size))) { @@ -786,6 +898,24 @@ bool ReportCommand::ProcessRecord(std::unique_ptr<Record> record) { return true; } + +void ReportCommand::ProcessSampleRecordInTraceOffCpuMode(std::unique_ptr<Record> record, + size_t attr_id) { + std::shared_ptr<SampleRecord> r(static_cast<SampleRecord*>(record.release())); + if (attr_id == sched_switch_attr_id_) { + // If this sample belongs to sched_switch event, we should broadcast the offcpu info + // to other event types. + for (size_t i = 0; i < event_attrs_.size(); ++i) { + if (i == sched_switch_attr_id_) { + continue; + } + sample_tree_builder_[i]->ReportCmdProcessSampleRecord(r); + } + } else { + sample_tree_builder_[attr_id]->ReportCmdProcessSampleRecord(r); + } +} + bool ReportCommand::ProcessTracingData(const std::vector<char>& data) { Tracing tracing(data); for (auto& attr : event_attrs_) { @@ -810,6 +940,9 @@ bool ReportCommand::PrintReport() { } PrintReportContext(report_fp); for (size_t i = 0; i < event_attrs_.size(); ++i) { + if (trace_offcpu_ && i == sched_switch_attr_id_) { + continue; + } if (i != 0) { fprintf(report_fp, "\n"); } @@ -823,7 +956,8 @@ bool ReportCommand::PrintReport() { sample_tree.total_error_callchains, sample_tree.total_error_callchains * 100.0 / sample_tree.total_samples); } - fprintf(report_fp, "Event count: %" PRIu64 "\n\n", sample_tree.total_period); + const char* period_prefix = trace_offcpu_ ? "Time in ns" : "Event count"; + fprintf(report_fp, "%s: %" PRIu64 "\n\n", period_prefix, sample_tree.total_period); sample_tree_displayer_->DisplaySamples(report_fp, sample_tree.samples, &sample_tree); } fflush(report_fp); diff --git a/simpleperf/cmd_report_test.cpp b/simpleperf/cmd_report_test.cpp index f1f5b1fc..f1a9eb66 100644 --- a/simpleperf/cmd_report_test.cpp +++ b/simpleperf/cmd_report_test.cpp @@ -471,6 +471,21 @@ TEST_F(ReportCommandTest, full_callgraph_option) { ASSERT_EQ(content.find("skipped in brief callgraph mode"), std::string::npos); } +TEST_F(ReportCommandTest, report_offcpu_time) { + Report(PERF_DATA_WITH_TRACE_OFFCPU, {"--children"}); + ASSERT_TRUE(success); + ASSERT_NE(content.find("Time in ns"), std::string::npos); + bool found = false; + for (auto& line : lines) { + if (line.find("SleepFunction") != std::string::npos) { + ASSERT_NE(line.find("46.29%"), std::string::npos); + found = true; + break; + } + } + ASSERT_TRUE(found); +} + #if defined(__linux__) #include "event_selection_set.h" diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp index c6edb246..26f542f8 100644 --- a/simpleperf/event_fd.cpp +++ b/simpleperf/event_fd.cpp @@ -56,7 +56,11 @@ std::unique_ptr<EventFd> EventFd::OpenEventFile(const perf_event_attr& attr, if (attr.freq) { uint64_t max_sample_freq; if (GetMaxSampleFrequency(&max_sample_freq) && max_sample_freq < attr.sample_freq) { - PLOG(INFO) << "Adjust sample freq to max allowed sample freq " << max_sample_freq; + static bool warned = false; + if (!warned) { + warned = true; + PLOG(INFO) << "Adjust sample freq to max allowed sample freq " << max_sample_freq; + } real_attr.sample_freq = max_sample_freq; } } diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp index 86f57b9d..5f8cf5cb 100644 --- a/simpleperf/event_selection_set.cpp +++ b/simpleperf/event_selection_set.cpp @@ -16,6 +16,9 @@ #include "event_selection_set.h" +#include <atomic> +#include <thread> + #include <android-base/logging.h> #include "environment.h" @@ -53,6 +56,53 @@ bool IsDwarfCallChainSamplingSupported() { return IsEventAttrSupported(attr); } +bool IsDumpingRegsForTracepointEventsSupported() { + const EventType* event_type = FindEventTypeByName("sched:sched_switch"); + if (event_type == nullptr) { + return false; + } + std::atomic<bool> done(false); + std::atomic<pid_t> thread_id(0); + std::thread thread([&]() { + thread_id = gettid(); + while (!done) { + usleep(1); + } + usleep(1); // Make a sched out to generate one sample. + }); + while (thread_id == 0) { + usleep(1); + } + perf_event_attr attr = CreateDefaultPerfEventAttr(*event_type); + attr.freq = 0; + attr.sample_period = 1; + std::unique_ptr<EventFd> event_fd = + EventFd::OpenEventFile(attr, thread_id, -1, nullptr); + if (event_fd == nullptr) { + return false; + } + if (!event_fd->CreateMappedBuffer(4, true)) { + return false; + } + done = true; + thread.join(); + + std::vector<char> buffer; + size_t buffer_pos = 0; + size_t size = event_fd->GetAvailableMmapData(buffer, buffer_pos); + std::vector<std::unique_ptr<Record>> records = + ReadRecordsFromBuffer(attr, buffer.data(), size); + for (auto& r : records) { + if (r->type() == PERF_RECORD_SAMPLE) { + auto& record = *static_cast<SampleRecord*>(r.get()); + if (record.ip_data.ip != 0) { + return true; + } + } + } + return false; +} + bool EventSelectionSet::BuildAndCheckEventSelection( const std::string& event_name, EventSelection* selection) { std::unique_ptr<EventTypeAndModifier> event_type = ParseEventType(event_name); @@ -116,6 +166,16 @@ bool EventSelectionSet::AddEventGroup( return true; } +std::vector<const EventType*> EventSelectionSet::GetEvents() const { + std::vector<const EventType*> result; + for (const auto& group : groups_) { + for (const auto& selection : group) { + result.push_back(&selection.event_type_modifier.event_type); + } + } + return result; +} + std::vector<const EventType*> EventSelectionSet::GetTracepointEvents() const { std::vector<const EventType*> result; for (const auto& group : groups_) { diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h index 97ad7e59..f5cb53ab 100644 --- a/simpleperf/event_selection_set.h +++ b/simpleperf/event_selection_set.h @@ -73,6 +73,7 @@ class EventSelectionSet { bool AddEventType(const std::string& event_name); bool AddEventGroup(const std::vector<std::string>& event_names); + std::vector<const EventType*> GetEvents() const; std::vector<const EventType*> GetTracepointEvents() const; bool HasInplaceSampler() const; std::vector<EventAttrWithId> GetEventAttrWithId() const; @@ -186,5 +187,6 @@ class EventSelectionSet { bool IsBranchSamplingSupported(); bool IsDwarfCallChainSamplingSupported(); +bool IsDumpingRegsForTracepointEventsSupported(); #endif // SIMPLE_PERF_EVENT_SELECTION_SET_H_ diff --git a/simpleperf/event_type.cpp b/simpleperf/event_type.cpp index bc639d12..21da17c9 100644 --- a/simpleperf/event_type.cpp +++ b/simpleperf/event_type.cpp @@ -51,6 +51,9 @@ bool SetTracepointEventsFilePath(const std::string& filepath) { std::string GetTracepointEvents() { std::string result; for (const EventType& event : GetAllEventTypes()) { + if (event.type != PERF_TYPE_TRACEPOINT) { + continue; + } if (!result.empty()) { result.push_back('\n'); } @@ -107,8 +110,37 @@ static std::vector<EventType> GetTracepointEventTypes() { return result; } +static std::vector<EventType> event_type_array; + +std::string ScopedEventTypes::BuildString(const std::vector<const EventType*>& event_types) { + std::string result; + for (auto type : event_types) { + if (!result.empty()) { + result.push_back('\n'); + } + result += android::base::StringPrintf("%s,%u,%" PRIu64, type->name.c_str(), type->type, + type->config); + } + return result; +} + +ScopedEventTypes::ScopedEventTypes(const std::string& event_type_str) { + saved_event_types_ = std::move(event_type_array); + event_type_array.clear(); + for (auto& s : android::base::Split(event_type_str, "\n")) { + std::string name = s.substr(0, s.find(',')); + uint32_t type; + uint64_t config; + sscanf(s.c_str() + name.size(), ",%u,%" PRIu64, &type, &config); + event_type_array.emplace_back(name, type, config, "", ""); + } +} + +ScopedEventTypes::~ScopedEventTypes() { + event_type_array = std::move(saved_event_types_); +} + const std::vector<EventType>& GetAllEventTypes() { - static std::vector<EventType> event_type_array; if (event_type_array.empty()) { event_type_array.insert(event_type_array.end(), static_event_type_array.begin(), static_event_type_array.end()); diff --git a/simpleperf/event_type.h b/simpleperf/event_type.h index 6ec544d8..1dd7f39f 100644 --- a/simpleperf/event_type.h +++ b/simpleperf/event_type.h @@ -54,6 +54,19 @@ struct EventType { bool SetTracepointEventsFilePath(const std::string& filepath); std::string GetTracepointEvents(); + +// Used to temporarily change event types returned by GetAllEventTypes(). +class ScopedEventTypes { + public: + static std::string BuildString(const std::vector<const EventType*>& event_types); + + ScopedEventTypes(const std::string& event_type_str); + ~ScopedEventTypes(); + + private: + std::vector<EventType> saved_event_types_; +}; + const std::vector<EventType>& GetAllEventTypes(); const EventType* FindEventTypeByName(const std::string& name); diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h index 963fe006..3b9121c0 100644 --- a/simpleperf/get_test_data.h +++ b/simpleperf/get_test_data.h @@ -108,4 +108,7 @@ static const std::string INVALID_PERF_DATA = "invalid_perf.data"; // generated by recording an app. static const std::string PERF_DATA_WITH_WRONG_IP_IN_CALLCHAIN = "wrong_ip_callchain_perf.data"; +// generated by `simpleperf record --trace-offcpu --duration 2 -g ./simpleperf_runtest_run_and_sleep64`. +static const std::string PERF_DATA_WITH_TRACE_OFFCPU = "perf_with_trace_offcpu.data"; + #endif // SIMPLE_PERF_GET_TEST_DATA_H_ diff --git a/simpleperf/runtest/Android.build.mk b/simpleperf/runtest/Android.build.mk index 5a8f92d8..65f5fdb0 100644 --- a/simpleperf/runtest/Android.build.mk +++ b/simpleperf/runtest/Android.build.mk @@ -31,16 +31,16 @@ LOCAL_STRIP_MODULE := false LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.build.mk include $(BUILD_EXECUTABLE) -ifeq ($(HOST_OS),linux) include $(CLEAR_VARS) LOCAL_CLANG := true LOCAL_CPPFLAGS := $(simpleperf_runtest_cppflags) LOCAL_SRC_FILES := $(module_src_files) LOCAL_SHARED_LIBRARIES := libsimpleperf_inplace_sampler LOCAL_MODULE := $(module) +LOCAL_MODULE_HOST_OS := linux LOCAL_MULTILIB := both LOCAL_MODULE_STEM_32 := $(module)32 LOCAL_MODULE_STEM_64 := $(module)64 +LOCAL_LDLIBS_linux := -lrt LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/Android.build.mk -include $(BUILD_HOST_EXECUTABLE) -endif
\ No newline at end of file +include $(BUILD_HOST_EXECUTABLE)
\ No newline at end of file diff --git a/simpleperf/runtest/Android.mk b/simpleperf/runtest/Android.mk index 55bf3b79..ad3f5c7f 100644 --- a/simpleperf/runtest/Android.mk +++ b/simpleperf/runtest/Android.mk @@ -42,4 +42,8 @@ include $(LOCAL_PATH)/Android.build.mk module := simpleperf_runtest_function_indirect_recursive module_src_files := function_indirect_recursive.cpp +include $(LOCAL_PATH)/Android.build.mk + +module := simpleperf_runtest_run_and_sleep +module_src_files := run_and_sleep.cpp include $(LOCAL_PATH)/Android.build.mk
\ No newline at end of file diff --git a/simpleperf/runtest/run_and_sleep.cpp b/simpleperf/runtest/run_and_sleep.cpp new file mode 100644 index 00000000..5ed09ac8 --- /dev/null +++ b/simpleperf/runtest/run_and_sleep.cpp @@ -0,0 +1,46 @@ +#include <inttypes.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#define noinline __attribute__((__noinline__)) + +static inline uint64_t GetSystemClock() { + timespec ts; + // Assume clock_gettime() doesn't fail. + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000ULL + ts.tv_nsec; +} + +constexpr int LOOP_COUNT = 100000000; +uint64_t noinline RunFunction() { + uint64_t start_time_in_ns = GetSystemClock(); + for (volatile int i = 0; i < LOOP_COUNT; ++i) { + } + return GetSystemClock() - start_time_in_ns; +} + +uint64_t noinline SleepFunction(unsigned long long sleep_time_in_ns) { + uint64_t start_time_in_ns = GetSystemClock(); + struct timespec req; + req.tv_sec = sleep_time_in_ns / 1000000000; + req.tv_nsec = sleep_time_in_ns % 1000000000; + nanosleep(&req, nullptr); + return GetSystemClock() - start_time_in_ns; +} + +void noinline GlobalFunction() { + uint64_t total_sleep_time_in_ns = 0; + uint64_t total_run_time_in_ns = 0; + while (true) { + total_run_time_in_ns += RunFunction(); + if (total_sleep_time_in_ns < total_run_time_in_ns) { + total_sleep_time_in_ns += SleepFunction(total_run_time_in_ns - total_sleep_time_in_ns); + } + } +} + +int main() { + GlobalFunction(); + return 0; +} diff --git a/simpleperf/runtest/runtest.conf b/simpleperf/runtest/runtest.conf index 2fbbd985..22babf9b 100644 --- a/simpleperf/runtest/runtest.conf +++ b/simpleperf/runtest/runtest.conf @@ -203,4 +203,24 @@ </symbol_overhead> </test> +<test name="run_and_sleep"> + <executable name="simpleperf_runtest_run_and_sleep"/> + + <symbol_overhead> + <symbol name="RunFunction()" min="80"/> + </symbol_overhead> + +</test> + +<test name="run_and_sleep_trace_offcpu"> + <executable name="simpleperf_runtest_run_and_sleep"/> + <disable_host/> + <record option="--trace-offcpu"/> + + <symbol_children_overhead> + <symbol name="RunFunction()" min="20" max="80"/> + <symbol name="SleepFunction(unsigned long long)" min="20" max="80"/> + </symbol_children_overhead> +</test> + </runtests> diff --git a/simpleperf/runtest/runtest.py b/simpleperf/runtest/runtest.py index a1b4520b..e4bfe16d 100644 --- a/simpleperf/runtest/runtest.py +++ b/simpleperf/runtest/runtest.py @@ -168,12 +168,16 @@ class Test(object): self, test_name, executable_name, + disable_host, + record_options, report_options, symbol_overhead_requirements, symbol_children_overhead_requirements, symbol_relation_requirements): self.test_name = test_name self.executable_name = executable_name + self.disable_host = disable_host + self.record_options = record_options self.report_options = report_options self.symbol_overhead_requirements = symbol_overhead_requirements self.symbol_children_overhead_requirements = ( @@ -184,6 +188,8 @@ class Test(object): strs = [] strs.append('Test test_name=%s' % self.test_name) strs.append('\texecutable_name=%s' % self.executable_name) + strs.append('\tdisable_host=%s' % self.disable_host) + strs.append('\trecord_options=%s' % (' '.join(self.record_options))) strs.append('\treport_options=%s' % (' '.join(self.report_options))) strs.append('\tsymbol_overhead_requirements:') for req in self.symbol_overhead_requirements: @@ -206,6 +212,8 @@ def load_config_file(config_file): assert test.tag == 'test' test_name = test.attrib['name'] executable_name = None + disable_host = False + record_options = [] report_options = [] symbol_overhead_requirements = [] symbol_children_overhead_requirements = [] @@ -213,6 +221,10 @@ def load_config_file(config_file): for test_item in test: if test_item.tag == 'executable': executable_name = test_item.attrib['name'] + elif test_item.tag == 'disable_host': + disable_host = True + elif test_item.tag == 'record': + record_options = test_item.attrib['option'].split() elif test_item.tag == 'report': report_options = test_item.attrib['option'].split() elif (test_item.tag == 'symbol_overhead' or @@ -256,6 +268,8 @@ def load_config_file(config_file): Test( test_name, executable_name, + disable_host, + record_options, report_options, symbol_overhead_requirements, symbol_children_overhead_requirements, @@ -403,18 +417,20 @@ class ReportAnalyzer(object): continue if not line[0].isspace(): if has_callgraph: - m = re.search(r'^([\d\.]+)%\s+([\d\.]+)%\s+(\S+).*\s+(\S+)$', line) - children_overhead = float(m.group(1)) - overhead = float(m.group(2)) - comm = m.group(3) - symbol_name = m.group(4) + items = line.split(None, 6) + assert len(items) == 7 + children_overhead = float(items[0][:-1]) + overhead = float(items[1][:-1]) + comm = items[2] + symbol_name = items[6] cur_symbol = Symbol(symbol_name, comm, overhead, children_overhead) symbols.append(cur_symbol) else: - m = re.search(r'^([\d\.]+)%\s+(\S+).*\s+(\S+)$', line) - overhead = float(m.group(1)) - comm = m.group(2) - symbol_name = m.group(3) + items = line.split(None, 5) + assert len(items) == 6 + overhead = float(items[0][:-1]) + comm = items[1] + symbol_name = items[5] cur_symbol = Symbol(symbol_name, comm, overhead, 0) symbols.append(cur_symbol) # Each report item can have different column depths. @@ -551,7 +567,10 @@ def build_runner(target, use_callgraph, sampler): def test_with_runner(runner, tests): report_analyzer = ReportAnalyzer() for test in tests: - runner.record(test.executable_name, 'perf.data') + if test.disable_host and runner.target.startswith('host'): + print('Skip test %s on %s' % (test.test_name, runner.target)) + continue + runner.record(test.executable_name, 'perf.data', additional_options = test.record_options) if runner.sampler == 'inplace-sampler': # TODO: fix this when inplace-sampler actually works. runner.report('perf.data', 'perf.report') diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h index c7a0ac84..67f28dac 100644 --- a/simpleperf/sample_tree.h +++ b/simpleperf/sample_tree.h @@ -53,7 +53,7 @@ template <typename EntryT, typename AccumulateInfoT> class SampleTreeBuilder { public: - explicit SampleTreeBuilder(SampleComparator<EntryT> comparator) + explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator) : sample_set_(comparator), accumulate_callchain_(false), sample_comparator_(comparator), diff --git a/simpleperf/test_util.h b/simpleperf/test_util.h index f242c896..bb061ba2 100644 --- a/simpleperf/test_util.h +++ b/simpleperf/test_util.h @@ -40,3 +40,9 @@ bool IsRoot(); GTEST_LOG_(INFO) << "Didn't test \"" << #TestStatement << "\" requires root privileges"; \ } \ } while (0) + +#if defined(__ANDROID__) +#define TEST_REQUIRE_HOST_ROOT() +#else +#define TEST_REQUIRE_HOST_ROOT() if (!IsRoot()) return +#endif diff --git a/simpleperf/testdata/perf_with_trace_offcpu.data b/simpleperf/testdata/perf_with_trace_offcpu.data Binary files differnew file mode 100644 index 00000000..6d0c5e0a --- /dev/null +++ b/simpleperf/testdata/perf_with_trace_offcpu.data |