diff options
Diffstat (limited to 'simpleperf')
-rw-r--r-- | simpleperf/cmd_dumprecord.cpp | 71 | ||||
-rw-r--r-- | simpleperf/cmd_dumprecord_test.cpp | 4 | ||||
-rw-r--r-- | simpleperf/cmd_record.cpp | 195 | ||||
-rw-r--r-- | simpleperf/cmd_record_test.cpp | 5 | ||||
-rw-r--r-- | simpleperf/dwarf_unwind.cpp | 30 | ||||
-rw-r--r-- | simpleperf/dwarf_unwind.h | 5 | ||||
-rw-r--r-- | simpleperf/get_test_data.h | 3 | ||||
-rw-r--r-- | simpleperf/nonlinux_support/nonlinux_support.cpp | 6 | ||||
-rw-r--r-- | simpleperf/perf_regs.cpp | 19 | ||||
-rw-r--r-- | simpleperf/perf_regs.h | 1 | ||||
-rw-r--r-- | simpleperf/record.cpp | 155 | ||||
-rw-r--r-- | simpleperf/record.h | 27 | ||||
-rw-r--r-- | simpleperf/sample_tree.h | 11 | ||||
-rw-r--r-- | simpleperf/testdata/perf_with_callchain_record.data | bin | 0 -> 20743 bytes |
14 files changed, 468 insertions, 64 deletions
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp index 919b62a4..97bd7132 100644 --- a/simpleperf/cmd_dumprecord.cpp +++ b/simpleperf/cmd_dumprecord.cpp @@ -26,6 +26,7 @@ #include "command.h" #include "event_attr.h" +#include "event_type.h" #include "perf_regs.h" #include "record.h" #include "record_file.h" @@ -48,7 +49,7 @@ class DumpRecordCommand : public Command { bool ParseOptions(const std::vector<std::string>& args); void DumpFileHeader(); void DumpAttrSection(); - void DumpDataSection(); + bool DumpDataSection(); bool DumpFeatureSection(); std::string record_filename_; @@ -72,14 +73,23 @@ bool DumpRecordCommand::Run(const std::vector<std::string>& args) { } } ScopedCurrentArch scoped_arch(record_file_arch_); + std::unique_ptr<ScopedEventTypes> scoped_event_types; + if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) { + std::unordered_map<std::string, std::string> meta_info; + if (!record_file_reader_->ReadMetaInfoFeature(&meta_info)) { + return false; + } + auto it = meta_info.find("event_type_info"); + if (it != meta_info.end()) { + scoped_event_types.reset(new ScopedEventTypes(it->second)); + } + } DumpFileHeader(); DumpAttrSection(); - DumpDataSection(); - if (!DumpFeatureSection()) { + if (!DumpDataSection()) { return false; } - - return true; + return DumpFeatureSection(); } bool DumpRecordCommand::ParseOptions(const std::vector<std::string>& args) { @@ -134,7 +144,6 @@ void DumpRecordCommand::DumpFileHeader() { } } - void DumpRecordCommand::DumpAttrSection() { std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection(); for (size_t i = 0; i < attrs.size(); ++i) { @@ -151,11 +160,53 @@ void DumpRecordCommand::DumpAttrSection() { } } -void DumpRecordCommand::DumpDataSection() { - record_file_reader_->ReadDataSection([](std::unique_ptr<Record> record) { - record->Dump(); +bool DumpRecordCommand::DumpDataSection() { + ThreadTree thread_tree; + thread_tree.ShowIpForUnknownSymbol(); + record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree); + + auto get_symbol_function = [&](uint32_t pid, uint32_t tid, uint64_t ip, std::string& dso_name, + std::string& symbol_name, uint64_t& vaddr_in_file) { + ThreadEntry* thread = thread_tree.FindThreadOrNew(pid, tid); + const MapEntry* map = thread_tree.FindMap(thread, ip); + Dso* dso; + const Symbol* symbol = thread_tree.FindSymbol(map, ip, &vaddr_in_file, &dso); + dso_name = dso->Path(); + symbol_name = symbol->DemangledName(); + }; + + auto record_callback = [&](std::unique_ptr<Record> r) { + r->Dump(); + thread_tree.Update(*r); + if (r->type() == PERF_RECORD_SAMPLE) { + SampleRecord& sr = *static_cast<SampleRecord*>(r.get()); + if (sr.sample_type & PERF_SAMPLE_CALLCHAIN) { + PrintIndented(1, "callchain:\n"); + for (size_t i = 0; i < sr.callchain_data.ip_nr; ++i) { + std::string dso_name; + std::string symbol_name; + uint64_t vaddr_in_file; + get_symbol_function(sr.tid_data.pid, sr.tid_data.tid, sr.callchain_data.ips[i], + dso_name, symbol_name, vaddr_in_file); + PrintIndented(2, "%s (%s[+%" PRIx64 "])\n", symbol_name.c_str(), dso_name.c_str(), + vaddr_in_file); + } + } + } else if (r->type() == SIMPLE_PERF_RECORD_CALLCHAIN) { + CallChainRecord& cr = *static_cast<CallChainRecord*>(r.get()); + PrintIndented(1, "callchain:\n"); + for (size_t i = 0; i < cr.ip_nr; ++i) { + std::string dso_name; + std::string symbol_name; + uint64_t vaddr_in_file; + get_symbol_function(cr.pid, cr.tid, cr.ips[i], dso_name, symbol_name, vaddr_in_file); + PrintIndented(2, "%s (%s[+%" PRIx64 "])\n", symbol_name.c_str(), dso_name.c_str(), + vaddr_in_file); + } + } return true; - }, false); + }; + return record_file_reader_->ReadDataSection(record_callback, false); } bool DumpRecordCommand::DumpFeatureSection() { diff --git a/simpleperf/cmd_dumprecord_test.cpp b/simpleperf/cmd_dumprecord_test.cpp index b61942b5..ae959210 100644 --- a/simpleperf/cmd_dumprecord_test.cpp +++ b/simpleperf/cmd_dumprecord_test.cpp @@ -30,3 +30,7 @@ TEST(cmd_dump, record_file_option) { TEST(cmd_dump, dump_data_generated_by_linux_perf) { ASSERT_TRUE(DumpCmd()->Run({GetTestData(PERF_DATA_GENERATED_BY_LINUX_PERF)})); } + +TEST(cmd_dump, dump_callchain_records) { + ASSERT_TRUE(DumpCmd()->Run({GetTestData(PERF_DATA_WITH_CALLCHAIN_RECORD)})); +} diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp index 15ba8f2d..41ee2ece 100644 --- a/simpleperf/cmd_record.cpp +++ b/simpleperf/cmd_record.cpp @@ -36,6 +36,7 @@ #include <android-base/properties.h> #endif +#include "CallChainJoiner.h" #include "command.h" #include "dwarf_unwind.h" #include "environment.h" @@ -52,6 +53,8 @@ #include "utils.h" #include "workload.h" +using namespace simpleperf; + static std::string default_measured_event_type = "cpu-cycles"; static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = { @@ -79,6 +82,9 @@ constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528; // successfully, the buffer size = 1024 * 4K (page size) = 4M. constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024; +// Cache size used by CallChainJoiner to cache call chains in memory. +constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024; + class RecordCommand : public Command { public: RecordCommand() @@ -164,6 +170,13 @@ class RecordCommand : public Command { "--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n" " will be unwound by default. Use this option to disable the\n" " unwinding of the user's stack.\n" +"--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n" +" callchain joiner is used to break the 64k stack limit\n" +" and build more complete call graphs. However, the built\n" +" call graphs may not be correct in all cases.\n" +"--callchain-joiner-min-matching-nodes count\n" +" When callchain joiner is used, set the matched nodes needed to join\n" +" callchains. The count should be >= 1. By default it is 1.\n" "-o record_file_name Set record file name, default is perf.data.\n" "--post-unwind If `--call-graph dwarf` option is used, then the user's stack\n" " will be unwound while recording by default. But it may lose\n" @@ -204,7 +217,9 @@ class RecordCommand : public Command { start_profiling_fd_(-1), in_app_context_(false), trace_offcpu_(false), - exclude_kernel_callchain_(false) { + exclude_kernel_callchain_(false), + allow_callchain_joiner_(true), + callchain_joiner_min_matching_nodes_(1u) { // If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes // sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing // to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to @@ -218,6 +233,9 @@ class RecordCommand : public Command { private: bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args); + bool PrepareRecording(Workload* workload); + bool DoRecording(Workload* workload); + bool PostProcessRecording(const std::vector<std::string>& args); bool TraceOffCpu(); bool SetEventSelectionFlags(); bool CreateAndInitRecordFile(); @@ -231,6 +249,7 @@ class RecordCommand : public Command { void UpdateRecordForEmbeddedElfPath(Record* record); bool UnwindRecord(SampleRecord& r); bool PostUnwind(const std::vector<std::string>& args); + bool JoinCallChains(); bool DumpAdditionalFeatures(const std::vector<std::string>& args); bool DumpBuildIdFeature(); bool DumpFileFeature(); @@ -268,15 +287,20 @@ class RecordCommand : public Command { bool in_app_context_; bool trace_offcpu_; bool exclude_kernel_callchain_; + + // For CallChainJoiner + bool allow_callchain_joiner_; + size_t callchain_joiner_min_matching_nodes_; + std::unique_ptr<CallChainJoiner> callchain_joiner_; }; bool RecordCommand::Run(const std::vector<std::string>& args) { + ScopedCurrentArch scoped_arch(GetMachineArch()); if (!CheckPerfEventLimit()) { return false; } AllowMoreOpenedFiles(); - // 1. Parse options, and use default measured event type if not given. std::vector<std::string> workload_args; if (!ParseOptions(args, &workload_args)) { return false; @@ -290,6 +314,30 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { record_filename_, true); } } + std::unique_ptr<Workload> workload; + if (!workload_args.empty()) { + workload = Workload::CreateWorkload(workload_args); + if (workload == nullptr) { + return false; + } + } + if (!PrepareRecording(workload.get())) { + return false; + } + if (!DoRecording(workload.get())) { + return false; + } + return PostProcessRecording(args); +} + +bool RecordCommand::PrepareRecording(Workload* workload) { + // 1. Prepare in other modules. + if (!InitPerfClock()) { + return false; + } + PrepareVdsoFile(); + + // 2. Add default event type. if (event_selection_set_.empty()) { size_t group_id; if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) { @@ -299,6 +347,8 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { event_selection_set_.SetSampleSpeed(group_id, *sample_speed_); } } + + // 3. Process options before opening perf event files. exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel(); if (trace_offcpu_ && !TraceOffCpu()) { return false; @@ -306,22 +356,14 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { if (!SetEventSelectionFlags()) { return false; } - - // 2. Do some environment preparation. - ScopedCurrentArch scoped_arch(GetMachineArch()); - if (!InitPerfClock()) { - return false; + if (unwind_dwarf_callchain_ && !post_unwind_ && allow_callchain_joiner_) { + bool keep_original_callchains = WOULD_LOG(DEBUG); + callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE, + callchain_joiner_min_matching_nodes_, + keep_original_callchains)); } - PrepareVdsoFile(); - // 3. Create workload. - std::unique_ptr<Workload> workload; - if (!workload_args.empty()) { - workload = Workload::CreateWorkload(workload_args); - if (workload == nullptr) { - return false; - } - } + // 4. Add monitored targets. bool need_to_check_targets = false; if (system_wide_collection_) { event_selection_set_.AddMonitoredThreads({-1}); @@ -350,7 +392,7 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { need_to_check_targets = true; } - // 4. Open perf_event_files, create mapped buffers for perf_event_files. + // 5. Open perf event files and create mapped buffers. if (!event_selection_set_.OpenEventFiles(cpus_)) { return false; } @@ -359,12 +401,12 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { return false; } - // 5. Create perf.data. + // 6. Create perf.data. if (!CreateAndInitRecordFile()) { return false; } - // 6. Add read/signal/periodic Events. + // 7. Add read/signal/periodic Events. auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1); if (!event_selection_set_.PrepareToReadMmapEventData(callback)) { @@ -378,21 +420,22 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { } IOEventLoop* loop = event_selection_set_.GetIOEventLoop(); if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, - [&]() { return loop->ExitLoop(); })) { + [loop]() { return loop->ExitLoop(); })) { return false; } if (duration_in_sec_ != 0) { if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), - [&]() { return loop->ExitLoop(); })) { + [loop]() { return loop->ExitLoop(); })) { return false; } } + return true; +} - // 7. Write records in mapped buffers of perf_event_files to output file while - // workload is running. +bool RecordCommand::DoRecording(Workload* workload) { + // Write records in mapped buffers of perf_event_files to output file while workload is running. start_sampling_time_in_ns_ = GetPerfClock(); - LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ - << " ns"; + LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns"; if (workload != nullptr && !workload->IsStarted() && !workload->Start()) { return false; } @@ -402,14 +445,22 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { } close(start_profiling_fd_); } - if (!loop->RunLoop()) { + if (!event_selection_set_.GetIOEventLoop()->RunLoop()) { return false; } if (!event_selection_set_.FinishReadMmapEventData()) { return false; } + return true; +} + +bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) { + // 1. Optionally join Callchains. + if (callchain_joiner_) { + JoinCallChains(); + } - // 8. Dump additional features, and close record file. + // 2. Dump additional features, and close record file. if (!DumpAdditionalFeatures(args)) { return false; } @@ -417,14 +468,14 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { return false; } - // 9. Unwind dwarf callchain. + // 3. Post unwind dwarf callchain. if (post_unwind_) { if (!PostUnwind(args)) { return false; } } - // 10. Show brief record result. + // 4. Show brief record result. LOG(INFO) << "Samples recorded: " << sample_record_count_ << ". Samples lost: " << lost_record_count_ << "."; if (sample_record_count_ + lost_record_count_ != 0) { @@ -438,6 +489,9 @@ bool RecordCommand::Run(const std::vector<std::string>& args) { << "or increasing sample period(-c)."; } } + if (callchain_joiner_) { + callchain_joiner_->DumpStat(); + } return true; } @@ -611,6 +665,17 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, child_inherit_ = false; } else if (args[i] == "--no-unwind") { unwind_dwarf_callchain_ = false; + } else if (args[i] == "--no-callchain-joiner") { + allow_callchain_joiner_ = false; + } else if (args[i] == "--callchain-joiner-min-matching-nodes") { + if (!NextArgumentOrError(args, &i)) { + return false; + } + if (!android::base::ParseUint(args[i].c_str(), &callchain_joiner_min_matching_nodes_) || + callchain_joiner_min_matching_nodes_ < 1u) { + LOG(ERROR) << "unexpected argument for " << args[i - 1] << " option"; + return false; + } } else if (args[i] == "-o") { if (!NextArgumentOrError(args, &i)) { return false; @@ -1008,11 +1073,17 @@ bool RecordCommand::UnwindRecord(SampleRecord& r) { // Normally do strict arch check when unwinding stack. But allow unwinding // 32-bit processes on 64-bit devices for system wide profiling. bool strict_arch_check = !system_wide_collection_; - std::vector<uint64_t> unwind_ips = - UnwindCallChain(r.regs_user_data.abi, *thread, regs, - r.stack_user_data.data, - r.GetValidStackSize(), strict_arch_check); - r.ReplaceRegAndStackWithCallChain(unwind_ips); + std::vector<uint64_t> ips; + std::vector<uint64_t> sps; + if (!UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data, + r.GetValidStackSize(), strict_arch_check, &ips, &sps)) { + return false; + } + r.ReplaceRegAndStackWithCallChain(ips); + if (callchain_joiner_) { + return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid, + CallChainJoiner::ORIGINAL_OFFLINE, ips, sps); + } } return true; } @@ -1062,6 +1133,62 @@ bool RecordCommand::PostUnwind(const std::vector<std::string>& args) { return true; } +bool RecordCommand::JoinCallChains() { + // 1. Prepare joined callchains. + if (!callchain_joiner_->JoinCallChains()) { + return false; + } + // 2. Move records from record_filename_ to a temporary file. + if (!record_file_writer_->Close()) { + return false; + } + record_file_writer_.reset(); + std::unique_ptr<TemporaryFile> tmpfile = CreateTempFileUsedInRecording(); + if (!Workload::RunCmd({"mv", record_filename_, tmpfile->path})) { + return false; + } + + // 3. Read records from the temporary file, and write record with joined call chains back + // to record_filename_. + std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile->path); + record_file_writer_ = CreateRecordFile(record_filename_); + if (!reader || !record_file_writer_) { + return false; + } + bool store_callchains = WOULD_LOG(DEBUG); + + auto record_callback = [&](std::unique_ptr<Record> r) { + if (r->type() != PERF_RECORD_SAMPLE) { + return record_file_writer_->WriteRecord(*r); + } + SampleRecord& sr = *static_cast<SampleRecord*>(r.get()); + if (!sr.HasUserCallChain()) { + return record_file_writer_->WriteRecord(sr); + } + pid_t pid; + pid_t tid; + CallChainJoiner::ChainType type; + std::vector<uint64_t> ips; + std::vector<uint64_t> sps; + do { + if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) { + return false; + } + if (store_callchains) { + CallChainRecord record(pid, tid, type, sr.Timestamp(), ips, sps); + if (!record_file_writer_->WriteRecord(record)) { + return false; + } + } + } while (type != CallChainJoiner::JOINED_OFFLINE); + CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid)); + CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid)); + sr.UpdateUserCallChain(ips); + return record_file_writer_->WriteRecord(sr); + }; + return reader->ReadDataSection(record_callback, false); +} + bool RecordCommand::DumpAdditionalFeatures( const std::vector<std::string>& args) { // Read data section of perf.data to collect hit file information. diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp index 3ccf2dc7..660e679b 100644 --- a/simpleperf/cmd_record_test.cpp +++ b/simpleperf/cmd_record_test.cpp @@ -559,3 +559,8 @@ TEST(record_cmd, generate_samples_by_hw_counters) { ASSERT_TRUE(has_sample); } } + +TEST(record_cmd, callchain_joiner_options) { + ASSERT_TRUE(RunRecordCmd({"--no-callchain-joiner"})); + ASSERT_TRUE(RunRecordCmd({"--callchain-joiner-min-matching-nodes", "2"})); +} diff --git a/simpleperf/dwarf_unwind.cpp b/simpleperf/dwarf_unwind.cpp index 2e0a2985..623640cc 100644 --- a/simpleperf/dwarf_unwind.cpp +++ b/simpleperf/dwarf_unwind.cpp @@ -94,22 +94,32 @@ static ucontext_t BuildUContextFromRegs(const RegSet& regs __attribute__((unused return ucontext; } -std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread, - const RegSet& regs, const char* stack, - size_t stack_size, bool strict_arch_check) { +bool UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs, const char* stack, + size_t stack_size, bool strict_arch_check, + std::vector<uint64_t>* ips, std::vector<uint64_t>* sps) { std::vector<uint64_t> result; ArchType arch = (abi != PERF_SAMPLE_REGS_ABI_32) ? ScopedCurrentArch::GetCurrentArch() : ScopedCurrentArch::GetCurrentArch32(); if (!IsArchTheSame(arch, GetBuildArch(), strict_arch_check)) { - LOG(FATAL) << "simpleperf is built in arch " << GetArchString(GetBuildArch()) - << ", and can't do stack unwinding for arch " << GetArchString(arch); - return result; + LOG(ERROR) << "simpleperf is built in arch " << GetArchString(GetBuildArch()) + << ", and can't do stack unwinding for arch " << GetArchString(arch); + return false; } uint64_t sp_reg_value; if (!GetSpRegValue(regs, arch, &sp_reg_value)) { LOG(ERROR) << "can't get sp reg value"; - return result; + return false; + } + if (arch != GetBuildArch()) { + uint64_t ip_reg_value; + if (!GetIpRegValue(regs, arch, &ip_reg_value)) { + LOG(ERROR) << "can't get ip reg value"; + return false; + } + ips->push_back(ip_reg_value); + sps->push_back(sp_reg_value); + return true; } uint64_t stack_addr = sp_reg_value; @@ -121,6 +131,7 @@ std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread, bt_map.end = map->start_addr + map->len; bt_map.offset = map->pgoff; bt_map.name = map->dso->GetDebugFilePath(); + bt_map.flags = PROT_READ | PROT_EXEC; } std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(thread.pid, bt_maps)); @@ -138,8 +149,9 @@ std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread, if (it->pc == 0) { break; } - result.push_back(it->pc); + ips->push_back(it->pc); + sps->push_back(it->sp); } } - return result; + return !ips->empty(); } diff --git a/simpleperf/dwarf_unwind.h b/simpleperf/dwarf_unwind.h index e6fd8d35..40cc6ba8 100644 --- a/simpleperf/dwarf_unwind.h +++ b/simpleperf/dwarf_unwind.h @@ -27,7 +27,8 @@ struct ThreadEntry; using ThreadEntry = simpleperf::ThreadEntry; -std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs, - const char* stack, size_t stack_size, bool strict_arch_check); +bool UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs, + const char* stack, size_t stack_size, bool strict_arch_check, + std::vector<uint64_t>* ips, std::vector<uint64_t>* sps); #endif // SIMPLE_PERF_DWARF_UNWIND_H_ diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h index 3b9121c0..44dcd2c9 100644 --- a/simpleperf/get_test_data.h +++ b/simpleperf/get_test_data.h @@ -111,4 +111,7 @@ static const std::string PERF_DATA_WITH_WRONG_IP_IN_CALLCHAIN = "wrong_ip_callch // generated by `simpleperf record --trace-offcpu --duration 2 -g ./simpleperf_runtest_run_and_sleep64`. static const std::string PERF_DATA_WITH_TRACE_OFFCPU = "perf_with_trace_offcpu.data"; +// generated by `simpleperf record -g --log debug sleep 1`. +static const std::string PERF_DATA_WITH_CALLCHAIN_RECORD = "perf_with_callchain_record.data"; + #endif // SIMPLE_PERF_GET_TEST_DATA_H_ diff --git a/simpleperf/nonlinux_support/nonlinux_support.cpp b/simpleperf/nonlinux_support/nonlinux_support.cpp index 8c245f15..648b63a9 100644 --- a/simpleperf/nonlinux_support/nonlinux_support.cpp +++ b/simpleperf/nonlinux_support/nonlinux_support.cpp @@ -20,9 +20,9 @@ #include "dwarf_unwind.h" #include "environment.h" -std::vector<uint64_t> UnwindCallChain(int, const ThreadEntry&, const RegSet&, - const char*, size_t, bool) { - return std::vector<uint64_t>(); +bool UnwindCallChain(int, const ThreadEntry&, const RegSet&, const char*, size_t, bool, + std::vector<uint64_t>*, std::vector<uint64_t>*) { + return false; } bool GetKernelBuildId(BuildId*) { diff --git a/simpleperf/perf_regs.cpp b/simpleperf/perf_regs.cpp index 6aa8bad0..33c64917 100644 --- a/simpleperf/perf_regs.cpp +++ b/simpleperf/perf_regs.cpp @@ -233,3 +233,22 @@ bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) { } return GetRegValue(regs, regno, value); } + +bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) { + size_t regno; + switch (arch) { + case ARCH_X86_64: + case ARCH_X86_32: + regno = PERF_REG_X86_IP; + break; + case ARCH_ARM: + regno = PERF_REG_ARM_PC; + break; + case ARCH_ARM64: + regno = PERF_REG_ARM64_PC; + break; + default: + return false; + } + return GetRegValue(regs, regno, value); +} diff --git a/simpleperf/perf_regs.h b/simpleperf/perf_regs.h index fd88de69..7fbae737 100644 --- a/simpleperf/perf_regs.h +++ b/simpleperf/perf_regs.h @@ -96,5 +96,6 @@ RegSet CreateRegSet(int abi, uint64_t valid_mask, const uint64_t* valid_regs); bool GetRegValue(const RegSet& regs, size_t regno, uint64_t* value); bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value); +bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value); #endif // SIMPLE_PERF_PERF_REGS_H_ diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp index 4e7c9beb..8f4a53dc 100644 --- a/simpleperf/record.cpp +++ b/simpleperf/record.cpp @@ -28,6 +28,8 @@ #include "tracing.h" #include "utils.h" +using namespace simpleperf; + static std::string RecordTypeToString(int record_type) { static std::unordered_map<int, std::string> record_type_names = { {PERF_RECORD_MMAP, "mmap"}, @@ -46,6 +48,7 @@ static std::string RecordTypeToString(int record_type) { {SIMPLE_PERF_RECORD_DSO, "dso"}, {SIMPLE_PERF_RECORD_SYMBOL, "symbol"}, {SIMPLE_PERF_RECORD_EVENT_ID, "event_id"}, + {SIMPLE_PERF_RECORD_CALLCHAIN, "callchain"}, }; auto it = record_type_names.find(record_type); @@ -616,6 +619,97 @@ size_t SampleRecord::ExcludeKernelCallChain() { return user_callchain_length; } +bool SampleRecord::HasUserCallChain() const { + if ((sample_type & PERF_SAMPLE_CALLCHAIN) == 0) { + return false; + } + bool in_user_context = !InKernel(); + for (size_t i = 0; i < callchain_data.ip_nr; ++i) { + if (in_user_context && callchain_data.ips[i] < PERF_CONTEXT_MAX) { + return true; + } + if (callchain_data.ips[i] == PERF_CONTEXT_USER) { + in_user_context = true; + } + } + return false; +} + +void SampleRecord::UpdateUserCallChain(const std::vector<uint64_t>& user_ips) { + std::vector<uint64_t> kernel_ips; + for (size_t i = 0; i < callchain_data.ip_nr; ++i) { + if (callchain_data.ips[i] == PERF_CONTEXT_USER) { + break; + } + kernel_ips.push_back(callchain_data.ips[i]); + } + kernel_ips.push_back(PERF_CONTEXT_USER); + size_t new_size = size() - callchain_data.ip_nr * sizeof(uint64_t) + + (kernel_ips.size() + user_ips.size()) * sizeof(uint64_t); + if (new_size == size()) { + return; + } + char* new_binary = new char[new_size]; + char* p = new_binary; + SetSize(new_size); + MoveToBinaryFormat(header, p); + if (sample_type & PERF_SAMPLE_IDENTIFIER) { + MoveToBinaryFormat(id_data, p); + } + if (sample_type & PERF_SAMPLE_IP) { + MoveToBinaryFormat(ip_data, p); + } + if (sample_type & PERF_SAMPLE_TID) { + MoveToBinaryFormat(tid_data, p); + } + if (sample_type & PERF_SAMPLE_TIME) { + MoveToBinaryFormat(time_data, p); + } + if (sample_type & PERF_SAMPLE_ADDR) { + MoveToBinaryFormat(addr_data, p); + } + if (sample_type & PERF_SAMPLE_ID) { + MoveToBinaryFormat(id_data, p); + } + if (sample_type & PERF_SAMPLE_STREAM_ID) { + MoveToBinaryFormat(stream_id_data, p); + } + if (sample_type & PERF_SAMPLE_CPU) { + MoveToBinaryFormat(cpu_data, p); + } + if (sample_type & PERF_SAMPLE_PERIOD) { + MoveToBinaryFormat(period_data, p); + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + callchain_data.ip_nr = kernel_ips.size() + user_ips.size(); + MoveToBinaryFormat(callchain_data.ip_nr, p); + callchain_data.ips = reinterpret_cast<uint64_t*>(p); + MoveToBinaryFormat(kernel_ips.data(), kernel_ips.size(), p); + MoveToBinaryFormat(user_ips.data(), user_ips.size(), p); + } + if (sample_type & PERF_SAMPLE_RAW) { + MoveToBinaryFormat(raw_data.size, p); + MoveToBinaryFormat(raw_data.data, raw_data.size, p); + raw_data.data = p - raw_data.size; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + MoveToBinaryFormat(branch_stack_data.stack_nr, p); + char* old_p = p; + MoveToBinaryFormat(branch_stack_data.stack, branch_stack_data.stack_nr, p); + branch_stack_data.stack = reinterpret_cast<BranchStackItemType*>(old_p); + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + MoveToBinaryFormat(regs_user_data.abi, p); + CHECK_EQ(regs_user_data.abi, 0u); + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + MoveToBinaryFormat(stack_user_data.size, p); + CHECK_EQ(stack_user_data.size, 0u); + } + CHECK_EQ(p, new_binary + new_size) << "sample_type = " << std::hex << sample_type; + UpdateBinary(new_binary); +} + void SampleRecord::DumpData(size_t indent) const { PrintIndented(indent, "sample_type: 0x%" PRIx64 "\n", sample_type); if (sample_type & PERF_SAMPLE_IP) { @@ -918,6 +1012,65 @@ void EventIdRecord::DumpData(size_t indent) const { } } +CallChainRecord::CallChainRecord(char* p) : Record(p) { + const char* end = p + size(); + p += header_size(); + MoveFromBinaryFormat(pid, p); + MoveFromBinaryFormat(tid, p); + MoveFromBinaryFormat(chain_type, p); + MoveFromBinaryFormat(time, p); + MoveFromBinaryFormat(ip_nr, p); + ips = reinterpret_cast<uint64_t*>(p); + p += ip_nr * sizeof(uint64_t); + sps = reinterpret_cast<uint64_t*>(p); + p += ip_nr * sizeof(uint64_t); + CHECK_EQ(p, end); +} + +CallChainRecord::CallChainRecord(pid_t pid, pid_t tid, CallChainJoiner::ChainType type, + uint64_t time, const std::vector<uint64_t>& ips, + const std::vector<uint64_t>& sps) { + CHECK_EQ(ips.size(), sps.size()); + SetTypeAndMisc(SIMPLE_PERF_RECORD_CALLCHAIN, 0); + this->pid = pid; + this->tid = tid; + this->chain_type = static_cast<int>(type); + this->time = time; + this->ip_nr = ips.size(); + SetSize(header_size() + (4 + ips.size() * 2) * sizeof(uint64_t)); + char* new_binary = new char[size()]; + char* p = new_binary; + MoveToBinaryFormat(header, p); + MoveToBinaryFormat(this->pid, p); + MoveToBinaryFormat(this->tid, p); + MoveToBinaryFormat(this->chain_type, p); + MoveToBinaryFormat(this->time, p); + MoveToBinaryFormat(this->ip_nr, p); + this->ips = reinterpret_cast<uint64_t*>(p); + MoveToBinaryFormat(ips.data(), ips.size(), p); + this->sps = reinterpret_cast<uint64_t*>(p); + MoveToBinaryFormat(sps.data(), sps.size(), p); + UpdateBinary(new_binary); +} + +void CallChainRecord::DumpData(size_t indent) const { + const char* type_name = ""; + switch (chain_type) { + case CallChainJoiner::ORIGINAL_OFFLINE: type_name = "ORIGINAL_OFFLINE"; break; + case CallChainJoiner::ORIGINAL_REMOTE: type_name = "ORIGINAL_REMOTE"; break; + case CallChainJoiner::JOINED_OFFLINE: type_name = "JOINED_OFFLINE"; break; + case CallChainJoiner::JOINED_REMOTE: type_name = "JOINED_REMOTE"; break; + } + PrintIndented(indent, "pid %u\n", pid); + PrintIndented(indent, "tid %u\n", tid); + PrintIndented(indent, "chain_type %s\n", type_name); + PrintIndented(indent, "time %" PRIu64 "\n", time); + PrintIndented(indent, "ip_nr %" PRIu64 "\n", ip_nr); + for (size_t i = 0; i < ip_nr; ++i) { + PrintIndented(indent + 1, "ip 0x%" PRIx64 ", sp 0x%" PRIx64 "\n", ips[i], sps[i]); + } +} + UnknownRecord::UnknownRecord(char* p) : Record(p) { p += header_size(); data = p; @@ -951,6 +1104,8 @@ std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr, uint32 return std::unique_ptr<Record>(new SymbolRecord(p)); case SIMPLE_PERF_RECORD_EVENT_ID: return std::unique_ptr<Record>(new EventIdRecord(p)); + case SIMPLE_PERF_RECORD_CALLCHAIN: + return std::unique_ptr<Record>(new CallChainRecord(p)); default: return std::unique_ptr<Record>(new UnknownRecord(p)); } diff --git a/simpleperf/record.h b/simpleperf/record.h index 83ddba6f..a2934598 100644 --- a/simpleperf/record.h +++ b/simpleperf/record.h @@ -28,6 +28,7 @@ #include <android-base/logging.h> #include "build_id.h" +#include "CallChainJoiner.h" #include "perf_event.h" enum user_record_type { @@ -46,6 +47,7 @@ enum user_record_type { SIMPLE_PERF_RECORD_SPLIT, SIMPLE_PERF_RECORD_SPLIT_END, SIMPLE_PERF_RECORD_EVENT_ID, + SIMPLE_PERF_RECORD_CALLCHAIN, }; // perf_event_header uses u16 to store record size. However, that is not @@ -389,6 +391,9 @@ struct SampleRecord : public Record { void ReplaceRegAndStackWithCallChain(const std::vector<uint64_t>& ips); size_t ExcludeKernelCallChain(); + bool HasUserCallChain() const; + void UpdateUserCallChain(const std::vector<uint64_t>& user_ips); + uint64_t Timestamp() const override; uint32_t Cpu() const override; uint64_t Id() const override; @@ -494,6 +499,28 @@ struct EventIdRecord : public Record { void DumpData(size_t indent) const override; }; +struct CallChainRecord : public Record { + uint32_t pid; + uint32_t tid; + uint64_t chain_type; + uint64_t time; + uint64_t ip_nr; + uint64_t* ips; + uint64_t* sps; + + explicit CallChainRecord(char* p); + + CallChainRecord(pid_t pid, pid_t tid, simpleperf::CallChainJoiner::ChainType type, uint64_t time, + const std::vector<uint64_t>& ips, const std::vector<uint64_t>& sps); + + uint64_t Timestamp() const override { + return time; + } + + protected: + void DumpData(size_t indent) const override; +}; + // UnknownRecord is used for unknown record types, it makes sure all unknown // records are not changed when modifying perf.data. struct UnknownRecord : public Record { diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h index 67f28dac..18194754 100644 --- a/simpleperf/sample_tree.h +++ b/simpleperf/sample_tree.h @@ -111,13 +111,12 @@ class SampleTreeBuilder { RegSet regs = CreateRegSet(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs); - std::vector<uint64_t> unwind_ips = - UnwindCallChain(r.regs_user_data.abi, *thread, regs, - r.stack_user_data.data, - r.GetValidStackSize(), strict_unwind_arch_check_); - if (!unwind_ips.empty()) { + std::vector<uint64_t> user_ips; + std::vector<uint64_t> sps; + if (UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data, + r.GetValidStackSize(), strict_unwind_arch_check_, &user_ips, &sps)) { ips.push_back(PERF_CONTEXT_USER); - ips.insert(ips.end(), unwind_ips.begin(), unwind_ips.end()); + ips.insert(ips.end(), user_ips.begin(), user_ips.end()); } } diff --git a/simpleperf/testdata/perf_with_callchain_record.data b/simpleperf/testdata/perf_with_callchain_record.data Binary files differnew file mode 100644 index 00000000..53184213 --- /dev/null +++ b/simpleperf/testdata/perf_with_callchain_record.data |