summary | refs | log | tree | commit | diff
path: root/simpleperf
diff options
context:
space:
mode:
Diffstat (limited to 'simpleperf')
-rw-r--r-- simpleperf/cmd_dumprecord.cpp 71
-rw-r--r-- simpleperf/cmd_dumprecord_test.cpp 4
-rw-r--r-- simpleperf/cmd_record.cpp 195
-rw-r--r-- simpleperf/cmd_record_test.cpp 5
-rw-r--r-- simpleperf/dwarf_unwind.cpp 30
-rw-r--r-- simpleperf/dwarf_unwind.h 5
-rw-r--r-- simpleperf/get_test_data.h 3
-rw-r--r-- simpleperf/nonlinux_support/nonlinux_support.cpp 6
-rw-r--r-- simpleperf/perf_regs.cpp 19
-rw-r--r-- simpleperf/perf_regs.h 1
-rw-r--r-- simpleperf/record.cpp 155
-rw-r--r-- simpleperf/record.h 27
-rw-r--r-- simpleperf/sample_tree.h 11
-rw-r--r-- simpleperf/testdata/perf_with_callchain_record.data bin 0 -> 20743 bytes
14 files changed, 468 insertions, 64 deletions
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp
index 919b62a4..97bd7132 100644
--- a/simpleperf/cmd_dumprecord.cpp
+++ b/simpleperf/cmd_dumprecord.cpp
@@ -26,6 +26,7 @@
#include "command.h"
#include "event_attr.h"
+#include "event_type.h"
#include "perf_regs.h"
#include "record.h"
#include "record_file.h"
@@ -48,7 +49,7 @@ class DumpRecordCommand : public Command {
bool ParseOptions(const std::vector<std::string>& args);
void DumpFileHeader();
void DumpAttrSection();
- void DumpDataSection();
+ bool DumpDataSection();
bool DumpFeatureSection();
std::string record_filename_;
@@ -72,14 +73,23 @@ bool DumpRecordCommand::Run(const std::vector<std::string>& args) {
}
}
ScopedCurrentArch scoped_arch(record_file_arch_);
+ std::unique_ptr<ScopedEventTypes> scoped_event_types;
+ if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) {
+ std::unordered_map<std::string, std::string> meta_info;
+ if (!record_file_reader_->ReadMetaInfoFeature(&meta_info)) {
+ return false;
+ }
+ auto it = meta_info.find("event_type_info");
+ if (it != meta_info.end()) {
+ scoped_event_types.reset(new ScopedEventTypes(it->second));
+ }
+ }
DumpFileHeader();
DumpAttrSection();
- DumpDataSection();
- if (!DumpFeatureSection()) {
+ if (!DumpDataSection()) {
return false;
}
-
- return true;
+ return DumpFeatureSection();
}
bool DumpRecordCommand::ParseOptions(const std::vector<std::string>& args) {
@@ -134,7 +144,6 @@ void DumpRecordCommand::DumpFileHeader() {
}
}
-
void DumpRecordCommand::DumpAttrSection() {
std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
for (size_t i = 0; i < attrs.size(); ++i) {
@@ -151,11 +160,53 @@ void DumpRecordCommand::DumpAttrSection() {
}
}
-void DumpRecordCommand::DumpDataSection() {
- record_file_reader_->ReadDataSection([](std::unique_ptr<Record> record) {
- record->Dump();
+bool DumpRecordCommand::DumpDataSection() {
+ ThreadTree thread_tree;
+ thread_tree.ShowIpForUnknownSymbol();
+ record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree);
+
+ auto get_symbol_function = [&](uint32_t pid, uint32_t tid, uint64_t ip, std::string& dso_name,
+ std::string& symbol_name, uint64_t& vaddr_in_file) {
+ ThreadEntry* thread = thread_tree.FindThreadOrNew(pid, tid);
+ const MapEntry* map = thread_tree.FindMap(thread, ip);
+ Dso* dso;
+ const Symbol* symbol = thread_tree.FindSymbol(map, ip, &vaddr_in_file, &dso);
+ dso_name = dso->Path();
+ symbol_name = symbol->DemangledName();
+ };
+
+ auto record_callback = [&](std::unique_ptr<Record> r) {
+ r->Dump();
+ thread_tree.Update(*r);
+ if (r->type() == PERF_RECORD_SAMPLE) {
+ SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
+ if (sr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+ PrintIndented(1, "callchain:\n");
+ for (size_t i = 0; i < sr.callchain_data.ip_nr; ++i) {
+ std::string dso_name;
+ std::string symbol_name;
+ uint64_t vaddr_in_file;
+ get_symbol_function(sr.tid_data.pid, sr.tid_data.tid, sr.callchain_data.ips[i],
+ dso_name, symbol_name, vaddr_in_file);
+ PrintIndented(2, "%s (%s[+%" PRIx64 "])\n", symbol_name.c_str(), dso_name.c_str(),
+ vaddr_in_file);
+ }
+ }
+ } else if (r->type() == SIMPLE_PERF_RECORD_CALLCHAIN) {
+ CallChainRecord& cr = *static_cast<CallChainRecord*>(r.get());
+ PrintIndented(1, "callchain:\n");
+ for (size_t i = 0; i < cr.ip_nr; ++i) {
+ std::string dso_name;
+ std::string symbol_name;
+ uint64_t vaddr_in_file;
+ get_symbol_function(cr.pid, cr.tid, cr.ips[i], dso_name, symbol_name, vaddr_in_file);
+ PrintIndented(2, "%s (%s[+%" PRIx64 "])\n", symbol_name.c_str(), dso_name.c_str(),
+ vaddr_in_file);
+ }
+ }
return true;
- }, false);
+ };
+ return record_file_reader_->ReadDataSection(record_callback, false);
}
bool DumpRecordCommand::DumpFeatureSection() {
diff --git a/simpleperf/cmd_dumprecord_test.cpp b/simpleperf/cmd_dumprecord_test.cpp
index b61942b5..ae959210 100644
--- a/simpleperf/cmd_dumprecord_test.cpp
+++ b/simpleperf/cmd_dumprecord_test.cpp
@@ -30,3 +30,7 @@ TEST(cmd_dump, record_file_option) {
TEST(cmd_dump, dump_data_generated_by_linux_perf) {
ASSERT_TRUE(DumpCmd()->Run({GetTestData(PERF_DATA_GENERATED_BY_LINUX_PERF)}));
}
+
+TEST(cmd_dump, dump_callchain_records) {
+ ASSERT_TRUE(DumpCmd()->Run({GetTestData(PERF_DATA_WITH_CALLCHAIN_RECORD)}));
+}
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index 15ba8f2d..41ee2ece 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -36,6 +36,7 @@
#include <android-base/properties.h>
#endif
+#include "CallChainJoiner.h"
#include "command.h"
#include "dwarf_unwind.h"
#include "environment.h"
@@ -52,6 +53,8 @@
#include "utils.h"
#include "workload.h"
+using namespace simpleperf;
+
static std::string default_measured_event_type = "cpu-cycles";
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
@@ -79,6 +82,9 @@ constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;
+// Cache size used by CallChainJoiner to cache call chains in memory.
+constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;
+
class RecordCommand : public Command {
public:
RecordCommand()
@@ -164,6 +170,13 @@ class RecordCommand : public Command {
"--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
" will be unwound by default. Use this option to disable the\n"
" unwinding of the user's stack.\n"
+"--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n"
+" callchain joiner is used to break the 64k stack limit\n"
+" and build more complete call graphs. However, the built\n"
+" call graphs may not be correct in all cases.\n"
+"--callchain-joiner-min-matching-nodes count\n"
+" When callchain joiner is used, set the matched nodes needed to join\n"
+" callchains. The count should be >= 1. By default it is 1.\n"
"-o record_file_name Set record file name, default is perf.data.\n"
"--post-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
" will be unwound while recording by default. But it may lose\n"
@@ -204,7 +217,9 @@ class RecordCommand : public Command {
start_profiling_fd_(-1),
in_app_context_(false),
trace_offcpu_(false),
- exclude_kernel_callchain_(false) {
+ exclude_kernel_callchain_(false),
+ allow_callchain_joiner_(true),
+ callchain_joiner_min_matching_nodes_(1u) {
// If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
// sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
// to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
@@ -218,6 +233,9 @@ class RecordCommand : public Command {
private:
bool ParseOptions(const std::vector<std::string>& args,
std::vector<std::string>* non_option_args);
+ bool PrepareRecording(Workload* workload);
+ bool DoRecording(Workload* workload);
+ bool PostProcessRecording(const std::vector<std::string>& args);
bool TraceOffCpu();
bool SetEventSelectionFlags();
bool CreateAndInitRecordFile();
@@ -231,6 +249,7 @@ class RecordCommand : public Command {
void UpdateRecordForEmbeddedElfPath(Record* record);
bool UnwindRecord(SampleRecord& r);
bool PostUnwind(const std::vector<std::string>& args);
+ bool JoinCallChains();
bool DumpAdditionalFeatures(const std::vector<std::string>& args);
bool DumpBuildIdFeature();
bool DumpFileFeature();
@@ -268,15 +287,20 @@ class RecordCommand : public Command {
bool in_app_context_;
bool trace_offcpu_;
bool exclude_kernel_callchain_;
+
+ // For CallChainJoiner
+ bool allow_callchain_joiner_;
+ size_t callchain_joiner_min_matching_nodes_;
+ std::unique_ptr<CallChainJoiner> callchain_joiner_;
};
bool RecordCommand::Run(const std::vector<std::string>& args) {
+ ScopedCurrentArch scoped_arch(GetMachineArch());
if (!CheckPerfEventLimit()) {
return false;
}
AllowMoreOpenedFiles();
- // 1. Parse options, and use default measured event type if not given.
std::vector<std::string> workload_args;
if (!ParseOptions(args, &workload_args)) {
return false;
@@ -290,6 +314,30 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
record_filename_, true);
}
}
+ std::unique_ptr<Workload> workload;
+ if (!workload_args.empty()) {
+ workload = Workload::CreateWorkload(workload_args);
+ if (workload == nullptr) {
+ return false;
+ }
+ }
+ if (!PrepareRecording(workload.get())) {
+ return false;
+ }
+ if (!DoRecording(workload.get())) {
+ return false;
+ }
+ return PostProcessRecording(args);
+}
+
+bool RecordCommand::PrepareRecording(Workload* workload) {
+ // 1. Prepare in other modules.
+ if (!InitPerfClock()) {
+ return false;
+ }
+ PrepareVdsoFile();
+
+ // 2. Add default event type.
if (event_selection_set_.empty()) {
size_t group_id;
if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
@@ -299,6 +347,8 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
}
}
+
+ // 3. Process options before opening perf event files.
exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
if (trace_offcpu_ && !TraceOffCpu()) {
return false;
@@ -306,22 +356,14 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
if (!SetEventSelectionFlags()) {
return false;
}
-
- // 2. Do some environment preparation.
- ScopedCurrentArch scoped_arch(GetMachineArch());
- if (!InitPerfClock()) {
- return false;
+ if (unwind_dwarf_callchain_ && !post_unwind_ && allow_callchain_joiner_) {
+ bool keep_original_callchains = WOULD_LOG(DEBUG);
+ callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
+ callchain_joiner_min_matching_nodes_,
+ keep_original_callchains));
}
- PrepareVdsoFile();
- // 3. Create workload.
- std::unique_ptr<Workload> workload;
- if (!workload_args.empty()) {
- workload = Workload::CreateWorkload(workload_args);
- if (workload == nullptr) {
- return false;
- }
- }
+ // 4. Add monitored targets.
bool need_to_check_targets = false;
if (system_wide_collection_) {
event_selection_set_.AddMonitoredThreads({-1});
@@ -350,7 +392,7 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
need_to_check_targets = true;
}
- // 4. Open perf_event_files, create mapped buffers for perf_event_files.
+ // 5. Open perf event files and create mapped buffers.
if (!event_selection_set_.OpenEventFiles(cpus_)) {
return false;
}
@@ -359,12 +401,12 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
return false;
}
- // 5. Create perf.data.
+ // 6. Create perf.data.
if (!CreateAndInitRecordFile()) {
return false;
}
- // 6. Add read/signal/periodic Events.
+ // 7. Add read/signal/periodic Events.
auto callback =
std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
@@ -378,21 +420,22 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
}
IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
- [&]() { return loop->ExitLoop(); })) {
+ [loop]() { return loop->ExitLoop(); })) {
return false;
}
if (duration_in_sec_ != 0) {
if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
- [&]() { return loop->ExitLoop(); })) {
+ [loop]() { return loop->ExitLoop(); })) {
return false;
}
}
+ return true;
+}
- // 7. Write records in mapped buffers of perf_event_files to output file while
- // workload is running.
+bool RecordCommand::DoRecording(Workload* workload) {
+ // Write records in mapped buffers of perf_event_files to output file while workload is running.
start_sampling_time_in_ns_ = GetPerfClock();
- LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_
- << " ns";
+ LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns";
if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
return false;
}
@@ -402,14 +445,22 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
}
close(start_profiling_fd_);
}
- if (!loop->RunLoop()) {
+ if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
return false;
}
if (!event_selection_set_.FinishReadMmapEventData()) {
return false;
}
+ return true;
+}
+
+bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
+ // 1. Optionally join Callchains.
+ if (callchain_joiner_) {
+ JoinCallChains();
+ }
- // 8. Dump additional features, and close record file.
+ // 2. Dump additional features, and close record file.
if (!DumpAdditionalFeatures(args)) {
return false;
}
@@ -417,14 +468,14 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
return false;
}
- // 9. Unwind dwarf callchain.
+ // 3. Post unwind dwarf callchain.
if (post_unwind_) {
if (!PostUnwind(args)) {
return false;
}
}
- // 10. Show brief record result.
+ // 4. Show brief record result.
LOG(INFO) << "Samples recorded: " << sample_record_count_
<< ". Samples lost: " << lost_record_count_ << ".";
if (sample_record_count_ + lost_record_count_ != 0) {
@@ -438,6 +489,9 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
<< "or increasing sample period(-c).";
}
}
+ if (callchain_joiner_) {
+ callchain_joiner_->DumpStat();
+ }
return true;
}
@@ -611,6 +665,17 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
child_inherit_ = false;
} else if (args[i] == "--no-unwind") {
unwind_dwarf_callchain_ = false;
+ } else if (args[i] == "--no-callchain-joiner") {
+ allow_callchain_joiner_ = false;
+ } else if (args[i] == "--callchain-joiner-min-matching-nodes") {
+ if (!NextArgumentOrError(args, &i)) {
+ return false;
+ }
+ if (!android::base::ParseUint(args[i].c_str(), &callchain_joiner_min_matching_nodes_) ||
+ callchain_joiner_min_matching_nodes_ < 1u) {
+ LOG(ERROR) << "unexpected argument for " << args[i - 1] << " option";
+ return false;
+ }
} else if (args[i] == "-o") {
if (!NextArgumentOrError(args, &i)) {
return false;
@@ -1008,11 +1073,17 @@ bool RecordCommand::UnwindRecord(SampleRecord& r) {
// Normally do strict arch check when unwinding stack. But allow unwinding
// 32-bit processes on 64-bit devices for system wide profiling.
bool strict_arch_check = !system_wide_collection_;
- std::vector<uint64_t> unwind_ips =
- UnwindCallChain(r.regs_user_data.abi, *thread, regs,
- r.stack_user_data.data,
- r.GetValidStackSize(), strict_arch_check);
- r.ReplaceRegAndStackWithCallChain(unwind_ips);
+ std::vector<uint64_t> ips;
+ std::vector<uint64_t> sps;
+ if (!UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data,
+ r.GetValidStackSize(), strict_arch_check, &ips, &sps)) {
+ return false;
+ }
+ r.ReplaceRegAndStackWithCallChain(ips);
+ if (callchain_joiner_) {
+ return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
+ CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
+ }
}
return true;
}
@@ -1062,6 +1133,62 @@ bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
return true;
}
+bool RecordCommand::JoinCallChains() {
+ // 1. Prepare joined callchains.
+ if (!callchain_joiner_->JoinCallChains()) {
+ return false;
+ }
+ // 2. Move records from record_filename_ to a temporary file.
+ if (!record_file_writer_->Close()) {
+ return false;
+ }
+ record_file_writer_.reset();
+ std::unique_ptr<TemporaryFile> tmpfile = CreateTempFileUsedInRecording();
+ if (!Workload::RunCmd({"mv", record_filename_, tmpfile->path})) {
+ return false;
+ }
+
+ // 3. Read records from the temporary file, and write record with joined call chains back
+ // to record_filename_.
+ std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile->path);
+ record_file_writer_ = CreateRecordFile(record_filename_);
+ if (!reader || !record_file_writer_) {
+ return false;
+ }
+ bool store_callchains = WOULD_LOG(DEBUG);
+
+ auto record_callback = [&](std::unique_ptr<Record> r) {
+ if (r->type() != PERF_RECORD_SAMPLE) {
+ return record_file_writer_->WriteRecord(*r);
+ }
+ SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
+ if (!sr.HasUserCallChain()) {
+ return record_file_writer_->WriteRecord(sr);
+ }
+ pid_t pid;
+ pid_t tid;
+ CallChainJoiner::ChainType type;
+ std::vector<uint64_t> ips;
+ std::vector<uint64_t> sps;
+ do {
+ if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
+ return false;
+ }
+ if (store_callchains) {
+ CallChainRecord record(pid, tid, type, sr.Timestamp(), ips, sps);
+ if (!record_file_writer_->WriteRecord(record)) {
+ return false;
+ }
+ }
+ } while (type != CallChainJoiner::JOINED_OFFLINE);
+ CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
+ CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
+ sr.UpdateUserCallChain(ips);
+ return record_file_writer_->WriteRecord(sr);
+ };
+ return reader->ReadDataSection(record_callback, false);
+}
+
bool RecordCommand::DumpAdditionalFeatures(
const std::vector<std::string>& args) {
// Read data section of perf.data to collect hit file information.
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 3ccf2dc7..660e679b 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -559,3 +559,8 @@ TEST(record_cmd, generate_samples_by_hw_counters) {
ASSERT_TRUE(has_sample);
}
}
+
+TEST(record_cmd, callchain_joiner_options) {
+ ASSERT_TRUE(RunRecordCmd({"--no-callchain-joiner"}));
+ ASSERT_TRUE(RunRecordCmd({"--callchain-joiner-min-matching-nodes", "2"}));
+}
diff --git a/simpleperf/dwarf_unwind.cpp b/simpleperf/dwarf_unwind.cpp
index 2e0a2985..623640cc 100644
--- a/simpleperf/dwarf_unwind.cpp
+++ b/simpleperf/dwarf_unwind.cpp
@@ -94,22 +94,32 @@ static ucontext_t BuildUContextFromRegs(const RegSet& regs __attribute__((unused
return ucontext;
}
-std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread,
- const RegSet& regs, const char* stack,
- size_t stack_size, bool strict_arch_check) {
+bool UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs, const char* stack,
+ size_t stack_size, bool strict_arch_check,
+ std::vector<uint64_t>* ips, std::vector<uint64_t>* sps) {
std::vector<uint64_t> result;
ArchType arch = (abi != PERF_SAMPLE_REGS_ABI_32) ?
ScopedCurrentArch::GetCurrentArch() :
ScopedCurrentArch::GetCurrentArch32();
if (!IsArchTheSame(arch, GetBuildArch(), strict_arch_check)) {
- LOG(FATAL) << "simpleperf is built in arch " << GetArchString(GetBuildArch())
- << ", and can't do stack unwinding for arch " << GetArchString(arch);
- return result;
+ LOG(ERROR) << "simpleperf is built in arch " << GetArchString(GetBuildArch())
+ << ", and can't do stack unwinding for arch " << GetArchString(arch);
+ return false;
}
uint64_t sp_reg_value;
if (!GetSpRegValue(regs, arch, &sp_reg_value)) {
LOG(ERROR) << "can't get sp reg value";
- return result;
+ return false;
+ }
+ if (arch != GetBuildArch()) {
+ uint64_t ip_reg_value;
+ if (!GetIpRegValue(regs, arch, &ip_reg_value)) {
+ LOG(ERROR) << "can't get ip reg value";
+ return false;
+ }
+ ips->push_back(ip_reg_value);
+ sps->push_back(sp_reg_value);
+ return true;
}
uint64_t stack_addr = sp_reg_value;
@@ -121,6 +131,7 @@ std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread,
bt_map.end = map->start_addr + map->len;
bt_map.offset = map->pgoff;
bt_map.name = map->dso->GetDebugFilePath();
+ bt_map.flags = PROT_READ | PROT_EXEC;
}
std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(thread.pid, bt_maps));
@@ -138,8 +149,9 @@ std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread,
if (it->pc == 0) {
break;
}
- result.push_back(it->pc);
+ ips->push_back(it->pc);
+ sps->push_back(it->sp);
}
}
- return result;
+ return !ips->empty();
}
diff --git a/simpleperf/dwarf_unwind.h b/simpleperf/dwarf_unwind.h
index e6fd8d35..40cc6ba8 100644
--- a/simpleperf/dwarf_unwind.h
+++ b/simpleperf/dwarf_unwind.h
@@ -27,7 +27,8 @@ struct ThreadEntry;
using ThreadEntry = simpleperf::ThreadEntry;
-std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs,
- const char* stack, size_t stack_size, bool strict_arch_check);
+bool UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs,
+ const char* stack, size_t stack_size, bool strict_arch_check,
+ std::vector<uint64_t>* ips, std::vector<uint64_t>* sps);
#endif // SIMPLE_PERF_DWARF_UNWIND_H_
diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h
index 3b9121c0..44dcd2c9 100644
--- a/simpleperf/get_test_data.h
+++ b/simpleperf/get_test_data.h
@@ -111,4 +111,7 @@ static const std::string PERF_DATA_WITH_WRONG_IP_IN_CALLCHAIN = "wrong_ip_callch
// generated by `simpleperf record --trace-offcpu --duration 2 -g ./simpleperf_runtest_run_and_sleep64`.
static const std::string PERF_DATA_WITH_TRACE_OFFCPU = "perf_with_trace_offcpu.data";
+// generated by `simpleperf record -g --log debug sleep 1`.
+static const std::string PERF_DATA_WITH_CALLCHAIN_RECORD = "perf_with_callchain_record.data";
+
#endif // SIMPLE_PERF_GET_TEST_DATA_H_
diff --git a/simpleperf/nonlinux_support/nonlinux_support.cpp b/simpleperf/nonlinux_support/nonlinux_support.cpp
index 8c245f15..648b63a9 100644
--- a/simpleperf/nonlinux_support/nonlinux_support.cpp
+++ b/simpleperf/nonlinux_support/nonlinux_support.cpp
@@ -20,9 +20,9 @@
#include "dwarf_unwind.h"
#include "environment.h"
-std::vector<uint64_t> UnwindCallChain(int, const ThreadEntry&, const RegSet&,
- const char*, size_t, bool) {
- return std::vector<uint64_t>();
+bool UnwindCallChain(int, const ThreadEntry&, const RegSet&, const char*, size_t, bool,
+ std::vector<uint64_t>*, std::vector<uint64_t>*) {
+ return false;
}
bool GetKernelBuildId(BuildId*) {
diff --git a/simpleperf/perf_regs.cpp b/simpleperf/perf_regs.cpp
index 6aa8bad0..33c64917 100644
--- a/simpleperf/perf_regs.cpp
+++ b/simpleperf/perf_regs.cpp
@@ -233,3 +233,22 @@ bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) {
}
return GetRegValue(regs, regno, value);
}
+
+bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) {
+ size_t regno;
+ switch (arch) {
+ case ARCH_X86_64:
+ case ARCH_X86_32:
+ regno = PERF_REG_X86_IP;
+ break;
+ case ARCH_ARM:
+ regno = PERF_REG_ARM_PC;
+ break;
+ case ARCH_ARM64:
+ regno = PERF_REG_ARM64_PC;
+ break;
+ default:
+ return false;
+ }
+ return GetRegValue(regs, regno, value);
+}
diff --git a/simpleperf/perf_regs.h b/simpleperf/perf_regs.h
index fd88de69..7fbae737 100644
--- a/simpleperf/perf_regs.h
+++ b/simpleperf/perf_regs.h
@@ -96,5 +96,6 @@ RegSet CreateRegSet(int abi, uint64_t valid_mask, const uint64_t* valid_regs);
bool GetRegValue(const RegSet& regs, size_t regno, uint64_t* value);
bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value);
+bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value);
#endif // SIMPLE_PERF_PERF_REGS_H_
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index 4e7c9beb..8f4a53dc 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -28,6 +28,8 @@
#include "tracing.h"
#include "utils.h"
+using namespace simpleperf;
+
static std::string RecordTypeToString(int record_type) {
static std::unordered_map<int, std::string> record_type_names = {
{PERF_RECORD_MMAP, "mmap"},
@@ -46,6 +48,7 @@ static std::string RecordTypeToString(int record_type) {
{SIMPLE_PERF_RECORD_DSO, "dso"},
{SIMPLE_PERF_RECORD_SYMBOL, "symbol"},
{SIMPLE_PERF_RECORD_EVENT_ID, "event_id"},
+ {SIMPLE_PERF_RECORD_CALLCHAIN, "callchain"},
};
auto it = record_type_names.find(record_type);
@@ -616,6 +619,97 @@ size_t SampleRecord::ExcludeKernelCallChain() {
return user_callchain_length;
}
+bool SampleRecord::HasUserCallChain() const {
+ if ((sample_type & PERF_SAMPLE_CALLCHAIN) == 0) {
+ return false;
+ }
+ bool in_user_context = !InKernel();
+ for (size_t i = 0; i < callchain_data.ip_nr; ++i) {
+ if (in_user_context && callchain_data.ips[i] < PERF_CONTEXT_MAX) {
+ return true;
+ }
+ if (callchain_data.ips[i] == PERF_CONTEXT_USER) {
+ in_user_context = true;
+ }
+ }
+ return false;
+}
+
+void SampleRecord::UpdateUserCallChain(const std::vector<uint64_t>& user_ips) {
+ std::vector<uint64_t> kernel_ips;
+ for (size_t i = 0; i < callchain_data.ip_nr; ++i) {
+ if (callchain_data.ips[i] == PERF_CONTEXT_USER) {
+ break;
+ }
+ kernel_ips.push_back(callchain_data.ips[i]);
+ }
+ kernel_ips.push_back(PERF_CONTEXT_USER);
+ size_t new_size = size() - callchain_data.ip_nr * sizeof(uint64_t) +
+ (kernel_ips.size() + user_ips.size()) * sizeof(uint64_t);
+ if (new_size == size()) {
+ return;
+ }
+ char* new_binary = new char[new_size];
+ char* p = new_binary;
+ SetSize(new_size);
+ MoveToBinaryFormat(header, p);
+ if (sample_type & PERF_SAMPLE_IDENTIFIER) {
+ MoveToBinaryFormat(id_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_IP) {
+ MoveToBinaryFormat(ip_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_TID) {
+ MoveToBinaryFormat(tid_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_TIME) {
+ MoveToBinaryFormat(time_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_ADDR) {
+ MoveToBinaryFormat(addr_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_ID) {
+ MoveToBinaryFormat(id_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_STREAM_ID) {
+ MoveToBinaryFormat(stream_id_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_CPU) {
+ MoveToBinaryFormat(cpu_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_PERIOD) {
+ MoveToBinaryFormat(period_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+ callchain_data.ip_nr = kernel_ips.size() + user_ips.size();
+ MoveToBinaryFormat(callchain_data.ip_nr, p);
+ callchain_data.ips = reinterpret_cast<uint64_t*>(p);
+ MoveToBinaryFormat(kernel_ips.data(), kernel_ips.size(), p);
+ MoveToBinaryFormat(user_ips.data(), user_ips.size(), p);
+ }
+ if (sample_type & PERF_SAMPLE_RAW) {
+ MoveToBinaryFormat(raw_data.size, p);
+ MoveToBinaryFormat(raw_data.data, raw_data.size, p);
+ raw_data.data = p - raw_data.size;
+ }
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ MoveToBinaryFormat(branch_stack_data.stack_nr, p);
+ char* old_p = p;
+ MoveToBinaryFormat(branch_stack_data.stack, branch_stack_data.stack_nr, p);
+ branch_stack_data.stack = reinterpret_cast<BranchStackItemType*>(old_p);
+ }
+ if (sample_type & PERF_SAMPLE_REGS_USER) {
+ MoveToBinaryFormat(regs_user_data.abi, p);
+ CHECK_EQ(regs_user_data.abi, 0u);
+ }
+ if (sample_type & PERF_SAMPLE_STACK_USER) {
+ MoveToBinaryFormat(stack_user_data.size, p);
+ CHECK_EQ(stack_user_data.size, 0u);
+ }
+ CHECK_EQ(p, new_binary + new_size) << "sample_type = " << std::hex << sample_type;
+ UpdateBinary(new_binary);
+}
+
void SampleRecord::DumpData(size_t indent) const {
PrintIndented(indent, "sample_type: 0x%" PRIx64 "\n", sample_type);
if (sample_type & PERF_SAMPLE_IP) {
@@ -918,6 +1012,65 @@ void EventIdRecord::DumpData(size_t indent) const {
}
}
+CallChainRecord::CallChainRecord(char* p) : Record(p) {
+ const char* end = p + size();
+ p += header_size();
+ MoveFromBinaryFormat(pid, p);
+ MoveFromBinaryFormat(tid, p);
+ MoveFromBinaryFormat(chain_type, p);
+ MoveFromBinaryFormat(time, p);
+ MoveFromBinaryFormat(ip_nr, p);
+ ips = reinterpret_cast<uint64_t*>(p);
+ p += ip_nr * sizeof(uint64_t);
+ sps = reinterpret_cast<uint64_t*>(p);
+ p += ip_nr * sizeof(uint64_t);
+ CHECK_EQ(p, end);
+}
+
+CallChainRecord::CallChainRecord(pid_t pid, pid_t tid, CallChainJoiner::ChainType type,
+ uint64_t time, const std::vector<uint64_t>& ips,
+ const std::vector<uint64_t>& sps) {
+ CHECK_EQ(ips.size(), sps.size());
+ SetTypeAndMisc(SIMPLE_PERF_RECORD_CALLCHAIN, 0);
+ this->pid = pid;
+ this->tid = tid;
+ this->chain_type = static_cast<int>(type);
+ this->time = time;
+ this->ip_nr = ips.size();
+ SetSize(header_size() + (4 + ips.size() * 2) * sizeof(uint64_t));
+ char* new_binary = new char[size()];
+ char* p = new_binary;
+ MoveToBinaryFormat(header, p);
+ MoveToBinaryFormat(this->pid, p);
+ MoveToBinaryFormat(this->tid, p);
+ MoveToBinaryFormat(this->chain_type, p);
+ MoveToBinaryFormat(this->time, p);
+ MoveToBinaryFormat(this->ip_nr, p);
+ this->ips = reinterpret_cast<uint64_t*>(p);
+ MoveToBinaryFormat(ips.data(), ips.size(), p);
+ this->sps = reinterpret_cast<uint64_t*>(p);
+ MoveToBinaryFormat(sps.data(), sps.size(), p);
+ UpdateBinary(new_binary);
+}
+
+void CallChainRecord::DumpData(size_t indent) const {
+ const char* type_name = "";
+ switch (chain_type) {
+ case CallChainJoiner::ORIGINAL_OFFLINE: type_name = "ORIGINAL_OFFLINE"; break;
+ case CallChainJoiner::ORIGINAL_REMOTE: type_name = "ORIGINAL_REMOTE"; break;
+ case CallChainJoiner::JOINED_OFFLINE: type_name = "JOINED_OFFLINE"; break;
+ case CallChainJoiner::JOINED_REMOTE: type_name = "JOINED_REMOTE"; break;
+ }
+ PrintIndented(indent, "pid %u\n", pid);
+ PrintIndented(indent, "tid %u\n", tid);
+ PrintIndented(indent, "chain_type %s\n", type_name);
+ PrintIndented(indent, "time %" PRIu64 "\n", time);
+ PrintIndented(indent, "ip_nr %" PRIu64 "\n", ip_nr);
+ for (size_t i = 0; i < ip_nr; ++i) {
+ PrintIndented(indent + 1, "ip 0x%" PRIx64 ", sp 0x%" PRIx64 "\n", ips[i], sps[i]);
+ }
+}
+
UnknownRecord::UnknownRecord(char* p) : Record(p) {
p += header_size();
data = p;
@@ -951,6 +1104,8 @@ std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr, uint32
return std::unique_ptr<Record>(new SymbolRecord(p));
case SIMPLE_PERF_RECORD_EVENT_ID:
return std::unique_ptr<Record>(new EventIdRecord(p));
+ case SIMPLE_PERF_RECORD_CALLCHAIN:
+ return std::unique_ptr<Record>(new CallChainRecord(p));
default:
return std::unique_ptr<Record>(new UnknownRecord(p));
}
diff --git a/simpleperf/record.h b/simpleperf/record.h
index 83ddba6f..a2934598 100644
--- a/simpleperf/record.h
+++ b/simpleperf/record.h
@@ -28,6 +28,7 @@
#include <android-base/logging.h>
#include "build_id.h"
+#include "CallChainJoiner.h"
#include "perf_event.h"
enum user_record_type {
@@ -46,6 +47,7 @@ enum user_record_type {
SIMPLE_PERF_RECORD_SPLIT,
SIMPLE_PERF_RECORD_SPLIT_END,
SIMPLE_PERF_RECORD_EVENT_ID,
+ SIMPLE_PERF_RECORD_CALLCHAIN,
};
// perf_event_header uses u16 to store record size. However, that is not
@@ -389,6 +391,9 @@ struct SampleRecord : public Record {
void ReplaceRegAndStackWithCallChain(const std::vector<uint64_t>& ips);
size_t ExcludeKernelCallChain();
+ bool HasUserCallChain() const;
+ void UpdateUserCallChain(const std::vector<uint64_t>& user_ips);
+
uint64_t Timestamp() const override;
uint32_t Cpu() const override;
uint64_t Id() const override;
@@ -494,6 +499,28 @@ struct EventIdRecord : public Record {
void DumpData(size_t indent) const override;
};
+struct CallChainRecord : public Record {
+ uint32_t pid;
+ uint32_t tid;
+ uint64_t chain_type;
+ uint64_t time;
+ uint64_t ip_nr;
+ uint64_t* ips;
+ uint64_t* sps;
+
+ explicit CallChainRecord(char* p);
+
+ CallChainRecord(pid_t pid, pid_t tid, simpleperf::CallChainJoiner::ChainType type, uint64_t time,
+ const std::vector<uint64_t>& ips, const std::vector<uint64_t>& sps);
+
+ uint64_t Timestamp() const override {
+ return time;
+ }
+
+ protected:
+ void DumpData(size_t indent) const override;
+};
+
// UnknownRecord is used for unknown record types, it makes sure all unknown
// records are not changed when modifying perf.data.
struct UnknownRecord : public Record {
diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h
index 67f28dac..18194754 100644
--- a/simpleperf/sample_tree.h
+++ b/simpleperf/sample_tree.h
@@ -111,13 +111,12 @@ class SampleTreeBuilder {
RegSet regs = CreateRegSet(r.regs_user_data.abi,
r.regs_user_data.reg_mask,
r.regs_user_data.regs);
- std::vector<uint64_t> unwind_ips =
- UnwindCallChain(r.regs_user_data.abi, *thread, regs,
- r.stack_user_data.data,
- r.GetValidStackSize(), strict_unwind_arch_check_);
- if (!unwind_ips.empty()) {
+ std::vector<uint64_t> user_ips;
+ std::vector<uint64_t> sps;
+ if (UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data,
+ r.GetValidStackSize(), strict_unwind_arch_check_, &user_ips, &sps)) {
ips.push_back(PERF_CONTEXT_USER);
- ips.insert(ips.end(), unwind_ips.begin(), unwind_ips.end());
+ ips.insert(ips.end(), user_ips.begin(), user_ips.end());
}
}
diff --git a/simpleperf/testdata/perf_with_callchain_record.data b/simpleperf/testdata/perf_with_callchain_record.data
new file mode 100644
index 00000000..53184213
--- /dev/null
+++ b/simpleperf/testdata/perf_with_callchain_record.data
Binary files differ