summary | refs | log | tree | commit | diff
path: root/simpleperf
diff options
context:
space:
mode:
author Yabin Cui <yabinc@google.com> 2017-12-10 13:09:07 -0800
committer Yabin Cui <yabinc@google.com> 2017-12-12 14:08:13 -0800
commit 81a9d33dd0d753e4d4915dfb6f453b916be08813 (patch)
tree 31cb95c6b615187b771565bab5e730cab49c56cb /simpleperf
parent 36eb3ba517453b6ecea815af860c5a136ac1fa42 (diff)
download extras-81a9d33dd0d753e4d4915dfb6f453b916be08813.tar.gz
simpleperf: Use CallChainJoiner.
1. In record cmd, split most code in Run() into three functions to make it easier to maintain.
2. In record cmd, use CallChainJoiner by default when -g option is used. And allow using --no-callchain-joiner option to disable the joiner, and --callchain-joiner-min-matching-nodes to adjust the joiner.
3. Adjust the interface of UnwindCallChain() to return sps used by the joiner.
4. Add functions in SampleRecord to use callchains returned by the joiner. Add CallChainRecord to keep callchains returned by the joiner for debugging.
5. In dump cmd, show callchains of SampleRecord and CallChainRecord for debugging.

Bug: http://b/69383534
Test: run simpleperf_unit_test.
Test: run python test.py.
Change-Id: I951b169dfba0f7c50b6d4d741df83f02f8010626
Diffstat (limited to 'simpleperf')
-rw-r--r-- simpleperf/cmd_dumprecord.cpp 71
-rw-r--r-- simpleperf/cmd_dumprecord_test.cpp 4
-rw-r--r-- simpleperf/cmd_record.cpp 195
-rw-r--r-- simpleperf/cmd_record_test.cpp 5
-rw-r--r-- simpleperf/dwarf_unwind.cpp 30
-rw-r--r-- simpleperf/dwarf_unwind.h 5
-rw-r--r-- simpleperf/get_test_data.h 3
-rw-r--r-- simpleperf/nonlinux_support/nonlinux_support.cpp 6
-rw-r--r-- simpleperf/perf_regs.cpp 19
-rw-r--r-- simpleperf/perf_regs.h 1
-rw-r--r-- simpleperf/record.cpp 155
-rw-r--r-- simpleperf/record.h 27
-rw-r--r-- simpleperf/sample_tree.h 11
-rw-r--r-- simpleperf/testdata/perf_with_callchain_record.data bin 0 -> 20743 bytes
14 files changed, 468 insertions, 64 deletions
diff --git a/simpleperf/cmd_dumprecord.cpp b/simpleperf/cmd_dumprecord.cpp
index 919b62a4..97bd7132 100644
--- a/simpleperf/cmd_dumprecord.cpp
+++ b/simpleperf/cmd_dumprecord.cpp
@@ -26,6 +26,7 @@
#include "command.h"
#include "event_attr.h"
+#include "event_type.h"
#include "perf_regs.h"
#include "record.h"
#include "record_file.h"
@@ -48,7 +49,7 @@ class DumpRecordCommand : public Command {
bool ParseOptions(const std::vector<std::string>& args);
void DumpFileHeader();
void DumpAttrSection();
- void DumpDataSection();
+ bool DumpDataSection();
bool DumpFeatureSection();
std::string record_filename_;
@@ -72,14 +73,23 @@ bool DumpRecordCommand::Run(const std::vector<std::string>& args) {
}
}
ScopedCurrentArch scoped_arch(record_file_arch_);
+ std::unique_ptr<ScopedEventTypes> scoped_event_types;
+ if (record_file_reader_->HasFeature(PerfFileFormat::FEAT_META_INFO)) {
+ std::unordered_map<std::string, std::string> meta_info;
+ if (!record_file_reader_->ReadMetaInfoFeature(&meta_info)) {
+ return false;
+ }
+ auto it = meta_info.find("event_type_info");
+ if (it != meta_info.end()) {
+ scoped_event_types.reset(new ScopedEventTypes(it->second));
+ }
+ }
DumpFileHeader();
DumpAttrSection();
- DumpDataSection();
- if (!DumpFeatureSection()) {
+ if (!DumpDataSection()) {
return false;
}
-
- return true;
+ return DumpFeatureSection();
}
bool DumpRecordCommand::ParseOptions(const std::vector<std::string>& args) {
@@ -134,7 +144,6 @@ void DumpRecordCommand::DumpFileHeader() {
}
}
-
void DumpRecordCommand::DumpAttrSection() {
std::vector<EventAttrWithId> attrs = record_file_reader_->AttrSection();
for (size_t i = 0; i < attrs.size(); ++i) {
@@ -151,11 +160,53 @@ void DumpRecordCommand::DumpAttrSection() {
}
}
-void DumpRecordCommand::DumpDataSection() {
- record_file_reader_->ReadDataSection([](std::unique_ptr<Record> record) {
- record->Dump();
+bool DumpRecordCommand::DumpDataSection() {
+ ThreadTree thread_tree;
+ thread_tree.ShowIpForUnknownSymbol();
+ record_file_reader_->LoadBuildIdAndFileFeatures(thread_tree);
+
+ auto get_symbol_function = [&](uint32_t pid, uint32_t tid, uint64_t ip, std::string& dso_name,
+ std::string& symbol_name, uint64_t& vaddr_in_file) {
+ ThreadEntry* thread = thread_tree.FindThreadOrNew(pid, tid);
+ const MapEntry* map = thread_tree.FindMap(thread, ip);
+ Dso* dso;
+ const Symbol* symbol = thread_tree.FindSymbol(map, ip, &vaddr_in_file, &dso);
+ dso_name = dso->Path();
+ symbol_name = symbol->DemangledName();
+ };
+
+ auto record_callback = [&](std::unique_ptr<Record> r) {
+ r->Dump();
+ thread_tree.Update(*r);
+ if (r->type() == PERF_RECORD_SAMPLE) {
+ SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
+ if (sr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+ PrintIndented(1, "callchain:\n");
+ for (size_t i = 0; i < sr.callchain_data.ip_nr; ++i) {
+ std::string dso_name;
+ std::string symbol_name;
+ uint64_t vaddr_in_file;
+ get_symbol_function(sr.tid_data.pid, sr.tid_data.tid, sr.callchain_data.ips[i],
+ dso_name, symbol_name, vaddr_in_file);
+ PrintIndented(2, "%s (%s[+%" PRIx64 "])\n", symbol_name.c_str(), dso_name.c_str(),
+ vaddr_in_file);
+ }
+ }
+ } else if (r->type() == SIMPLE_PERF_RECORD_CALLCHAIN) {
+ CallChainRecord& cr = *static_cast<CallChainRecord*>(r.get());
+ PrintIndented(1, "callchain:\n");
+ for (size_t i = 0; i < cr.ip_nr; ++i) {
+ std::string dso_name;
+ std::string symbol_name;
+ uint64_t vaddr_in_file;
+ get_symbol_function(cr.pid, cr.tid, cr.ips[i], dso_name, symbol_name, vaddr_in_file);
+ PrintIndented(2, "%s (%s[+%" PRIx64 "])\n", symbol_name.c_str(), dso_name.c_str(),
+ vaddr_in_file);
+ }
+ }
return true;
- }, false);
+ };
+ return record_file_reader_->ReadDataSection(record_callback, false);
}
bool DumpRecordCommand::DumpFeatureSection() {
diff --git a/simpleperf/cmd_dumprecord_test.cpp b/simpleperf/cmd_dumprecord_test.cpp
index b61942b5..ae959210 100644
--- a/simpleperf/cmd_dumprecord_test.cpp
+++ b/simpleperf/cmd_dumprecord_test.cpp
@@ -30,3 +30,7 @@ TEST(cmd_dump, record_file_option) {
TEST(cmd_dump, dump_data_generated_by_linux_perf) {
ASSERT_TRUE(DumpCmd()->Run({GetTestData(PERF_DATA_GENERATED_BY_LINUX_PERF)}));
}
+
+TEST(cmd_dump, dump_callchain_records) {
+ ASSERT_TRUE(DumpCmd()->Run({GetTestData(PERF_DATA_WITH_CALLCHAIN_RECORD)}));
+}
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index 15ba8f2d..41ee2ece 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -36,6 +36,7 @@
#include <android-base/properties.h>
#endif
+#include "CallChainJoiner.h"
#include "command.h"
#include "dwarf_unwind.h"
#include "environment.h"
@@ -52,6 +53,8 @@
#include "utils.h"
#include "workload.h"
+using namespace simpleperf;
+
static std::string default_measured_event_type = "cpu-cycles";
static std::unordered_map<std::string, uint64_t> branch_sampling_type_map = {
@@ -79,6 +82,9 @@ constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528;
// successfully, the buffer size = 1024 * 4K (page size) = 4M.
constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024;
+// Cache size used by CallChainJoiner to cache call chains in memory.
+constexpr size_t DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE = 8 * 1024 * 1024;
+
class RecordCommand : public Command {
public:
RecordCommand()
@@ -164,6 +170,13 @@ class RecordCommand : public Command {
"--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
" will be unwound by default. Use this option to disable the\n"
" unwinding of the user's stack.\n"
+"--no-callchain-joiner If `--call-graph dwarf` option is used, then by default\n"
+" callchain joiner is used to break the 64k stack limit\n"
+" and build more complete call graphs. However, the built\n"
+" call graphs may not be correct in all cases.\n"
+"--callchain-joiner-min-matching-nodes count\n"
+" When callchain joiner is used, set the matched nodes needed to join\n"
+" callchains. The count should be >= 1. By default it is 1.\n"
"-o record_file_name Set record file name, default is perf.data.\n"
"--post-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
" will be unwound while recording by default. But it may lose\n"
@@ -204,7 +217,9 @@ class RecordCommand : public Command {
start_profiling_fd_(-1),
in_app_context_(false),
trace_offcpu_(false),
- exclude_kernel_callchain_(false) {
+ exclude_kernel_callchain_(false),
+ allow_callchain_joiner_(true),
+ callchain_joiner_min_matching_nodes_(1u) {
// If we run `adb shell simpleperf record xxx` and stop profiling by ctrl-c, adb closes
// sockets connecting simpleperf. After that, simpleperf will receive SIGPIPE when writing
// to stdout/stderr, which is a problem when we use '--app' option. So ignore SIGPIPE to
@@ -218,6 +233,9 @@ class RecordCommand : public Command {
private:
bool ParseOptions(const std::vector<std::string>& args,
std::vector<std::string>* non_option_args);
+ bool PrepareRecording(Workload* workload);
+ bool DoRecording(Workload* workload);
+ bool PostProcessRecording(const std::vector<std::string>& args);
bool TraceOffCpu();
bool SetEventSelectionFlags();
bool CreateAndInitRecordFile();
@@ -231,6 +249,7 @@ class RecordCommand : public Command {
void UpdateRecordForEmbeddedElfPath(Record* record);
bool UnwindRecord(SampleRecord& r);
bool PostUnwind(const std::vector<std::string>& args);
+ bool JoinCallChains();
bool DumpAdditionalFeatures(const std::vector<std::string>& args);
bool DumpBuildIdFeature();
bool DumpFileFeature();
@@ -268,15 +287,20 @@ class RecordCommand : public Command {
bool in_app_context_;
bool trace_offcpu_;
bool exclude_kernel_callchain_;
+
+ // For CallChainJoiner
+ bool allow_callchain_joiner_;
+ size_t callchain_joiner_min_matching_nodes_;
+ std::unique_ptr<CallChainJoiner> callchain_joiner_;
};
bool RecordCommand::Run(const std::vector<std::string>& args) {
+ ScopedCurrentArch scoped_arch(GetMachineArch());
if (!CheckPerfEventLimit()) {
return false;
}
AllowMoreOpenedFiles();
- // 1. Parse options, and use default measured event type if not given.
std::vector<std::string> workload_args;
if (!ParseOptions(args, &workload_args)) {
return false;
@@ -290,6 +314,30 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
record_filename_, true);
}
}
+ std::unique_ptr<Workload> workload;
+ if (!workload_args.empty()) {
+ workload = Workload::CreateWorkload(workload_args);
+ if (workload == nullptr) {
+ return false;
+ }
+ }
+ if (!PrepareRecording(workload.get())) {
+ return false;
+ }
+ if (!DoRecording(workload.get())) {
+ return false;
+ }
+ return PostProcessRecording(args);
+}
+
+bool RecordCommand::PrepareRecording(Workload* workload) {
+ // 1. Prepare in other modules.
+ if (!InitPerfClock()) {
+ return false;
+ }
+ PrepareVdsoFile();
+
+ // 2. Add default event type.
if (event_selection_set_.empty()) {
size_t group_id;
if (!event_selection_set_.AddEventType(default_measured_event_type, &group_id)) {
@@ -299,6 +347,8 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
event_selection_set_.SetSampleSpeed(group_id, *sample_speed_);
}
}
+
+ // 3. Process options before opening perf event files.
exclude_kernel_callchain_ = event_selection_set_.ExcludeKernel();
if (trace_offcpu_ && !TraceOffCpu()) {
return false;
@@ -306,22 +356,14 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
if (!SetEventSelectionFlags()) {
return false;
}
-
- // 2. Do some environment preparation.
- ScopedCurrentArch scoped_arch(GetMachineArch());
- if (!InitPerfClock()) {
- return false;
+ if (unwind_dwarf_callchain_ && !post_unwind_ && allow_callchain_joiner_) {
+ bool keep_original_callchains = WOULD_LOG(DEBUG);
+ callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
+ callchain_joiner_min_matching_nodes_,
+ keep_original_callchains));
}
- PrepareVdsoFile();
- // 3. Create workload.
- std::unique_ptr<Workload> workload;
- if (!workload_args.empty()) {
- workload = Workload::CreateWorkload(workload_args);
- if (workload == nullptr) {
- return false;
- }
- }
+ // 4. Add monitored targets.
bool need_to_check_targets = false;
if (system_wide_collection_) {
event_selection_set_.AddMonitoredThreads({-1});
@@ -350,7 +392,7 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
need_to_check_targets = true;
}
- // 4. Open perf_event_files, create mapped buffers for perf_event_files.
+ // 5. Open perf event files and create mapped buffers.
if (!event_selection_set_.OpenEventFiles(cpus_)) {
return false;
}
@@ -359,12 +401,12 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
return false;
}
- // 5. Create perf.data.
+ // 6. Create perf.data.
if (!CreateAndInitRecordFile()) {
return false;
}
- // 6. Add read/signal/periodic Events.
+ // 7. Add read/signal/periodic Events.
auto callback =
std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1);
if (!event_selection_set_.PrepareToReadMmapEventData(callback)) {
@@ -378,21 +420,22 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
}
IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
- [&]() { return loop->ExitLoop(); })) {
+ [loop]() { return loop->ExitLoop(); })) {
return false;
}
if (duration_in_sec_ != 0) {
if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
- [&]() { return loop->ExitLoop(); })) {
+ [loop]() { return loop->ExitLoop(); })) {
return false;
}
}
+ return true;
+}
- // 7. Write records in mapped buffers of perf_event_files to output file while
- // workload is running.
+bool RecordCommand::DoRecording(Workload* workload) {
+ // Write records in mapped buffers of perf_event_files to output file while workload is running.
start_sampling_time_in_ns_ = GetPerfClock();
- LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_
- << " ns";
+ LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns";
if (workload != nullptr && !workload->IsStarted() && !workload->Start()) {
return false;
}
@@ -402,14 +445,22 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
}
close(start_profiling_fd_);
}
- if (!loop->RunLoop()) {
+ if (!event_selection_set_.GetIOEventLoop()->RunLoop()) {
return false;
}
if (!event_selection_set_.FinishReadMmapEventData()) {
return false;
}
+ return true;
+}
+
+bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
+ // 1. Optionally join Callchains.
+ if (callchain_joiner_) {
+ JoinCallChains();
+ }
- // 8. Dump additional features, and close record file.
+ // 2. Dump additional features, and close record file.
if (!DumpAdditionalFeatures(args)) {
return false;
}
@@ -417,14 +468,14 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
return false;
}
- // 9. Unwind dwarf callchain.
+ // 3. Post unwind dwarf callchain.
if (post_unwind_) {
if (!PostUnwind(args)) {
return false;
}
}
- // 10. Show brief record result.
+ // 4. Show brief record result.
LOG(INFO) << "Samples recorded: " << sample_record_count_
<< ". Samples lost: " << lost_record_count_ << ".";
if (sample_record_count_ + lost_record_count_ != 0) {
@@ -438,6 +489,9 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
<< "or increasing sample period(-c).";
}
}
+ if (callchain_joiner_) {
+ callchain_joiner_->DumpStat();
+ }
return true;
}
@@ -611,6 +665,17 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
child_inherit_ = false;
} else if (args[i] == "--no-unwind") {
unwind_dwarf_callchain_ = false;
+ } else if (args[i] == "--no-callchain-joiner") {
+ allow_callchain_joiner_ = false;
+ } else if (args[i] == "--callchain-joiner-min-matching-nodes") {
+ if (!NextArgumentOrError(args, &i)) {
+ return false;
+ }
+ if (!android::base::ParseUint(args[i].c_str(), &callchain_joiner_min_matching_nodes_) ||
+ callchain_joiner_min_matching_nodes_ < 1u) {
+ LOG(ERROR) << "unexpected argument for " << args[i - 1] << " option";
+ return false;
+ }
} else if (args[i] == "-o") {
if (!NextArgumentOrError(args, &i)) {
return false;
@@ -1008,11 +1073,17 @@ bool RecordCommand::UnwindRecord(SampleRecord& r) {
// Normally do strict arch check when unwinding stack. But allow unwinding
// 32-bit processes on 64-bit devices for system wide profiling.
bool strict_arch_check = !system_wide_collection_;
- std::vector<uint64_t> unwind_ips =
- UnwindCallChain(r.regs_user_data.abi, *thread, regs,
- r.stack_user_data.data,
- r.GetValidStackSize(), strict_arch_check);
- r.ReplaceRegAndStackWithCallChain(unwind_ips);
+ std::vector<uint64_t> ips;
+ std::vector<uint64_t> sps;
+ if (!UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data,
+ r.GetValidStackSize(), strict_arch_check, &ips, &sps)) {
+ return false;
+ }
+ r.ReplaceRegAndStackWithCallChain(ips);
+ if (callchain_joiner_) {
+ return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
+ CallChainJoiner::ORIGINAL_OFFLINE, ips, sps);
+ }
}
return true;
}
@@ -1062,6 +1133,62 @@ bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
return true;
}
+bool RecordCommand::JoinCallChains() {
+ // 1. Prepare joined callchains.
+ if (!callchain_joiner_->JoinCallChains()) {
+ return false;
+ }
+ // 2. Move records from record_filename_ to a temporary file.
+ if (!record_file_writer_->Close()) {
+ return false;
+ }
+ record_file_writer_.reset();
+ std::unique_ptr<TemporaryFile> tmpfile = CreateTempFileUsedInRecording();
+ if (!Workload::RunCmd({"mv", record_filename_, tmpfile->path})) {
+ return false;
+ }
+
+ // 3. Read records from the temporary file, and write record with joined call chains back
+ // to record_filename_.
+ std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile->path);
+ record_file_writer_ = CreateRecordFile(record_filename_);
+ if (!reader || !record_file_writer_) {
+ return false;
+ }
+ bool store_callchains = WOULD_LOG(DEBUG);
+
+ auto record_callback = [&](std::unique_ptr<Record> r) {
+ if (r->type() != PERF_RECORD_SAMPLE) {
+ return record_file_writer_->WriteRecord(*r);
+ }
+ SampleRecord& sr = *static_cast<SampleRecord*>(r.get());
+ if (!sr.HasUserCallChain()) {
+ return record_file_writer_->WriteRecord(sr);
+ }
+ pid_t pid;
+ pid_t tid;
+ CallChainJoiner::ChainType type;
+ std::vector<uint64_t> ips;
+ std::vector<uint64_t> sps;
+ do {
+ if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
+ return false;
+ }
+ if (store_callchains) {
+ CallChainRecord record(pid, tid, type, sr.Timestamp(), ips, sps);
+ if (!record_file_writer_->WriteRecord(record)) {
+ return false;
+ }
+ }
+ } while (type != CallChainJoiner::JOINED_OFFLINE);
+ CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
+ CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
+ sr.UpdateUserCallChain(ips);
+ return record_file_writer_->WriteRecord(sr);
+ };
+ return reader->ReadDataSection(record_callback, false);
+}
+
bool RecordCommand::DumpAdditionalFeatures(
const std::vector<std::string>& args) {
// Read data section of perf.data to collect hit file information.
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 3ccf2dc7..660e679b 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -559,3 +559,8 @@ TEST(record_cmd, generate_samples_by_hw_counters) {
ASSERT_TRUE(has_sample);
}
}
+
+TEST(record_cmd, callchain_joiner_options) {
+ ASSERT_TRUE(RunRecordCmd({"--no-callchain-joiner"}));
+ ASSERT_TRUE(RunRecordCmd({"--callchain-joiner-min-matching-nodes", "2"}));
+}
diff --git a/simpleperf/dwarf_unwind.cpp b/simpleperf/dwarf_unwind.cpp
index 2e0a2985..623640cc 100644
--- a/simpleperf/dwarf_unwind.cpp
+++ b/simpleperf/dwarf_unwind.cpp
@@ -94,22 +94,32 @@ static ucontext_t BuildUContextFromRegs(const RegSet& regs __attribute__((unused
return ucontext;
}
-std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread,
- const RegSet& regs, const char* stack,
- size_t stack_size, bool strict_arch_check) {
+bool UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs, const char* stack,
+ size_t stack_size, bool strict_arch_check,
+ std::vector<uint64_t>* ips, std::vector<uint64_t>* sps) {
std::vector<uint64_t> result;
ArchType arch = (abi != PERF_SAMPLE_REGS_ABI_32) ?
ScopedCurrentArch::GetCurrentArch() :
ScopedCurrentArch::GetCurrentArch32();
if (!IsArchTheSame(arch, GetBuildArch(), strict_arch_check)) {
- LOG(FATAL) << "simpleperf is built in arch " << GetArchString(GetBuildArch())
- << ", and can't do stack unwinding for arch " << GetArchString(arch);
- return result;
+ LOG(ERROR) << "simpleperf is built in arch " << GetArchString(GetBuildArch())
+ << ", and can't do stack unwinding for arch " << GetArchString(arch);
+ return false;
}
uint64_t sp_reg_value;
if (!GetSpRegValue(regs, arch, &sp_reg_value)) {
LOG(ERROR) << "can't get sp reg value";
- return result;
+ return false;
+ }
+ if (arch != GetBuildArch()) {
+ uint64_t ip_reg_value;
+ if (!GetIpRegValue(regs, arch, &ip_reg_value)) {
+ LOG(ERROR) << "can't get ip reg value";
+ return false;
+ }
+ ips->push_back(ip_reg_value);
+ sps->push_back(sp_reg_value);
+ return true;
}
uint64_t stack_addr = sp_reg_value;
@@ -121,6 +131,7 @@ std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread,
bt_map.end = map->start_addr + map->len;
bt_map.offset = map->pgoff;
bt_map.name = map->dso->GetDebugFilePath();
+ bt_map.flags = PROT_READ | PROT_EXEC;
}
std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(thread.pid, bt_maps));
@@ -138,8 +149,9 @@ std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread,
if (it->pc == 0) {
break;
}
- result.push_back(it->pc);
+ ips->push_back(it->pc);
+ sps->push_back(it->sp);
}
}
- return result;
+ return !ips->empty();
}
diff --git a/simpleperf/dwarf_unwind.h b/simpleperf/dwarf_unwind.h
index e6fd8d35..40cc6ba8 100644
--- a/simpleperf/dwarf_unwind.h
+++ b/simpleperf/dwarf_unwind.h
@@ -27,7 +27,8 @@ struct ThreadEntry;
using ThreadEntry = simpleperf::ThreadEntry;
-std::vector<uint64_t> UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs,
- const char* stack, size_t stack_size, bool strict_arch_check);
+bool UnwindCallChain(int abi, const ThreadEntry& thread, const RegSet& regs,
+ const char* stack, size_t stack_size, bool strict_arch_check,
+ std::vector<uint64_t>* ips, std::vector<uint64_t>* sps);
#endif // SIMPLE_PERF_DWARF_UNWIND_H_
diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h
index 3b9121c0..44dcd2c9 100644
--- a/simpleperf/get_test_data.h
+++ b/simpleperf/get_test_data.h
@@ -111,4 +111,7 @@ static const std::string PERF_DATA_WITH_WRONG_IP_IN_CALLCHAIN = "wrong_ip_callch
// generated by `simpleperf record --trace-offcpu --duration 2 -g ./simpleperf_runtest_run_and_sleep64`.
static const std::string PERF_DATA_WITH_TRACE_OFFCPU = "perf_with_trace_offcpu.data";
+// generated by `simpleperf record -g --log debug sleep 1`.
+static const std::string PERF_DATA_WITH_CALLCHAIN_RECORD = "perf_with_callchain_record.data";
+
#endif // SIMPLE_PERF_GET_TEST_DATA_H_
diff --git a/simpleperf/nonlinux_support/nonlinux_support.cpp b/simpleperf/nonlinux_support/nonlinux_support.cpp
index 8c245f15..648b63a9 100644
--- a/simpleperf/nonlinux_support/nonlinux_support.cpp
+++ b/simpleperf/nonlinux_support/nonlinux_support.cpp
@@ -20,9 +20,9 @@
#include "dwarf_unwind.h"
#include "environment.h"
-std::vector<uint64_t> UnwindCallChain(int, const ThreadEntry&, const RegSet&,
- const char*, size_t, bool) {
- return std::vector<uint64_t>();
+bool UnwindCallChain(int, const ThreadEntry&, const RegSet&, const char*, size_t, bool,
+ std::vector<uint64_t>*, std::vector<uint64_t>*) {
+ return false;
}
bool GetKernelBuildId(BuildId*) {
diff --git a/simpleperf/perf_regs.cpp b/simpleperf/perf_regs.cpp
index 6aa8bad0..33c64917 100644
--- a/simpleperf/perf_regs.cpp
+++ b/simpleperf/perf_regs.cpp
@@ -233,3 +233,22 @@ bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) {
}
return GetRegValue(regs, regno, value);
}
+
+bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value) {
+ size_t regno;
+ switch (arch) {
+ case ARCH_X86_64:
+ case ARCH_X86_32:
+ regno = PERF_REG_X86_IP;
+ break;
+ case ARCH_ARM:
+ regno = PERF_REG_ARM_PC;
+ break;
+ case ARCH_ARM64:
+ regno = PERF_REG_ARM64_PC;
+ break;
+ default:
+ return false;
+ }
+ return GetRegValue(regs, regno, value);
+}
diff --git a/simpleperf/perf_regs.h b/simpleperf/perf_regs.h
index fd88de69..7fbae737 100644
--- a/simpleperf/perf_regs.h
+++ b/simpleperf/perf_regs.h
@@ -96,5 +96,6 @@ RegSet CreateRegSet(int abi, uint64_t valid_mask, const uint64_t* valid_regs);
bool GetRegValue(const RegSet& regs, size_t regno, uint64_t* value);
bool GetSpRegValue(const RegSet& regs, ArchType arch, uint64_t* value);
+bool GetIpRegValue(const RegSet& regs, ArchType arch, uint64_t* value);
#endif // SIMPLE_PERF_PERF_REGS_H_
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index 4e7c9beb..8f4a53dc 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -28,6 +28,8 @@
#include "tracing.h"
#include "utils.h"
+using namespace simpleperf;
+
static std::string RecordTypeToString(int record_type) {
static std::unordered_map<int, std::string> record_type_names = {
{PERF_RECORD_MMAP, "mmap"},
@@ -46,6 +48,7 @@ static std::string RecordTypeToString(int record_type) {
{SIMPLE_PERF_RECORD_DSO, "dso"},
{SIMPLE_PERF_RECORD_SYMBOL, "symbol"},
{SIMPLE_PERF_RECORD_EVENT_ID, "event_id"},
+ {SIMPLE_PERF_RECORD_CALLCHAIN, "callchain"},
};
auto it = record_type_names.find(record_type);
@@ -616,6 +619,97 @@ size_t SampleRecord::ExcludeKernelCallChain() {
return user_callchain_length;
}
+bool SampleRecord::HasUserCallChain() const {
+ if ((sample_type & PERF_SAMPLE_CALLCHAIN) == 0) {
+ return false;
+ }
+ bool in_user_context = !InKernel();
+ for (size_t i = 0; i < callchain_data.ip_nr; ++i) {
+ if (in_user_context && callchain_data.ips[i] < PERF_CONTEXT_MAX) {
+ return true;
+ }
+ if (callchain_data.ips[i] == PERF_CONTEXT_USER) {
+ in_user_context = true;
+ }
+ }
+ return false;
+}
+
+void SampleRecord::UpdateUserCallChain(const std::vector<uint64_t>& user_ips) {
+ std::vector<uint64_t> kernel_ips;
+ for (size_t i = 0; i < callchain_data.ip_nr; ++i) {
+ if (callchain_data.ips[i] == PERF_CONTEXT_USER) {
+ break;
+ }
+ kernel_ips.push_back(callchain_data.ips[i]);
+ }
+ kernel_ips.push_back(PERF_CONTEXT_USER);
+ size_t new_size = size() - callchain_data.ip_nr * sizeof(uint64_t) +
+ (kernel_ips.size() + user_ips.size()) * sizeof(uint64_t);
+ if (new_size == size()) {
+ return;
+ }
+ char* new_binary = new char[new_size];
+ char* p = new_binary;
+ SetSize(new_size);
+ MoveToBinaryFormat(header, p);
+ if (sample_type & PERF_SAMPLE_IDENTIFIER) {
+ MoveToBinaryFormat(id_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_IP) {
+ MoveToBinaryFormat(ip_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_TID) {
+ MoveToBinaryFormat(tid_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_TIME) {
+ MoveToBinaryFormat(time_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_ADDR) {
+ MoveToBinaryFormat(addr_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_ID) {
+ MoveToBinaryFormat(id_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_STREAM_ID) {
+ MoveToBinaryFormat(stream_id_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_CPU) {
+ MoveToBinaryFormat(cpu_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_PERIOD) {
+ MoveToBinaryFormat(period_data, p);
+ }
+ if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+ callchain_data.ip_nr = kernel_ips.size() + user_ips.size();
+ MoveToBinaryFormat(callchain_data.ip_nr, p);
+ callchain_data.ips = reinterpret_cast<uint64_t*>(p);
+ MoveToBinaryFormat(kernel_ips.data(), kernel_ips.size(), p);
+ MoveToBinaryFormat(user_ips.data(), user_ips.size(), p);
+ }
+ if (sample_type & PERF_SAMPLE_RAW) {
+ MoveToBinaryFormat(raw_data.size, p);
+ MoveToBinaryFormat(raw_data.data, raw_data.size, p);
+ raw_data.data = p - raw_data.size;
+ }
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ MoveToBinaryFormat(branch_stack_data.stack_nr, p);
+ char* old_p = p;
+ MoveToBinaryFormat(branch_stack_data.stack, branch_stack_data.stack_nr, p);
+ branch_stack_data.stack = reinterpret_cast<BranchStackItemType*>(old_p);
+ }
+ if (sample_type & PERF_SAMPLE_REGS_USER) {
+ MoveToBinaryFormat(regs_user_data.abi, p);
+ CHECK_EQ(regs_user_data.abi, 0u);
+ }
+ if (sample_type & PERF_SAMPLE_STACK_USER) {
+ MoveToBinaryFormat(stack_user_data.size, p);
+ CHECK_EQ(stack_user_data.size, 0u);
+ }
+ CHECK_EQ(p, new_binary + new_size) << "sample_type = " << std::hex << sample_type;
+ UpdateBinary(new_binary);
+}
+
void SampleRecord::DumpData(size_t indent) const {
PrintIndented(indent, "sample_type: 0x%" PRIx64 "\n", sample_type);
if (sample_type & PERF_SAMPLE_IP) {
@@ -918,6 +1012,65 @@ void EventIdRecord::DumpData(size_t indent) const {
}
}
+CallChainRecord::CallChainRecord(char* p) : Record(p) {
+ const char* end = p + size();
+ p += header_size();
+ MoveFromBinaryFormat(pid, p);
+ MoveFromBinaryFormat(tid, p);
+ MoveFromBinaryFormat(chain_type, p);
+ MoveFromBinaryFormat(time, p);
+ MoveFromBinaryFormat(ip_nr, p);
+ ips = reinterpret_cast<uint64_t*>(p);
+ p += ip_nr * sizeof(uint64_t);
+ sps = reinterpret_cast<uint64_t*>(p);
+ p += ip_nr * sizeof(uint64_t);
+ CHECK_EQ(p, end);
+}
+
+CallChainRecord::CallChainRecord(pid_t pid, pid_t tid, CallChainJoiner::ChainType type,
+ uint64_t time, const std::vector<uint64_t>& ips,
+ const std::vector<uint64_t>& sps) {
+ CHECK_EQ(ips.size(), sps.size());
+ SetTypeAndMisc(SIMPLE_PERF_RECORD_CALLCHAIN, 0);
+ this->pid = pid;
+ this->tid = tid;
+ this->chain_type = static_cast<int>(type);
+ this->time = time;
+ this->ip_nr = ips.size();
+ SetSize(header_size() + (4 + ips.size() * 2) * sizeof(uint64_t));
+ char* new_binary = new char[size()];
+ char* p = new_binary;
+ MoveToBinaryFormat(header, p);
+ MoveToBinaryFormat(this->pid, p);
+ MoveToBinaryFormat(this->tid, p);
+ MoveToBinaryFormat(this->chain_type, p);
+ MoveToBinaryFormat(this->time, p);
+ MoveToBinaryFormat(this->ip_nr, p);
+ this->ips = reinterpret_cast<uint64_t*>(p);
+ MoveToBinaryFormat(ips.data(), ips.size(), p);
+ this->sps = reinterpret_cast<uint64_t*>(p);
+ MoveToBinaryFormat(sps.data(), sps.size(), p);
+ UpdateBinary(new_binary);
+}
+
+void CallChainRecord::DumpData(size_t indent) const {
+ const char* type_name = "";
+ switch (chain_type) {
+ case CallChainJoiner::ORIGINAL_OFFLINE: type_name = "ORIGINAL_OFFLINE"; break;
+ case CallChainJoiner::ORIGINAL_REMOTE: type_name = "ORIGINAL_REMOTE"; break;
+ case CallChainJoiner::JOINED_OFFLINE: type_name = "JOINED_OFFLINE"; break;
+ case CallChainJoiner::JOINED_REMOTE: type_name = "JOINED_REMOTE"; break;
+ }
+ PrintIndented(indent, "pid %u\n", pid);
+ PrintIndented(indent, "tid %u\n", tid);
+ PrintIndented(indent, "chain_type %s\n", type_name);
+ PrintIndented(indent, "time %" PRIu64 "\n", time);
+ PrintIndented(indent, "ip_nr %" PRIu64 "\n", ip_nr);
+ for (size_t i = 0; i < ip_nr; ++i) {
+ PrintIndented(indent + 1, "ip 0x%" PRIx64 ", sp 0x%" PRIx64 "\n", ips[i], sps[i]);
+ }
+}
+
UnknownRecord::UnknownRecord(char* p) : Record(p) {
p += header_size();
data = p;
@@ -951,6 +1104,8 @@ std::unique_ptr<Record> ReadRecordFromBuffer(const perf_event_attr& attr, uint32
return std::unique_ptr<Record>(new SymbolRecord(p));
case SIMPLE_PERF_RECORD_EVENT_ID:
return std::unique_ptr<Record>(new EventIdRecord(p));
+ case SIMPLE_PERF_RECORD_CALLCHAIN:
+ return std::unique_ptr<Record>(new CallChainRecord(p));
default:
return std::unique_ptr<Record>(new UnknownRecord(p));
}
diff --git a/simpleperf/record.h b/simpleperf/record.h
index 83ddba6f..a2934598 100644
--- a/simpleperf/record.h
+++ b/simpleperf/record.h
@@ -28,6 +28,7 @@
#include <android-base/logging.h>
#include "build_id.h"
+#include "CallChainJoiner.h"
#include "perf_event.h"
enum user_record_type {
@@ -46,6 +47,7 @@ enum user_record_type {
SIMPLE_PERF_RECORD_SPLIT,
SIMPLE_PERF_RECORD_SPLIT_END,
SIMPLE_PERF_RECORD_EVENT_ID,
+ SIMPLE_PERF_RECORD_CALLCHAIN,
};
// perf_event_header uses u16 to store record size. However, that is not
@@ -389,6 +391,9 @@ struct SampleRecord : public Record {
void ReplaceRegAndStackWithCallChain(const std::vector<uint64_t>& ips);
size_t ExcludeKernelCallChain();
+ bool HasUserCallChain() const;
+ void UpdateUserCallChain(const std::vector<uint64_t>& user_ips);
+
uint64_t Timestamp() const override;
uint32_t Cpu() const override;
uint64_t Id() const override;
@@ -494,6 +499,28 @@ struct EventIdRecord : public Record {
void DumpData(size_t indent) const override;
};
+struct CallChainRecord : public Record {
+ uint32_t pid;
+ uint32_t tid;
+ uint64_t chain_type;
+ uint64_t time;
+ uint64_t ip_nr;
+ uint64_t* ips;
+ uint64_t* sps;
+
+ explicit CallChainRecord(char* p);
+
+ CallChainRecord(pid_t pid, pid_t tid, simpleperf::CallChainJoiner::ChainType type, uint64_t time,
+ const std::vector<uint64_t>& ips, const std::vector<uint64_t>& sps);
+
+ uint64_t Timestamp() const override {
+ return time;
+ }
+
+ protected:
+ void DumpData(size_t indent) const override;
+};
+
// UnknownRecord is used for unknown record types, it makes sure all unknown
// records are not changed when modifying perf.data.
struct UnknownRecord : public Record {
diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h
index 67f28dac..18194754 100644
--- a/simpleperf/sample_tree.h
+++ b/simpleperf/sample_tree.h
@@ -111,13 +111,12 @@ class SampleTreeBuilder {
RegSet regs = CreateRegSet(r.regs_user_data.abi,
r.regs_user_data.reg_mask,
r.regs_user_data.regs);
- std::vector<uint64_t> unwind_ips =
- UnwindCallChain(r.regs_user_data.abi, *thread, regs,
- r.stack_user_data.data,
- r.GetValidStackSize(), strict_unwind_arch_check_);
- if (!unwind_ips.empty()) {
+ std::vector<uint64_t> user_ips;
+ std::vector<uint64_t> sps;
+ if (UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data,
+ r.GetValidStackSize(), strict_unwind_arch_check_, &user_ips, &sps)) {
ips.push_back(PERF_CONTEXT_USER);
- ips.insert(ips.end(), unwind_ips.begin(), unwind_ips.end());
+ ips.insert(ips.end(), user_ips.begin(), user_ips.end());
}
}
diff --git a/simpleperf/testdata/perf_with_callchain_record.data b/simpleperf/testdata/perf_with_callchain_record.data
new file mode 100644
index 00000000..53184213
--- /dev/null
+++ b/simpleperf/testdata/perf_with_callchain_record.data
Binary files differ