diff options
author | Yabin Cui <yabinc@google.com> | 2023-05-01 15:09:30 -0700 |
---|---|---|
committer | Yabin Cui <yabinc@google.com> | 2023-05-03 17:22:35 -0700 |
commit | 3f9f8fd7db1bc2c683a5af605a4e6851b5dfbd21 (patch) | |
tree | 94b486e9cf4b42d1d924027c78946ffd2324662a | |
parent | 5a1b62653125618524809dd0b8536242ddc02dc5 (diff) | |
download | extras-3f9f8fd7db1bc2c683a5af605a4e6851b5dfbd21.tar.gz |
simpleperf: Add --decode-etm in the record cmd
When this option is used, simpleperf decodes ETM data into branch lists while recording.
Bug: 279094308
Test: run simpleperf_unit_test
(cherry picked from https://android-review.googlesource.com/q/commit:290e9c4ed8f7547780a364626de95284ca946a07)
Merged-In: I13dac6ec298c3731aac5428e88f41997f9a922d8
Change-Id: I13dac6ec298c3731aac5428e88f41997f9a922d8
-rw-r--r-- | simpleperf/ETMBranchListFile.cpp | 207 | ||||
-rw-r--r-- | simpleperf/ETMBranchListFile.h | 10 | ||||
-rw-r--r-- | simpleperf/cmd_record.cpp | 98 | ||||
-rw-r--r-- | simpleperf/cmd_record_impl.h | 1 | ||||
-rw-r--r-- | simpleperf/cmd_record_test.cpp | 8 | ||||
-rw-r--r-- | simpleperf/dso.cpp | 26 | ||||
-rw-r--r-- | simpleperf/dso.h | 1 | ||||
-rw-r--r-- | simpleperf/nonlinux_support/nonlinux_support.cpp | 12 | ||||
-rw-r--r-- | simpleperf/record.h | 1 |
9 files changed, 313 insertions, 51 deletions
diff --git a/simpleperf/ETMBranchListFile.cpp b/simpleperf/ETMBranchListFile.cpp index 33d6d228..a3dd8fda 100644 --- a/simpleperf/ETMBranchListFile.cpp +++ b/simpleperf/ETMBranchListFile.cpp @@ -160,4 +160,211 @@ bool StringToBranchListBinaryMap(const std::string& s, BranchListBinaryMap& bina return true; } +class ETMThreadTreeWhenRecording : public ETMThreadTree { + public: + ETMThreadTreeWhenRecording(bool dump_maps_from_proc) + : dump_maps_from_proc_(dump_maps_from_proc) {} + + ThreadTree& GetThreadTree() { return thread_tree_; } + + const ThreadEntry* FindThread(int tid) override { + const ThreadEntry* thread = thread_tree_.FindThread(tid); + if (thread == nullptr) { + if (dump_maps_from_proc_) { + thread = FindThreadFromProc(tid); + } + if (thread == nullptr) { + return nullptr; + } + } + + if (dump_maps_from_proc_) { + DumpMapsFromProc(thread->pid); + } + return thread; + } + + void DisableThreadExitRecords() override { thread_tree_.DisableThreadExitRecords(); } + const MapSet& GetKernelMaps() override { return thread_tree_.GetKernelMaps(); } + + private: + const ThreadEntry* FindThreadFromProc(int tid) { + std::string comm; + pid_t pid; + if (ReadThreadNameAndPid(tid, &comm, &pid)) { + thread_tree_.SetThreadName(pid, tid, comm); + return thread_tree_.FindThread(tid); + } + return nullptr; + } + + void DumpMapsFromProc(int pid) { + if (dumped_processes_.count(pid) == 0) { + dumped_processes_.insert(pid); + std::vector<ThreadMmap> maps; + if (GetThreadMmapsInProcess(pid, &maps)) { + for (const auto& map : maps) { + thread_tree_.AddThreadMap(pid, pid, map.start_addr, map.len, map.pgoff, map.name); + } + } + } + } + + ThreadTree thread_tree_; + bool dump_maps_from_proc_; + std::unordered_set<int> dumped_processes_; +}; + +class ETMBranchListGeneratorImpl : public ETMBranchListGenerator { + public: + ETMBranchListGeneratorImpl(bool dump_maps_from_proc) : thread_tree_(dump_maps_from_proc) {} + + bool ProcessRecord(const Record& r, bool& consumed) override; + 
BranchListBinaryMap GetBranchListBinaryMap() override; + + private: + struct AuxRecordData { + uint64_t start; + uint64_t end; + bool formatted; + AuxRecordData(uint64_t start, uint64_t end, bool formatted) + : start(start), end(end), formatted(formatted) {} + }; + + struct PerCpuData { + std::vector<uint8_t> aux_data; + uint64_t data_offset = 0; + std::queue<AuxRecordData> aux_records; + }; + + bool ProcessAuxRecord(const AuxRecord& r); + bool ProcessAuxTraceRecord(const AuxTraceRecord& r); + void ProcessBranchList(const ETMBranchList& branch_list); + + ETMThreadTreeWhenRecording thread_tree_; + uint64_t kernel_map_start_addr_ = 0; + std::map<uint32_t, PerCpuData> cpu_map_; + std::unique_ptr<ETMDecoder> etm_decoder_; + std::unordered_map<Dso*, BranchListBinaryInfo> branch_list_binary_map_; +}; + +bool ETMBranchListGeneratorImpl::ProcessRecord(const Record& r, bool& consumed) { + consumed = true; // No need to store any records. + uint32_t type = r.type(); + if (type == PERF_RECORD_AUXTRACE_INFO) { + etm_decoder_ = ETMDecoder::Create(*static_cast<const AuxTraceInfoRecord*>(&r), thread_tree_); + if (!etm_decoder_) { + return false; + } + etm_decoder_->RegisterCallback( + [this](const ETMBranchList& branch) { ProcessBranchList(branch); }); + return true; + } + if (type == PERF_RECORD_AUX) { + return ProcessAuxRecord(*static_cast<const AuxRecord*>(&r)); + } + if (type == PERF_RECORD_AUXTRACE) { + return ProcessAuxTraceRecord(*static_cast<const AuxTraceRecord*>(&r)); + } + if (type == PERF_RECORD_MMAP && r.InKernel()) { + auto& mmap_r = *static_cast<const MmapRecord*>(&r); + if (android::base::StartsWith(mmap_r.filename, DEFAULT_KERNEL_MMAP_NAME)) { + kernel_map_start_addr_ = mmap_r.data->addr; + } + } + thread_tree_.GetThreadTree().Update(r); + return true; +} + +bool ETMBranchListGeneratorImpl::ProcessAuxRecord(const AuxRecord& r) { + OverflowResult result = SafeAdd(r.data->aux_offset, r.data->aux_size); + if (result.overflow || r.data->aux_size > SIZE_MAX) { + 
LOG(ERROR) << "invalid aux record"; + return false; + } + size_t size = r.data->aux_size; + uint64_t start = r.data->aux_offset; + uint64_t end = result.value; + PerCpuData& data = cpu_map_[r.Cpu()]; + if (start >= data.data_offset && end <= data.data_offset + data.aux_data.size()) { + // The ETM data is available. Process it now. + uint8_t* p = data.aux_data.data() + (start - data.data_offset); + if (!etm_decoder_) { + LOG(ERROR) << "ETMDecoder isn't created"; + return false; + } + return etm_decoder_->ProcessData(p, size, !r.Unformatted(), r.Cpu()); + } + // The ETM data isn't available. Put the aux record into queue. + data.aux_records.emplace(start, end, !r.Unformatted()); + return true; +} + +bool ETMBranchListGeneratorImpl::ProcessAuxTraceRecord(const AuxTraceRecord& r) { + OverflowResult result = SafeAdd(r.data->offset, r.data->aux_size); + if (result.overflow || r.data->aux_size > SIZE_MAX) { + LOG(ERROR) << "invalid auxtrace record"; + return false; + } + size_t size = r.data->aux_size; + uint64_t start = r.data->offset; + uint64_t end = result.value; + PerCpuData& data = cpu_map_[r.Cpu()]; + data.data_offset = start; + CHECK(r.location.addr != nullptr); + data.aux_data.resize(size); + memcpy(data.aux_data.data(), r.location.addr, size); + + // Process cached aux records. 
+ while (!data.aux_records.empty() && data.aux_records.front().start < end) { + const AuxRecordData& aux = data.aux_records.front(); + if (aux.start >= start && aux.end <= end) { + uint8_t* p = data.aux_data.data() + (aux.start - start); + if (!etm_decoder_) { + LOG(ERROR) << "ETMDecoder isn't created"; + return false; + } + if (!etm_decoder_->ProcessData(p, aux.end - aux.start, aux.formatted, r.Cpu())) { + return false; + } + } + data.aux_records.pop(); + } + return true; +} + +void ETMBranchListGeneratorImpl::ProcessBranchList(const ETMBranchList& branch_list) { + auto& branch_map = branch_list_binary_map_[branch_list.dso].branch_map; + ++branch_map[branch_list.addr][branch_list.branch]; +} + +BranchListBinaryMap ETMBranchListGeneratorImpl::GetBranchListBinaryMap() { + BranchListBinaryMap binary_map; + for (auto& p : branch_list_binary_map_) { + Dso* dso = p.first; + BranchListBinaryInfo& binary = p.second; + binary.dso_type = dso->type(); + BuildId build_id; + GetBuildId(*dso, build_id); + BinaryKey key(dso->Path(), build_id); + if (binary.dso_type == DSO_KERNEL) { + if (kernel_map_start_addr_ == 0) { + LOG(WARNING) << "Can't convert kernel ip addresses without kernel start addr. 
So remove " + "branches for the kernel."; + continue; + } + key.kernel_start_addr = kernel_map_start_addr_; + } + binary_map[key] = std::move(binary); + } + return binary_map; +} + +std::unique_ptr<ETMBranchListGenerator> ETMBranchListGenerator::Create(bool dump_maps_from_proc) { + return std::unique_ptr<ETMBranchListGenerator>( + new ETMBranchListGeneratorImpl(dump_maps_from_proc)); +} + +ETMBranchListGenerator::~ETMBranchListGenerator() {} + } // namespace simpleperf diff --git a/simpleperf/ETMBranchListFile.h b/simpleperf/ETMBranchListFile.h index 47f04b9b..bb6d7335 100644 --- a/simpleperf/ETMBranchListFile.h +++ b/simpleperf/ETMBranchListFile.h @@ -104,6 +104,16 @@ using BranchListBinaryMap = std::unordered_map<BinaryKey, BranchListBinaryInfo, bool BranchListBinaryMapToString(const BranchListBinaryMap& binary_map, std::string& s); bool StringToBranchListBinaryMap(const std::string& s, BranchListBinaryMap& binary_map); +// Convert ETM data into branch lists while recording. +class ETMBranchListGenerator { + public: + static std::unique_ptr<ETMBranchListGenerator> Create(bool dump_maps_from_proc); + + virtual ~ETMBranchListGenerator(); + virtual bool ProcessRecord(const Record& r, bool& consumed) = 0; + virtual BranchListBinaryMap GetBranchListBinaryMap() = 0; +}; + // for testing std::string BranchToProtoString(const std::vector<bool>& branch); std::vector<bool> ProtoStringToBranch(const std::string& s, size_t bit_size); diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp index 2d0d26bc..4ef2b7ff 100644 --- a/simpleperf/cmd_record.cpp +++ b/simpleperf/cmd_record.cpp @@ -49,6 +49,7 @@ #include <unwindstack/Error.h> #include "CallChainJoiner.h" +#include "ETMBranchListFile.h" #include "ETMRecorder.h" #include "IOEventLoop.h" #include "JITDebugReader.h" @@ -239,29 +240,9 @@ class RecordCommand : public Command { " will be used.\n" "--user-buffer-size <buffer_size> Set buffer size in userspace to cache sample data.\n" " By default, it is %s.\n" 
-"--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n" -" Need to be power of 2 and page size aligned.\n" -" Used memory size is (buffer_size * (cpu_count + 1).\n" -" Default is 4M.\n" "--no-inherit Don't record created child threads/processes.\n" "--cpu-percent <percent> Set the max percent of cpu time used for recording.\n" " percent is in range [1-100], default is 25.\n" -"--addr-filter filter_str1,filter_str2,...\n" -" Provide address filters for cs-etm instruction tracing.\n" -" filter_str accepts below formats:\n" -" 'filter <addr-range>' -- trace instructions in a range\n" -" 'start <addr>' -- start tracing when ip is <addr>\n" -" 'stop <addr>' -- stop tracing when ip is <addr>\n" -" <addr-range> accepts below formats:\n" -" <file_path> -- code sections in a binary file\n" -" <vaddr_start>-<vaddr_end>@<file_path> -- part of a binary file\n" -" <kernel_addr_start>-<kernel_addr_end> -- part of kernel space\n" -" <addr> accepts below formats:\n" -" <vaddr>@<file_path> -- virtual addr in a binary file\n" -" <kernel_addr> -- a kernel address\n" -" Examples:\n" -" 'filter 0x456-0x480@/system/lib/libc.so'\n" -" 'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n" "\n" "--tp-filter filter_string Set filter_string for the previous tracepoint event.\n" " Format is in Documentation/trace/events.rst in the kernel.\n" @@ -308,6 +289,29 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING " debug information, which are used for unwinding and dumping symbols.\n" "--add-meta-info key=value Add extra meta info, which will be stored in the recording file.\n" "\n" +"ETM recording options:\n" +"--addr-filter filter_str1,filter_str2,...\n" +" Provide address filters for cs-etm instruction tracing.\n" +" filter_str accepts below formats:\n" +" 'filter <addr-range>' -- trace instructions in a range\n" +" 'start <addr>' -- start tracing when ip is <addr>\n" +" 'stop <addr>' -- stop tracing when ip is <addr>\n" +" <addr-range> accepts 
below formats:\n" +" <file_path> -- code sections in a binary file\n" +" <vaddr_start>-<vaddr_end>@<file_path> -- part of a binary file\n" +" <kernel_addr_start>-<kernel_addr_end> -- part of kernel space\n" +" <addr> accepts below formats:\n" +" <vaddr>@<file_path> -- virtual addr in a binary file\n" +" <kernel_addr> -- a kernel address\n" +" Examples:\n" +" 'filter 0x456-0x480@/system/lib/libc.so'\n" +" 'start 0x456@/system/lib/libc.so,stop 0x480@/system/lib/libc.so'\n" +"--aux-buffer-size <buffer_size> Set aux buffer size, only used in cs-etm event type.\n" +" Need to be power of 2 and page size aligned.\n" +" Used memory size is (buffer_size * (cpu_count + 1).\n" +" Default is 4M.\n" +"--decode-etm Convert ETM data into branch lists while recording.\n" +"\n" "Other options:\n" "--exit-with-parent Stop recording when the thread starting simpleperf dies.\n" "--use-cmd-exit-code Exit with the same exit code as the monitored cmdline.\n" @@ -468,6 +472,8 @@ RECORD_FILTER_OPTION_HELP_MSG_FOR_RECORDING std::unordered_map<std::string, std::string> extra_meta_info_; bool use_cmd_exit_code_ = false; std::vector<std::string> add_counters_; + + std::unique_ptr<ETMBranchListGenerator> etm_branch_list_generator_; }; std::string RecordCommand::LongHelpString() const { @@ -1004,6 +1010,10 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args, return false; } + if (options.PullBoolValue("--decode-etm")) { + etm_branch_list_generator_ = ETMBranchListGenerator::Create(system_wide_collection_); + } + if (!options.PullDoubleValue("--duration", &duration_in_sec_, 1e-9)) { return false; } @@ -1443,9 +1453,11 @@ bool RecordCommand::DumpMaps() { // For system wide recording: // If not aux tracing, only dump kernel maps. Maps of a process is dumped when needed (the // first time a sample hits that process). - // If aux tracing, we don't know which maps will be needed, so dump all process maps. 
To - // reduce pre recording time, we dump process maps in map record thread while recording. - if (event_selection_set_.HasAuxTrace()) { + // If aux tracing with decoding etm data, the maps are dumped by etm_branch_list_generator. + // If aux tracing without decoding etm data, we don't know which maps will be needed, so dump + // all process maps. To reduce pre recording time, we dump process maps in map record thread + // while recording. + if (event_selection_set_.HasAuxTrace() && !etm_branch_list_generator_) { map_record_thread_.emplace(*map_record_reader_); return true; } @@ -1504,6 +1516,15 @@ bool RecordCommand::ProcessRecord(Record* record) { return true; } } + if (etm_branch_list_generator_) { + bool consumed = false; + if (!etm_branch_list_generator_->ProcessRecord(*record, consumed)) { + return false; + } + if (consumed) { + return true; + } + } if (unwind_dwarf_callchain_) { if (post_unwind_) { return SaveRecordForPostUnwinding(record); @@ -2034,34 +2055,9 @@ bool RecordCommand::DumpBuildIdFeature() { if (!dso->HasDumpId() && !event_selection_set_.HasAuxTrace()) { continue; } - if (dso->type() == DSO_KERNEL) { - if (!GetKernelBuildId(&build_id)) { - continue; - } - build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path())); - } else if (dso->type() == DSO_KERNEL_MODULE) { - bool has_build_id = false; - if (android::base::EndsWith(dso->Path(), ".ko")) { - has_build_id = GetBuildIdFromDsoPath(dso->Path(), &build_id); - } else if (const std::string& path = dso->Path(); - path.size() > 2 && path[0] == '[' && path.back() == ']') { - // For kernel modules that we can't find the corresponding file, read build id from /sysfs. 
- has_build_id = GetModuleBuildId(path.substr(1, path.size() - 2), &build_id); - } - if (has_build_id) { - build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, dso->Path())); - } else { - LOG(DEBUG) << "Can't read build_id for module " << dso->Path(); - } - } else if (dso->type() == DSO_ELF_FILE) { - if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP || dso->IsForJavaMethod()) { - continue; - } - if (!GetBuildIdFromDsoPath(dso->Path(), &build_id)) { - LOG(DEBUG) << "Can't read build_id from file " << dso->Path(); - continue; - } - build_id_records.push_back(BuildIdRecord(false, UINT_MAX, build_id, dso->Path())); + if (GetBuildId(*dso, build_id)) { + bool in_kernel = dso->type() == DSO_KERNEL || dso->type() == DSO_KERNEL_MODULE; + build_id_records.emplace_back(in_kernel, UINT_MAX, build_id, dso->Path()); } } if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) { diff --git a/simpleperf/cmd_record_impl.h b/simpleperf/cmd_record_impl.h index 846bfbc9..25f69105 100644 --- a/simpleperf/cmd_record_impl.h +++ b/simpleperf/cmd_record_impl.h @@ -47,6 +47,7 @@ inline const OptionFormatMap& GetRecordCmdOptionFormats() { {"--clockid", {OptionValueType::STRING, OptionType::SINGLE, AppRunnerType::ALLOWED}}, {"--cpu", {OptionValueType::STRING, OptionType::SINGLE, AppRunnerType::ALLOWED}}, {"--cpu-percent", {OptionValueType::UINT, OptionType::SINGLE, AppRunnerType::ALLOWED}}, + {"--decode-etm", {OptionValueType::NONE, OptionType::SINGLE, AppRunnerType::ALLOWED}}, {"--duration", {OptionValueType::DOUBLE, OptionType::SINGLE, AppRunnerType::ALLOWED}}, {"-e", {OptionValueType::STRING, OptionType::ORDERED, AppRunnerType::ALLOWED}}, {"--exclude-perf", {OptionValueType::NONE, OptionType::SINGLE, AppRunnerType::ALLOWED}}, diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp index 395bfd19..4e06cd68 100644 --- a/simpleperf/cmd_record_test.cpp +++ b/simpleperf/cmd_record_test.cpp @@ -1056,6 +1056,14 @@ TEST(record_cmd, addr_filter_option) { 
ASSERT_TRUE(RunRecordCmd({"-e", "cs-etm", "--addr-filter", filter})); } +TEST(record_cmd, decode_etm_option) { + if (!ETMRecorder::GetInstance().CheckEtmSupport().ok()) { + GTEST_LOG_(INFO) << "Omit this test since etm isn't supported on this device"; + return; + } + ASSERT_TRUE(RunRecordCmd({"-e", "cs-etm", "--decode-etm"})); +} + TEST(record_cmd, pmu_event_option) { TEST_REQUIRE_PMU_COUNTER(); TEST_REQUIRE_HW_COUNTER(); diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp index c8f4b3a1..288f75ed 100644 --- a/simpleperf/dso.cpp +++ b/simpleperf/dso.cpp @@ -1025,4 +1025,30 @@ bool GetBuildIdFromDsoPath(const std::string& dso_path, BuildId* build_id) { return false; } +bool GetBuildId(const Dso& dso, BuildId& build_id) { + if (dso.type() == DSO_KERNEL) { + if (GetKernelBuildId(&build_id)) { + return true; + } + } else if (dso.type() == DSO_KERNEL_MODULE) { + bool has_build_id = false; + if (android::base::EndsWith(dso.Path(), ".ko")) { + return GetBuildIdFromDsoPath(dso.Path(), &build_id); + } + if (const std::string& path = dso.Path(); + path.size() > 2 && path[0] == '[' && path.back() == ']') { + // For kernel modules that we can't find the corresponding file, read build id from /sysfs. 
+ return GetModuleBuildId(path.substr(1, path.size() - 2), &build_id); + } + } else if (dso.type() == DSO_ELF_FILE) { + if (dso.Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP || dso.IsForJavaMethod()) { + return false; + } + if (GetBuildIdFromDsoPath(dso.Path(), &build_id)) { + return true; + } + } + return false; +} + } // namespace simpleperf diff --git a/simpleperf/dso.h b/simpleperf/dso.h index 30427766..41cab757 100644 --- a/simpleperf/dso.h +++ b/simpleperf/dso.h @@ -228,6 +228,7 @@ class Dso { const char* DsoTypeToString(DsoType dso_type); bool GetBuildIdFromDsoPath(const std::string& dso_path, BuildId* build_id); +bool GetBuildId(const Dso& dso, BuildId& build_id); } // namespace simpleperf diff --git a/simpleperf/nonlinux_support/nonlinux_support.cpp b/simpleperf/nonlinux_support/nonlinux_support.cpp index 23471f6a..e7084fef 100644 --- a/simpleperf/nonlinux_support/nonlinux_support.cpp +++ b/simpleperf/nonlinux_support/nonlinux_support.cpp @@ -23,10 +23,22 @@ namespace simpleperf { +bool GetThreadMmapsInProcess(pid_t, std::vector<ThreadMmap>*) { + return false; +} + bool GetKernelBuildId(BuildId*) { return false; } +bool GetModuleBuildId(const std::string&, BuildId*, const std::string&) { + return false; +} + +bool ReadThreadNameAndPid(pid_t, std::string*, pid_t*) { + return false; +} + bool CanRecordRawData() { return false; } diff --git a/simpleperf/record.h b/simpleperf/record.h index 54ce89df..925eaaa0 100644 --- a/simpleperf/record.h +++ b/simpleperf/record.h @@ -550,6 +550,7 @@ struct AuxTraceRecord : public Record { AuxTraceRecord(uint64_t aux_size, uint64_t offset, uint32_t idx, uint32_t tid, uint32_t cpu); bool Parse(const perf_event_attr& attr, char* p, char* end) override; + uint32_t Cpu() const override { return data->cpu; } static size_t Size() { return sizeof(perf_event_header) + sizeof(DataType); } protected: |