/* * Copyright (C) 2015 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "command.h" #include "dwarf_unwind.h" #include "environment.h" #include "event_selection_set.h" #include "event_type.h" #include "IOEventLoop.h" #include "perf_clock.h" #include "read_apk.h" #include "read_elf.h" #include "record.h" #include "record_file.h" #include "thread_tree.h" #include "tracing.h" #include "utils.h" #include "workload.h" static std::string default_measured_event_type = "cpu-cycles"; static std::unordered_map branch_sampling_type_map = { {"u", PERF_SAMPLE_BRANCH_USER}, {"k", PERF_SAMPLE_BRANCH_KERNEL}, {"any", PERF_SAMPLE_BRANCH_ANY}, {"any_call", PERF_SAMPLE_BRANCH_ANY_CALL}, {"any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN}, {"ind_call", PERF_SAMPLE_BRANCH_IND_CALL}, }; // The max size of records dumped by kernel is 65535, and dump stack size // should be a multiply of 8, so MAX_DUMP_STACK_SIZE is 65528. constexpr uint32_t MAX_DUMP_STACK_SIZE = 65528; // The max allowed pages in mapped buffer is decided by rlimit(RLIMIT_MEMLOCK). // Here 1024 is a desired value for pages in mapped buffer. If mapped // successfully, the buffer size = 1024 * 4K (page size) = 4M. constexpr size_t DESIRED_PAGES_IN_MAPPED_BUFFER = 1024; class RecordCommand : public Command { public: RecordCommand() : Command( "record", "record sampling info in perf.data", // clang-format off "Usage: simpleperf record [options] [command [command-args]]\n" " Gather sampling information of running [command]. And -a/-p/-t option\n" " can be used to change target of sampling information.\n" " The default options are: -e cpu-cycles -f 4000 -o perf.data.\n" "-a System-wide collection.\n" #if defined(__ANDROID__) "--app package_name Profile the process of an Android application.\n" " On non-rooted devices, the app must be debuggable,\n" " because we use run-as to switch to the app's context.\n" #endif "-b Enable take branch stack sampling. Same as '-j any'\n" "-c count Set event sample period. It means recording one sample when\n" " [count] events happen. Can't be used with -f/-F option.\n" " For tracepoint events, the default option is -c 1.\n" "--call-graph fp | dwarf[,]\n" " Enable call graph recording. Use frame pointer or dwarf debug\n" " frame as the method to parse call graph in stack.\n" " Default is dwarf,65528.\n" "--cpu cpu_item1,cpu_item2,...\n" " Collect samples only on the selected cpus. cpu_item can be cpu\n" " number like 1, or cpu range like 0-3.\n" "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n" " [command]. Here time_in_sec may be any positive\n" " floating point number.\n" "-e event1[:modifier1],event2[:modifier2],...\n" " Select the event list to sample. Use `simpleperf list` to find\n" " all possible event names. Modifiers can be added to define how\n" " the event should be monitored.\n" " Possible modifiers are:\n" " u - monitor user space events only\n" " k - monitor kernel space events only\n" "-f freq Set event sample frequency. It means recording at most [freq]\n" " samples every second. For non-tracepoint events, the default\n" " option is -f 4000.\n" "-F freq Same as '-f freq'.\n" "-g Same as '--call-graph dwarf'.\n" "--group event1[:modifier],event2[:modifier2],...\n" " Similar to -e option. But events specified in the same --group\n" " option are monitored as a group, and scheduled in and out at the\n" " same time.\n" "-j branch_filter1,branch_filter2,...\n" " Enable taken branch stack sampling. Each sample captures a series\n" " of consecutive taken branches.\n" " The following filters are defined:\n" " any: any type of branch\n" " any_call: any function call or system call\n" " any_ret: any function return or system call return\n" " ind_call: any indirect branch\n" " u: only when the branch target is at the user level\n" " k: only when the branch target is in the kernel\n" " This option requires at least one branch type among any, any_call,\n" " any_ret, ind_call.\n" "-m mmap_pages Set the size of the buffer used to receiving sample data from\n" " the kernel. It should be a power of 2. If not set, the max\n" " possible value <= 1024 will be used.\n" "--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n" " kernel symbols will be dumped when needed.\n" "--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n" " dumped in perf.data, to support reporting in another\n" " environment.\n" "--no-inherit Don't record created child threads/processes.\n" "--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n" " will be unwound by default. Use this option to disable the\n" " unwinding of the user's stack.\n" "-o record_file_name Set record file name, default is perf.data.\n" "-p pid1,pid2,... Record events on existing processes. Mutually exclusive\n" " with -a.\n" "--post-unwind If `--call-graph dwarf` option is used, then the user's stack\n" " will be unwound while recording by default. But it may lose\n" " records as stacking unwinding can be time consuming. Use this\n" " option to unwind the user's stack after recording.\n" "--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n" " , then close .\n" "--symfs Look for files with symbols relative to this directory.\n" " This option is used to provide files with symbol table and\n" " debug information, which are used for unwinding and dumping symbols.\n" "-t tid1,tid2,... Record events on existing threads. Mutually exclusive with -a.\n" #if 0 // Below options are only used internally and shouldn't be visible to the public. "--in-app We are already running in the app's context.\n" "--tracepoint-events file_name Read tracepoint events from [file_name] instead of tracefs.\n" #endif // clang-format on ), use_sample_freq_(false), sample_freq_(0), use_sample_period_(false), sample_period_(0), system_wide_collection_(false), branch_sampling_(0), fp_callchain_sampling_(false), dwarf_callchain_sampling_(false), dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE), unwind_dwarf_callchain_(true), post_unwind_(false), child_inherit_(true), duration_in_sec_(0), can_dump_kernel_symbols_(true), dump_symbols_(true), event_selection_set_(false), mmap_page_range_(std::make_pair(1, DESIRED_PAGES_IN_MAPPED_BUFFER)), record_filename_("perf.data"), start_sampling_time_in_ns_(0), sample_record_count_(0), lost_record_count_(0), start_profiling_fd_(-1), in_app_context_(false) { // Stop profiling if parent exits. prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0); app_package_name_ = GetDefaultAppPackageName(); } bool Run(const std::vector& args); private: bool ParseOptions(const std::vector& args, std::vector* non_option_args); bool SetEventSelectionFlags(); bool CreateAndInitRecordFile(); std::unique_ptr CreateRecordFile( const std::string& filename); bool DumpKernelSymbol(); bool DumpTracingData(); bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id); bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id); bool ProcessRecord(Record* record); void UpdateRecordForEmbeddedElfPath(Record* record); bool UnwindRecord(Record* record); bool PostUnwind(const std::vector& args); bool DumpAdditionalFeatures(const std::vector& args); bool DumpBuildIdFeature(); bool DumpFileFeature(); void CollectHitFileInfo(const SampleRecord& r); bool use_sample_freq_; uint64_t sample_freq_; // Sample 'sample_freq_' times per second. bool use_sample_period_; uint64_t sample_period_; // Sample once when 'sample_period_' events occur. bool system_wide_collection_; uint64_t branch_sampling_; bool fp_callchain_sampling_; bool dwarf_callchain_sampling_; uint32_t dump_stack_size_in_dwarf_sampling_; bool unwind_dwarf_callchain_; bool post_unwind_; bool child_inherit_; double duration_in_sec_; bool can_dump_kernel_symbols_; bool dump_symbols_; std::vector cpus_; EventSelectionSet event_selection_set_; std::pair mmap_page_range_; ThreadTree thread_tree_; std::string record_filename_; std::unique_ptr record_file_writer_; uint64_t start_sampling_time_in_ns_; // nanoseconds from machine starting uint64_t sample_record_count_; uint64_t lost_record_count_; int start_profiling_fd_; std::string app_package_name_; bool in_app_context_; }; bool RecordCommand::Run(const std::vector& args) { if (!CheckPerfEventLimit()) { return false; } // 1. Parse options, and use default measured event type if not given. std::vector workload_args; if (!ParseOptions(args, &workload_args)) { return false; } if (!app_package_name_.empty() && !in_app_context_) { // Some users want to profile non debuggable apps on rooted devices. If we use run-as, // it will be impossible when using --app. So don't switch to app's context when we are // root. if (!IsRoot()) { return RunInAppContext(app_package_name_, "record", args, workload_args.size(), record_filename_, !event_selection_set_.GetTracepointEvents().empty()); } } if (event_selection_set_.empty()) { if (!event_selection_set_.AddEventType(default_measured_event_type)) { return false; } } if (!SetEventSelectionFlags()) { return false; } // 2. Do some environment preparation. ScopedCurrentArch scoped_arch(GetMachineArch()); if (!InitPerfClock()) { return false; } PrepareVdsoFile(); // 3. Create workload. std::unique_ptr workload; if (!workload_args.empty()) { workload = Workload::CreateWorkload(workload_args); if (workload == nullptr) { return false; } } bool need_to_check_targets = false; if (system_wide_collection_) { event_selection_set_.AddMonitoredThreads({-1}); } else if (!event_selection_set_.HasMonitoredTarget()) { if (workload != nullptr) { event_selection_set_.AddMonitoredProcesses({workload->GetPid()}); event_selection_set_.SetEnableOnExec(true); if (event_selection_set_.HasInplaceSampler()) { // Start worker early, because the worker process has to setup inplace-sampler server // before we try to connect it. if (!workload->Start()) { return false; } } } else if (!app_package_name_.empty()) { // If app process is not created, wait for it. This allows simpleperf starts before // app process. In this way, we can have a better support of app start-up time profiling. int pid = WaitForAppProcess(app_package_name_); event_selection_set_.AddMonitoredProcesses({pid}); } else { LOG(ERROR) << "No threads to monitor. Try `simpleperf help record` for help"; return false; } } else { need_to_check_targets = true; } // 4. Open perf_event_files, create mapped buffers for perf_event_files. if (!event_selection_set_.OpenEventFiles(cpus_)) { return false; } if (!event_selection_set_.MmapEventFiles(mmap_page_range_.first, mmap_page_range_.second)) { return false; } // 5. Create perf.data. if (!CreateAndInitRecordFile()) { return false; } // 6. Add read/signal/periodic Events. auto callback = std::bind(&RecordCommand::ProcessRecord, this, std::placeholders::_1); if (!event_selection_set_.PrepareToReadMmapEventData(callback)) { return false; } if (!event_selection_set_.HandleCpuHotplugEvents(cpus_)) { return false; } if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) { return false; } IOEventLoop* loop = event_selection_set_.GetIOEventLoop(); if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP}, [&]() { return loop->ExitLoop(); })) { return false; } if (duration_in_sec_ != 0) { if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_), [&]() { return loop->ExitLoop(); })) { return false; } } // 7. Write records in mapped buffers of perf_event_files to output file while // workload is running. start_sampling_time_in_ns_ = GetPerfClock(); LOG(VERBOSE) << "start_sampling_time is " << start_sampling_time_in_ns_ << " ns"; if (workload != nullptr && !workload->IsStarted() && !workload->Start()) { return false; } if (start_profiling_fd_ != -1) { if (!android::base::WriteStringToFd("STARTED", start_profiling_fd_)) { PLOG(ERROR) << "failed to write to start_profiling_fd_"; } close(start_profiling_fd_); } if (!loop->RunLoop()) { return false; } if (!event_selection_set_.FinishReadMmapEventData()) { return false; } // 8. Dump additional features, and close record file. if (!DumpAdditionalFeatures(args)) { return false; } if (!record_file_writer_->Close()) { return false; } // 9. Unwind dwarf callchain. if (post_unwind_) { if (!PostUnwind(args)) { return false; } } // 10. Show brief record result. LOG(INFO) << "Samples recorded: " << sample_record_count_ << ". Samples lost: " << lost_record_count_ << "."; if (sample_record_count_ + lost_record_count_ != 0) { double lost_percent = static_cast(lost_record_count_) / (lost_record_count_ + sample_record_count_); constexpr double LOST_PERCENT_WARNING_BAR = 0.1; if (lost_percent >= LOST_PERCENT_WARNING_BAR) { LOG(WARNING) << "Lost " << (lost_percent * 100) << "% of samples, " << "consider increasing mmap_pages(-m), " << "or decreasing sample frequency(-f), " << "or increasing sample period(-c)."; } } return true; } bool RecordCommand::ParseOptions(const std::vector& args, std::vector* non_option_args) { size_t i; for (i = 0; i < args.size() && !args[i].empty() && args[i][0] == '-'; ++i) { if (args[i] == "-a") { system_wide_collection_ = true; } else if (args[i] == "--app") { if (!NextArgumentOrError(args, &i)) { return false; } app_package_name_ = args[i]; } else if (args[i] == "-b") { branch_sampling_ = branch_sampling_type_map["any"]; } else if (args[i] == "-c") { if (!NextArgumentOrError(args, &i)) { return false; } char* endptr; sample_period_ = strtoull(args[i].c_str(), &endptr, 0); if (*endptr != '\0' || sample_period_ == 0) { LOG(ERROR) << "Invalid sample period: '" << args[i] << "'"; return false; } use_sample_period_ = true; } else if (args[i] == "--call-graph") { if (!NextArgumentOrError(args, &i)) { return false; } std::vector strs = android::base::Split(args[i], ","); if (strs[0] == "fp") { fp_callchain_sampling_ = true; dwarf_callchain_sampling_ = false; } else if (strs[0] == "dwarf") { fp_callchain_sampling_ = false; dwarf_callchain_sampling_ = true; if (strs.size() > 1) { char* endptr; uint64_t size = strtoull(strs[1].c_str(), &endptr, 0); if (*endptr != '\0' || size > UINT_MAX) { LOG(ERROR) << "invalid dump stack size in --call-graph option: " << strs[1]; return false; } if ((size & 7) != 0) { LOG(ERROR) << "dump stack size " << size << " is not 8-byte aligned."; return false; } if (size >= MAX_DUMP_STACK_SIZE) { LOG(ERROR) << "dump stack size " << size << " is bigger than max allowed size " << MAX_DUMP_STACK_SIZE << "."; return false; } dump_stack_size_in_dwarf_sampling_ = static_cast(size); } } else { LOG(ERROR) << "unexpected argument for --call-graph option: " << args[i]; return false; } } else if (args[i] == "--cpu") { if (!NextArgumentOrError(args, &i)) { return false; } cpus_ = GetCpusFromString(args[i]); } else if (args[i] == "--duration") { if (!NextArgumentOrError(args, &i)) { return false; } if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_, 1e-9)) { LOG(ERROR) << "Invalid duration: " << args[i].c_str(); return false; } } else if (args[i] == "-e") { if (!NextArgumentOrError(args, &i)) { return false; } std::vector event_types = android::base::Split(args[i], ","); for (auto& event_type : event_types) { if (!event_selection_set_.AddEventType(event_type)) { return false; } } } else if (args[i] == "-f" || args[i] == "-F") { if (!NextArgumentOrError(args, &i)) { return false; } if (!android::base::ParseUint(args[i].c_str(), &sample_freq_)) { LOG(ERROR) << "Invalid sample frequency: " << args[i]; return false; } if (!CheckSampleFrequency(sample_freq_)) { return false; } use_sample_freq_ = true; } else if (args[i] == "-g") { fp_callchain_sampling_ = false; dwarf_callchain_sampling_ = true; } else if (args[i] == "--group") { if (!NextArgumentOrError(args, &i)) { return false; } std::vector event_types = android::base::Split(args[i], ","); if (!event_selection_set_.AddEventGroup(event_types)) { return false; } } else if (args[i] == "--in-app") { in_app_context_ = true; } else if (args[i] == "-j") { if (!NextArgumentOrError(args, &i)) { return false; } std::vector branch_sampling_types = android::base::Split(args[i], ","); for (auto& type : branch_sampling_types) { auto it = branch_sampling_type_map.find(type); if (it == branch_sampling_type_map.end()) { LOG(ERROR) << "unrecognized branch sampling filter: " << type; return false; } branch_sampling_ |= it->second; } } else if (args[i] == "-m") { if (!NextArgumentOrError(args, &i)) { return false; } char* endptr; uint64_t pages = strtoull(args[i].c_str(), &endptr, 0); if (*endptr != '\0' || !IsPowerOfTwo(pages)) { LOG(ERROR) << "Invalid mmap_pages: '" << args[i] << "'"; return false; } mmap_page_range_.first = mmap_page_range_.second = pages; } else if (args[i] == "--no-dump-kernel-symbols") { can_dump_kernel_symbols_ = false; } else if (args[i] == "--no-dump-symbols") { dump_symbols_ = false; } else if (args[i] == "--no-inherit") { child_inherit_ = false; } else if (args[i] == "--no-unwind") { unwind_dwarf_callchain_ = false; } else if (args[i] == "-o") { if (!NextArgumentOrError(args, &i)) { return false; } record_filename_ = args[i]; } else if (args[i] == "-p") { if (!NextArgumentOrError(args, &i)) { return false; } std::set pids; if (!GetValidThreadsFromThreadString(args[i], &pids)) { return false; } event_selection_set_.AddMonitoredProcesses(pids); } else if (args[i] == "--post-unwind") { post_unwind_ = true; } else if (args[i] == "--start_profiling_fd") { if (!NextArgumentOrError(args, &i)) { return false; } if (!android::base::ParseInt(args[i].c_str(), &start_profiling_fd_, 0)) { LOG(ERROR) << "Invalid start_profiling_fd: " << args[i]; return false; } } else if (args[i] == "--symfs") { if (!NextArgumentOrError(args, &i)) { return false; } if (!Dso::SetSymFsDir(args[i])) { return false; } } else if (args[i] == "-t") { if (!NextArgumentOrError(args, &i)) { return false; } std::set tids; if (!GetValidThreadsFromThreadString(args[i], &tids)) { return false; } event_selection_set_.AddMonitoredThreads(tids); } else if (args[i] == "--tracepoint-events") { if (!NextArgumentOrError(args, &i)) { return false; } if (!SetTracepointEventsFilePath(args[i])) { return false; } } else { ReportUnknownOption(args, i); return false; } } if (use_sample_freq_ && use_sample_period_) { LOG(ERROR) << "-f option can't be used with -c option."; return false; } if (!dwarf_callchain_sampling_) { if (!unwind_dwarf_callchain_) { LOG(ERROR) << "--no-unwind is only used with `--call-graph dwarf` option."; return false; } unwind_dwarf_callchain_ = false; } if (post_unwind_) { if (!dwarf_callchain_sampling_) { LOG(ERROR) << "--post-unwind is only used with `--call-graph dwarf` option."; return false; } if (!unwind_dwarf_callchain_) { LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option."; return false; } } if (fp_callchain_sampling_) { if (GetBuildArch() == ARCH_ARM) { LOG(WARNING) << "`--callgraph fp` option doesn't work well on arm architecture, " << "consider using `-g` option or profiling on aarch64 architecture."; } } if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) { LOG(ERROR) << "Record system wide and existing processes/threads can't be " "used at the same time."; return false; } if (system_wide_collection_ && !IsRoot()) { LOG(ERROR) << "System wide profiling needs root privilege."; return false; } if (dump_symbols_ && can_dump_kernel_symbols_) { // No need to dump kernel symbols as we will dump all required symbols. can_dump_kernel_symbols_ = false; } non_option_args->clear(); for (; i < args.size(); ++i) { non_option_args->push_back(args[i]); } return true; } bool RecordCommand::SetEventSelectionFlags() { if (use_sample_freq_) { event_selection_set_.SetSampleFreq(sample_freq_); } else if (use_sample_period_) { event_selection_set_.SetSamplePeriod(sample_period_); } else { event_selection_set_.UseDefaultSampleFreq(); } event_selection_set_.SampleIdAll(); if (!event_selection_set_.SetBranchSampling(branch_sampling_)) { return false; } if (fp_callchain_sampling_) { event_selection_set_.EnableFpCallChainSampling(); } else if (dwarf_callchain_sampling_) { if (!event_selection_set_.EnableDwarfCallChainSampling( dump_stack_size_in_dwarf_sampling_)) { return false; } } event_selection_set_.SetInherit(child_inherit_); return true; } bool RecordCommand::CreateAndInitRecordFile() { record_file_writer_ = CreateRecordFile(record_filename_); if (record_file_writer_ == nullptr) { return false; } // Use first perf_event_attr and first event id to dump mmap and comm records. EventAttrWithId attr_id = event_selection_set_.GetEventAttrWithId()[0]; if (!DumpKernelSymbol()) { return false; } if (!DumpTracingData()) { return false; } if (!DumpKernelAndModuleMmaps(*attr_id.attr, attr_id.ids[0])) { return false; } if (!DumpThreadCommAndMmaps(*attr_id.attr, attr_id.ids[0])) { return false; } return true; } std::unique_ptr RecordCommand::CreateRecordFile( const std::string& filename) { std::unique_ptr writer = RecordFileWriter::CreateInstance(filename); if (writer == nullptr) { return nullptr; } if (!writer->WriteAttrSection(event_selection_set_.GetEventAttrWithId())) { return nullptr; } return writer; } bool RecordCommand::DumpKernelSymbol() { if (can_dump_kernel_symbols_) { std::string kallsyms; if (event_selection_set_.NeedKernelSymbol() && CheckKernelSymbolAddresses()) { if (!android::base::ReadFileToString("/proc/kallsyms", &kallsyms)) { PLOG(ERROR) << "failed to read /proc/kallsyms"; return false; } KernelSymbolRecord r(kallsyms); if (!ProcessRecord(&r)) { return false; } } } return true; } bool RecordCommand::DumpTracingData() { std::vector tracepoint_event_types = event_selection_set_.GetTracepointEvents(); if (tracepoint_event_types.empty() || !CanRecordRawData()) { return true; // No need to dump tracing data, or can't do it. } std::vector tracing_data; if (!GetTracingData(tracepoint_event_types, &tracing_data)) { return false; } TracingDataRecord record(tracing_data); if (!ProcessRecord(&record)) { return false; } return true; } bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id) { KernelMmap kernel_mmap; std::vector module_mmaps; GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps); MmapRecord mmap_record(attr, true, UINT_MAX, 0, kernel_mmap.start_addr, kernel_mmap.len, 0, kernel_mmap.filepath, event_id); if (!ProcessRecord(&mmap_record)) { return false; } for (auto& module_mmap : module_mmaps) { MmapRecord mmap_record(attr, true, UINT_MAX, 0, module_mmap.start_addr, module_mmap.len, 0, module_mmap.filepath, event_id); if (!ProcessRecord(&mmap_record)) { return false; } } return true; } bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id) { // Decide which processes and threads to dump. // For system_wide profiling, dump all threads. // For non system wide profiling, build dump_threads. bool all_threads = system_wide_collection_; std::set dump_threads = event_selection_set_.GetMonitoredThreads(); for (const auto& pid : event_selection_set_.GetMonitoredProcesses()) { std::vector tids = GetThreadsInProcess(pid); dump_threads.insert(tids.begin(), tids.end()); } // Collect processes to dump. std::vector processes; if (all_threads) { processes = GetAllProcesses(); } else { std::set process_set; for (const auto& tid : dump_threads) { pid_t pid; if (!GetProcessForThread(tid, &pid)) { continue; } process_set.insert(pid); } processes.insert(processes.end(), process_set.begin(), process_set.end()); } // Dump each process and its threads. for (auto& pid : processes) { // Dump mmap records. std::vector thread_mmaps; if (!GetThreadMmapsInProcess(pid, &thread_mmaps)) { // The process may exit before we get its info. continue; } for (const auto& map : thread_mmaps) { if (map.executable == 0) { continue; // No need to dump non-executable mmap info. } MmapRecord record(attr, false, pid, pid, map.start_addr, map.len, map.pgoff, map.name, event_id); if (!ProcessRecord(&record)) { return false; } } // Dump process name. std::string name; if (GetThreadName(pid, &name)) { CommRecord record(attr, pid, pid, name, event_id, 0); if (!ProcessRecord(&record)) { return false; } } // Dump thread info. std::vector threads = GetThreadsInProcess(pid); for (const auto& tid : threads) { if (tid == pid) { continue; } if (all_threads || dump_threads.find(tid) != dump_threads.end()) { ForkRecord fork_record(attr, pid, tid, pid, pid, event_id); if (!ProcessRecord(&fork_record)) { return false; } if (GetThreadName(tid, &name)) { CommRecord comm_record(attr, pid, tid, name, event_id, 0); if (!ProcessRecord(&comm_record)) { return false; } } } } } return true; } bool RecordCommand::ProcessRecord(Record* record) { if (system_wide_collection_ && record->type() == PERF_RECORD_SAMPLE) { auto& r = *static_cast(record); // Omit samples get before start sampling time. if (r.time_data.time < start_sampling_time_in_ns_) { return true; } } UpdateRecordForEmbeddedElfPath(record); if (unwind_dwarf_callchain_ && !post_unwind_) { thread_tree_.Update(*record); if (!UnwindRecord(record)) { return false; } } if (record->type() == PERF_RECORD_SAMPLE) { sample_record_count_++; } else if (record->type() == PERF_RECORD_LOST) { lost_record_count_ += static_cast(record)->lost; } bool result = record_file_writer_->WriteRecord(*record); return result; } template void UpdateMmapRecordForEmbeddedElfPath(RecordType* record) { RecordType& r = *record; if (!r.InKernel() && r.data->pgoff != 0) { // For the case of a shared library "foobar.so" embedded // inside an APK, we rewrite the original MMAP from // ["path.apk" offset=X] to ["path.apk!/foobar.so" offset=W] // so as to make the library name explicit. This update is // done here (as part of the record operation) as opposed to // on the host during the report, since we want to report // the correct library name even if the the APK in question // is not present on the host. The new offset W is // calculated to be with respect to the start of foobar.so, // not to the start of path.apk. EmbeddedElf* ee = ApkInspector::FindElfInApkByOffset(r.filename, r.data->pgoff); if (ee != nullptr) { // Compute new offset relative to start of elf in APK. auto data = *r.data; data.pgoff -= ee->entry_offset(); r.SetDataAndFilename(data, GetUrlInApk(r.filename, ee->entry_name())); } } } void RecordCommand::UpdateRecordForEmbeddedElfPath(Record* record) { if (record->type() == PERF_RECORD_MMAP) { UpdateMmapRecordForEmbeddedElfPath(static_cast(record)); } else if (record->type() == PERF_RECORD_MMAP2) { UpdateMmapRecordForEmbeddedElfPath(static_cast(record)); } } bool RecordCommand::UnwindRecord(Record* record) { if (record->type() == PERF_RECORD_SAMPLE) { SampleRecord& r = *static_cast(record); if ((r.sample_type & PERF_SAMPLE_CALLCHAIN) && (r.sample_type & PERF_SAMPLE_REGS_USER) && (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) && (r.GetValidStackSize() > 0)) { ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid); RegSet regs = CreateRegSet(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs); // Normally do strict arch check when unwinding stack. But allow unwinding // 32-bit processes on 64-bit devices for system wide profiling. bool strict_arch_check = !system_wide_collection_; std::vector unwind_ips = UnwindCallChain(r.regs_user_data.abi, *thread, regs, r.stack_user_data.data, r.GetValidStackSize(), strict_arch_check); r.ReplaceRegAndStackWithCallChain(unwind_ips); } } return true; } bool RecordCommand::PostUnwind(const std::vector& args) { thread_tree_.ClearThreadAndMap(); std::unique_ptr reader = RecordFileReader::CreateInstance(record_filename_); if (reader == nullptr) { return false; } std::string tmp_filename = record_filename_ + ".tmp"; record_file_writer_ = CreateRecordFile(tmp_filename); if (record_file_writer_ == nullptr) { return false; } bool result = reader->ReadDataSection( [this](std::unique_ptr record) { thread_tree_.Update(*record); if (!UnwindRecord(record.get())) { return false; } return record_file_writer_->WriteRecord(*record); }, false); if (!result) { return false; } if (!DumpAdditionalFeatures(args)) { return false; } if (!record_file_writer_->Close()) { return false; } if (unlink(record_filename_.c_str()) != 0) { PLOG(ERROR) << "failed to remove " << record_filename_; return false; } if (rename(tmp_filename.c_str(), record_filename_.c_str()) != 0) { PLOG(ERROR) << "failed to rename " << tmp_filename << " to " << record_filename_; return false; } return true; } bool RecordCommand::DumpAdditionalFeatures( const std::vector& args) { // Read data section of perf.data to collect hit file information. thread_tree_.ClearThreadAndMap(); if (CheckKernelSymbolAddresses()) { Dso::ReadKernelSymbolsFromProc(); } auto callback = [&](const Record* r) { thread_tree_.Update(*r); if (r->type() == PERF_RECORD_SAMPLE) { CollectHitFileInfo(*reinterpret_cast(r)); } }; if (!record_file_writer_->ReadDataSection(callback)) { return false; } size_t feature_count = 5; if (branch_sampling_) { feature_count++; } if (dump_symbols_) { feature_count++; } if (!record_file_writer_->BeginWriteFeatures(feature_count)) { return false; } if (!DumpBuildIdFeature()) { return false; } if (dump_symbols_ && !DumpFileFeature()) { return false; } utsname uname_buf; if (TEMP_FAILURE_RETRY(uname(&uname_buf)) != 0) { PLOG(ERROR) << "uname() failed"; return false; } if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_OSRELEASE, uname_buf.release)) { return false; } if (!record_file_writer_->WriteFeatureString(PerfFileFormat::FEAT_ARCH, uname_buf.machine)) { return false; } std::string exec_path = android::base::GetExecutablePath(); if (exec_path.empty()) exec_path = "simpleperf"; std::vector cmdline; cmdline.push_back(exec_path); cmdline.push_back("record"); cmdline.insert(cmdline.end(), args.begin(), args.end()); if (!record_file_writer_->WriteCmdlineFeature(cmdline)) { return false; } if (branch_sampling_ != 0 && !record_file_writer_->WriteBranchStackFeature()) { return false; } std::unordered_map info_map; info_map["simpleperf_version"] = GetSimpleperfVersion(); if (!record_file_writer_->WriteMetaInfoFeature(info_map)) { return false; } if (!record_file_writer_->EndWriteFeatures()) { return false; } return true; } bool RecordCommand::DumpBuildIdFeature() { std::vector build_id_records; BuildId build_id; std::vector dso_v = thread_tree_.GetAllDsos(); for (Dso* dso : dso_v) { if (!dso->HasDumpId()) { continue; } if (dso->type() == DSO_KERNEL) { if (!GetKernelBuildId(&build_id)) { continue; } build_id_records.push_back( BuildIdRecord(true, UINT_MAX, build_id, dso->Path())); } else if (dso->type() == DSO_KERNEL_MODULE) { std::string path = dso->Path(); std::string module_name = basename(&path[0]); if (android::base::EndsWith(module_name, ".ko")) { module_name = module_name.substr(0, module_name.size() - 3); } if (!GetModuleBuildId(module_name, &build_id)) { LOG(DEBUG) << "can't read build_id for module " << module_name; continue; } build_id_records.push_back(BuildIdRecord(true, UINT_MAX, build_id, path)); } else { if (dso->Path() == DEFAULT_EXECNAME_FOR_THREAD_MMAP) { continue; } auto tuple = SplitUrlInApk(dso->Path()); if (std::get<0>(tuple)) { ElfStatus result = GetBuildIdFromApkFile(std::get<1>(tuple), std::get<2>(tuple), &build_id); if (result != ElfStatus::NO_ERROR) { LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": " << result; continue; } } else { ElfStatus result = GetBuildIdFromElfFile(dso->Path(), &build_id); if (result != ElfStatus::NO_ERROR) { LOG(DEBUG) << "can't read build_id from file " << dso->Path() << ": " << result; continue; } } build_id_records.push_back( BuildIdRecord(false, UINT_MAX, build_id, dso->Path())); } } if (!record_file_writer_->WriteBuildIdFeature(build_id_records)) { return false; } return true; } bool RecordCommand::DumpFileFeature() { std::vector dso_v = thread_tree_.GetAllDsos(); for (Dso* dso : dso_v) { if (!dso->HasDumpId()) { continue; } uint32_t dso_type = dso->type(); uint64_t min_vaddr = dso->MinVirtualAddress(); // Dumping all symbols in hit files takes too much space, so only dump // needed symbols. const std::vector& symbols = dso->GetSymbols(); std::vector dump_symbols; for (const auto& sym : symbols) { if (sym.HasDumpId()) { dump_symbols.push_back(&sym); } } std::sort(dump_symbols.begin(), dump_symbols.end(), Symbol::CompareByAddr); if (!record_file_writer_->WriteFileFeature(dso->Path(), dso_type, min_vaddr, dump_symbols)) { return false; } } return true; } void RecordCommand::CollectHitFileInfo(const SampleRecord& r) { const ThreadEntry* thread = thread_tree_.FindThreadOrNew(r.tid_data.pid, r.tid_data.tid); const MapEntry* map = thread_tree_.FindMap(thread, r.ip_data.ip, r.InKernel()); Dso* dso = map->dso; const Symbol* symbol; if (dump_symbols_) { symbol = thread_tree_.FindSymbol(map, r.ip_data.ip, nullptr, &dso); if (!symbol->HasDumpId()) { dso->CreateSymbolDumpId(symbol); } } if (!dso->HasDumpId()) { dso->CreateDumpId(); } if (r.sample_type & PERF_SAMPLE_CALLCHAIN) { bool in_kernel = r.InKernel(); bool first_ip = true; for (uint64_t i = 0; i < r.callchain_data.ip_nr; ++i) { uint64_t ip = r.callchain_data.ips[i]; if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_KERNEL: in_kernel = true; break; case PERF_CONTEXT_USER: in_kernel = false; break; default: LOG(DEBUG) << "Unexpected perf_context in callchain: " << std::hex << ip; } } else { if (first_ip) { first_ip = false; // Remove duplication with sample ip. if (ip == r.ip_data.ip) { continue; } } map = thread_tree_.FindMap(thread, ip, in_kernel); dso = map->dso; if (dump_symbols_) { symbol = thread_tree_.FindSymbol(map, ip, nullptr, &dso); if (!symbol->HasDumpId()) { dso->CreateSymbolDumpId(symbol); } } if (!dso->HasDumpId()) { dso->CreateDumpId(); } } } } } void RegisterRecordCommand() { RegisterCommand("record", [] { return std::unique_ptr(new RecordCommand()); }); }