summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYabin Cui <yabinc@google.com>2018-01-29 16:02:16 -0800
committerYabin Cui <yabinc@google.com>2018-01-31 14:24:17 -0800
commit5a5e92c7a4ad07559b50edd1c484f919a2b067b1 (patch)
treefb520be6de98bb66a48f05d29750259fde64960a
parent8a9c923f3ba0bd4f67669a7ff5125258bd778bcd (diff)
downloadextras-5a5e92c7a4ad07559b50edd1c484f919a2b067b1.tar.gz
simpleperf: Switch to use post-unwind by default in record cmd.
When recording google.sample.tunnel app for 30s: It took 3s to unwind samples and write unwound samples to file. It took 0.3s to write samples containing stack/reg data to file. The result shows recording with post unwinding consumes much less time than unwinding samples immediately. This means we can record with higher freq and get smaller lose rate when using post unwinding. So make below changes: 1. Make post unwinding by default. 2. Replace --post-unwind with --no-post-unwind option. 3. Make --trace-offcpu and callchain joiner work with post unwinding. 4. Remove special operations in --log debug mode. Those will be supported in a new command. Bug: http://b/72556486 Test: run simpleperf_unit_test. Test: run python test.py. Change-Id: I9a5a5defda9d040985e674c43db19ee68e7aa305
-rw-r--r--simpleperf/cmd_record.cpp198
-rw-r--r--simpleperf/cmd_record_test.cpp8
-rw-r--r--simpleperf/record.cpp25
-rw-r--r--simpleperf/record.h1
-rw-r--r--simpleperf/record_file.h5
-rw-r--r--simpleperf/record_file_writer.cpp3
6 files changed, 135 insertions, 105 deletions
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index 1e2bbc19..2a6bf334 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -161,12 +161,13 @@ class RecordCommand : public Command {
"-m mmap_pages Set the size of the buffer used to receiving sample data from\n"
" the kernel. It should be a power of 2. If not set, the max\n"
" possible value <= 1024 will be used.\n"
-"--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
-" kernel symbols will be dumped when needed.\n"
-"--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n"
-" dumped in perf.data, to support reporting in another\n"
-" environment.\n"
"--no-inherit Don't record created child threads/processes.\n"
+"\n"
+"Dwarf unwinding options:\n"
+"--no-post-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
+" will be recorded in perf.data and unwound after recording.\n"
+" However, this takes a lot of disk space. Use this option to\n"
+" unwind while recording.\n"
"--no-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
" will be unwound by default. Use this option to disable the\n"
" unwinding of the user's stack.\n"
@@ -177,11 +178,14 @@ class RecordCommand : public Command {
"--callchain-joiner-min-matching-nodes count\n"
" When callchain joiner is used, set the matched nodes needed to join\n"
" callchains. The count should be >= 1. By default it is 1.\n"
+"\n"
+"Recording file options:\n"
+"--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
+" kernel symbols will be dumped when needed.\n"
+"--no-dump-symbols Don't dump symbols in perf.data. By default symbols are\n"
+" dumped in perf.data, to support reporting in another\n"
+" environment.\n"
"-o record_file_name Set record file name, default is perf.data.\n"
-"--post-unwind If `--call-graph dwarf` option is used, then the user's stack\n"
-" will be unwound while recording by default. But it may lose\n"
-" records as stacking unwinding can be time consuming. Use this\n"
-" option to unwind the user's stack after recording.\n"
"--exit-with-parent Stop recording when the process starting\n"
" simpleperf dies.\n"
"--start_profiling_fd fd_no After starting profiling, write \"STARTED\" to\n"
@@ -202,7 +206,7 @@ class RecordCommand : public Command {
dwarf_callchain_sampling_(false),
dump_stack_size_in_dwarf_sampling_(MAX_DUMP_STACK_SIZE),
unwind_dwarf_callchain_(true),
- post_unwind_(false),
+ post_unwind_(true),
child_inherit_(true),
duration_in_sec_(0),
can_dump_kernel_symbols_(true),
@@ -246,9 +250,13 @@ class RecordCommand : public Command {
bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id);
bool ProcessRecord(Record* record);
+ bool SaveRecordForPostUnwinding(Record* record);
+ bool SaveRecordAfterUnwinding(Record* record);
+ bool SaveRecordWithoutUnwinding(Record* record);
+
void UpdateRecordForEmbeddedElfPath(Record* record);
bool UnwindRecord(SampleRecord& r);
- bool PostUnwind(const std::vector<std::string>& args);
+ bool PostUnwindRecords();
bool JoinCallChains();
bool DumpAdditionalFeatures(const std::vector<std::string>& args);
bool DumpBuildIdFeature();
@@ -361,14 +369,12 @@ bool RecordCommand::PrepareRecording(Workload* workload) {
// Normally do strict arch check when unwinding stack. But allow unwinding
// 32-bit processes on 64-bit devices for system wide profiling.
bool strict_arch_check = !system_wide_collection_;
- bool collect_stat = WOULD_LOG(DEBUG);
- offline_unwinder_.reset(new OfflineUnwinder(strict_arch_check, collect_stat));
+ offline_unwinder_.reset(new OfflineUnwinder(strict_arch_check, false));
}
- if (unwind_dwarf_callchain_ && !post_unwind_ && allow_callchain_joiner_) {
- bool keep_original_callchains = WOULD_LOG(DEBUG);
+ if (unwind_dwarf_callchain_ && allow_callchain_joiner_) {
callchain_joiner_.reset(new CallChainJoiner(DEFAULT_CALL_CHAIN_JOINER_CACHE_SIZE,
callchain_joiner_min_matching_nodes_,
- keep_original_callchains));
+ false));
}
// 4. Add monitored targets.
@@ -469,12 +475,19 @@ bool RecordCommand::DoRecording(Workload* workload) {
}
bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
- // 1. Optionally join Callchains.
+ // 1. Post unwind dwarf callchain.
+ if (unwind_dwarf_callchain_ && post_unwind_) {
+ if (!PostUnwindRecords()) {
+ return false;
+ }
+ }
+
+ // 2. Optionally join Callchains.
if (callchain_joiner_) {
JoinCallChains();
}
- // 2. Dump additional features, and close record file.
+ // 3. Dump additional features, and close record file.
if (!DumpAdditionalFeatures(args)) {
return false;
}
@@ -482,13 +495,6 @@ bool RecordCommand::PostProcessRecording(const std::vector<std::string>& args) {
return false;
}
- // 3. Post unwind dwarf callchain.
- if (post_unwind_) {
- if (!PostUnwind(args)) {
- return false;
- }
- }
-
// 4. Show brief record result.
LOG(INFO) << "Samples recorded: " << sample_record_count_
<< ". Samples lost: " << lost_record_count_ << ".";
@@ -704,8 +710,8 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
return false;
}
event_selection_set_.AddMonitoredProcesses(pids);
- } else if (args[i] == "--post-unwind") {
- post_unwind_ = true;
+ } else if (args[i] == "--no-post-unwind") {
+ post_unwind_ = false;
} else if (args[i] == "--start_profiling_fd") {
if (!NextArgumentOrError(args, &i)) {
return false;
@@ -754,13 +760,17 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
unwind_dwarf_callchain_ = false;
}
if (post_unwind_) {
+ if (!dwarf_callchain_sampling_ || !unwind_dwarf_callchain_) {
+ post_unwind_ = false;
+ }
+ } else {
if (!dwarf_callchain_sampling_) {
LOG(ERROR)
- << "--post-unwind is only used with `--call-graph dwarf` option.";
+ << "--no-post-unwind is only used with `--call-graph dwarf` option.";
return false;
}
if (!unwind_dwarf_callchain_) {
- LOG(ERROR) << "--post-unwind can't be used with `--no-unwind` option.";
+ LOG(ERROR) << "--no-post-unwind can't be used with `--no-unwind` option.";
return false;
}
}
@@ -1005,26 +1015,35 @@ bool RecordCommand::DumpThreadCommAndMmaps(const perf_event_attr& attr,
}
bool RecordCommand::ProcessRecord(Record* record) {
- if (system_wide_collection_ && record->type() == PERF_RECORD_SAMPLE) {
- auto& r = *static_cast<SampleRecord*>(record);
- // Omit samples get before start sampling time.
- if (r.time_data.time < start_sampling_time_in_ns_) {
- return true;
+ if (unwind_dwarf_callchain_) {
+ if (post_unwind_) {
+ return SaveRecordForPostUnwinding(record);
}
+ return SaveRecordAfterUnwinding(record);
}
- UpdateRecordForEmbeddedElfPath(record);
+ return SaveRecordWithoutUnwinding(record);
+}
- bool unwind_callchain = unwind_dwarf_callchain_ && !post_unwind_;
+bool RecordCommand::SaveRecordForPostUnwinding(Record* record) {
+ if (record->type() == PERF_RECORD_SAMPLE) {
+ static_cast<SampleRecord*>(record)->RemoveInvalidStackData();
+ }
+ if (!record_file_writer_->WriteRecord(*record)) {
+ LOG(ERROR) << "If there isn't enough space for storing profiling data, consider using "
+ << "--no-post-unwind option.";
+ return false;
+ }
+ return true;
+}
+bool RecordCommand::SaveRecordAfterUnwinding(Record* record) {
if (record->type() == PERF_RECORD_SAMPLE) {
auto& r = *static_cast<SampleRecord*>(record);
// AdjustCallChainGeneratedByKernel() should go before UnwindRecord(). Because we don't want
// to adjust callchains generated by dwarf unwinder.
- if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
- r.AdjustCallChainGeneratedByKernel();
- }
- if (unwind_callchain) {
- UnwindRecord(r);
+ r.AdjustCallChainGeneratedByKernel();
+ if (!UnwindRecord(r)) {
+ return false;
}
// ExcludeKernelCallChain() should go after UnwindRecord() to notice the generated user call
// chain.
@@ -1035,11 +1054,28 @@ bool RecordCommand::ProcessRecord(Record* record) {
sample_record_count_++;
} else if (record->type() == PERF_RECORD_LOST) {
lost_record_count_ += static_cast<LostRecord*>(record)->lost;
- } else if (unwind_callchain) {
+ } else {
+ UpdateRecordForEmbeddedElfPath(record);
thread_tree_.Update(*record);
}
- bool result = record_file_writer_->WriteRecord(*record);
- return result;
+ return record_file_writer_->WriteRecord(*record);
+}
+
+bool RecordCommand::SaveRecordWithoutUnwinding(Record* record) {
+ if (record->type() == PERF_RECORD_SAMPLE) {
+ auto& r = *static_cast<SampleRecord*>(record);
+ if (fp_callchain_sampling_ || dwarf_callchain_sampling_) {
+ r.AdjustCallChainGeneratedByKernel();
+ }
+ if (r.InKernel() && exclude_kernel_callchain_ && r.ExcludeKernelCallChain() == 0u) {
+ // If current record contains no user callchain, skip it.
+ return true;
+ }
+ sample_record_count_++;
+ } else if (record->type() == PERF_RECORD_LOST) {
+ lost_record_count_ += static_cast<LostRecord*>(record)->lost;
+ }
+ return record_file_writer_->WriteRecord(*record);
}
template <class RecordType>
@@ -1093,13 +1129,6 @@ bool RecordCommand::UnwindRecord(SampleRecord& r) {
&ips, &sps)) {
return false;
}
- if (offline_unwinder_->HasStat()) {
- const UnwindingResult& unwinding_result = offline_unwinder_->GetUnwindingResult();
- UnwindingResultRecord record(r.time_data.time, unwinding_result);
- if (!record_file_writer_->WriteRecord(record)) {
- return false;
- }
- }
r.ReplaceRegAndStackWithCallChain(ips);
if (callchain_joiner_) {
return callchain_joiner_->AddCallChain(r.tid_data.pid, r.tid_data.tid,
@@ -1109,49 +1138,32 @@ bool RecordCommand::UnwindRecord(SampleRecord& r) {
return true;
}
-bool RecordCommand::PostUnwind(const std::vector<std::string>& args) {
- thread_tree_.ClearThreadAndMap();
- std::unique_ptr<RecordFileReader> reader =
- RecordFileReader::CreateInstance(record_filename_);
- if (reader == nullptr) {
- return false;
- }
- std::string tmp_filename = record_filename_ + ".tmp";
- record_file_writer_ = CreateRecordFile(tmp_filename);
- if (record_file_writer_ == nullptr) {
- return false;
- }
- bool result = reader->ReadDataSection(
- [this](std::unique_ptr<Record> record) {
- thread_tree_.Update(*record);
- if (record->type() == PERF_RECORD_SAMPLE) {
- if (!UnwindRecord(*static_cast<SampleRecord*>(record.get()))) {
- return false;
- }
- }
- return record_file_writer_->WriteRecord(*record);
- },
- false);
- if (!result) {
+bool RecordCommand::PostUnwindRecords() {
+ // 1. Move records from record_filename_ to a temporary file.
+ if (!record_file_writer_->Close()) {
return false;
}
- if (!DumpAdditionalFeatures(args)) {
+ record_file_writer_.reset();
+ std::unique_ptr<TemporaryFile> tmpfile = CreateTempFileUsedInRecording();
+ if (!Workload::RunCmd({"mv", record_filename_, tmpfile->path})) {
return false;
}
- if (!record_file_writer_->Close()) {
+ std::unique_ptr<RecordFileReader> reader = RecordFileReader::CreateInstance(tmpfile->path);
+ if (!reader) {
return false;
}
- if (unlink(record_filename_.c_str()) != 0) {
- PLOG(ERROR) << "failed to remove " << record_filename_;
- return false;
- }
- if (rename(tmp_filename.c_str(), record_filename_.c_str()) != 0) {
- PLOG(ERROR) << "failed to rename " << tmp_filename << " to "
- << record_filename_;
+ // 2. Read records from the temporary file, and write unwound records back to record_filename_.
+ record_file_writer_ = CreateRecordFile(record_filename_);
+ if (!record_file_writer_) {
return false;
}
- return true;
+ sample_record_count_ = 0;
+ lost_record_count_ = 0;
+ auto callback = [this](std::unique_ptr<Record> record) {
+ return SaveRecordAfterUnwinding(record.get());
+ };
+ return reader->ReadDataSection(callback, false);
}
bool RecordCommand::JoinCallChains() {
@@ -1176,7 +1188,6 @@ bool RecordCommand::JoinCallChains() {
if (!reader || !record_file_writer_) {
return false;
}
- bool store_callchains = WOULD_LOG(DEBUG);
auto record_callback = [&](std::unique_ptr<Record> r) {
if (r->type() != PERF_RECORD_SAMPLE) {
@@ -1191,17 +1202,10 @@ bool RecordCommand::JoinCallChains() {
CallChainJoiner::ChainType type;
std::vector<uint64_t> ips;
std::vector<uint64_t> sps;
- do {
- if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
- return false;
- }
- if (store_callchains) {
- CallChainRecord record(pid, tid, type, sr.Timestamp(), ips, sps);
- if (!record_file_writer_->WriteRecord(record)) {
- return false;
- }
- }
- } while (type != CallChainJoiner::JOINED_OFFLINE);
+ if (!callchain_joiner_->GetNextCallChain(pid, tid, type, ips, sps)) {
+ return false;
+ }
+ CHECK_EQ(type, CallChainJoiner::JOINED_OFFLINE);
CHECK_EQ(pid, static_cast<pid_t>(sr.tid_data.pid));
CHECK_EQ(tid, static_cast<pid_t>(sr.tid_data.tid));
sr.UpdateUserCallChain(ips);
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 660e679b..78690371 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -241,16 +241,16 @@ TEST(record_cmd, no_unwind_option) {
ASSERT_FALSE(RunRecordCmd({"--no-unwind"}));
}
-TEST(record_cmd, post_unwind_option) {
+TEST(record_cmd, no_post_unwind_option) {
OMIT_TEST_ON_NON_NATIVE_ABIS();
ASSERT_TRUE(IsDwarfCallChainSamplingSupported());
std::vector<std::unique_ptr<Workload>> workloads;
CreateProcesses(1, &workloads);
std::string pid = std::to_string(workloads[0]->GetPid());
- ASSERT_TRUE(RunRecordCmd({"-p", pid, "--call-graph", "dwarf", "--post-unwind"}));
- ASSERT_FALSE(RunRecordCmd({"--post-unwind"}));
+ ASSERT_TRUE(RunRecordCmd({"-p", pid, "--call-graph", "dwarf", "--no-post-unwind"}));
+ ASSERT_FALSE(RunRecordCmd({"--no-post-unwind"}));
ASSERT_FALSE(
- RunRecordCmd({"--call-graph", "dwarf", "--no-unwind", "--post-unwind"}));
+ RunRecordCmd({"--call-graph", "dwarf", "--no-unwind", "--no-post-unwind"}));
}
TEST(record_cmd, existing_processes) {
diff --git a/simpleperf/record.cpp b/simpleperf/record.cpp
index 44e3d550..a03f3771 100644
--- a/simpleperf/record.cpp
+++ b/simpleperf/record.cpp
@@ -712,6 +712,31 @@ void SampleRecord::UpdateUserCallChain(const std::vector<uint64_t>& user_ips) {
UpdateBinary(new_binary);
}
+// When simpleperf requests the kernel to dump 64K stack per sample, it will allocate 64K space in
+// each sample to store stack data. However, a thread may use less stack than 64K. So not all the
+// 64K stack data in a sample is valid. And this function is used to remove invalid stack data in
+// a sample, which can save time and disk space when storing samples in file.
+void SampleRecord::RemoveInvalidStackData() {
+ if (sample_type & PERF_SAMPLE_STACK_USER) {
+ uint64_t valid_stack_size = GetValidStackSize();
+ if (stack_user_data.size > valid_stack_size) {
+ // Shrink stack size to valid_stack_size, and update it in binary.
+ stack_user_data.size = valid_stack_size;
+ char* p = stack_user_data.data - sizeof(stack_user_data.size);
+ MoveToBinaryFormat(stack_user_data.size, p);
+ p += valid_stack_size;
+ // Update dyn_size in binary.
+ if (valid_stack_size != 0u) {
+ MoveToBinaryFormat(stack_user_data.dyn_size, p);
+ }
+ // Update sample size.
+ header.size = p - binary_;
+ p = binary_;
+ header.MoveToBinaryFormat(p);
+ }
+ }
+}
+
void SampleRecord::DumpData(size_t indent) const {
PrintIndented(indent, "sample_type: 0x%" PRIx64 "\n", sample_type);
if (sample_type & PERF_SAMPLE_IP) {
diff --git a/simpleperf/record.h b/simpleperf/record.h
index 6e1cb37e..b862e401 100644
--- a/simpleperf/record.h
+++ b/simpleperf/record.h
@@ -395,6 +395,7 @@ struct SampleRecord : public Record {
size_t ExcludeKernelCallChain();
bool HasUserCallChain() const;
void UpdateUserCallChain(const std::vector<uint64_t>& user_ips);
+ void RemoveInvalidStackData();
uint64_t Timestamp() const override;
uint32_t Cpu() const override;
diff --git a/simpleperf/record_file.h b/simpleperf/record_file.h
index 32c91fa4..4e09d48e 100644
--- a/simpleperf/record_file.h
+++ b/simpleperf/record_file.h
@@ -36,6 +36,8 @@
#include "thread_tree.h"
// RecordFileWriter writes to a perf record file, like perf.data.
+// User should call RecordFileWriter::Close() to finish writing the file, otherwise the file will
+// be removed in RecordFileWriter::~RecordFileWriter().
class RecordFileWriter {
public:
static std::unique_ptr<RecordFileWriter> CreateInstance(const std::string& filename);
@@ -59,9 +61,6 @@ class RecordFileWriter {
bool WriteMetaInfoFeature(const std::unordered_map<std::string, std::string>& info_map);
bool EndWriteFeatures();
- // Normally, Close() should be called after writing. But if something
- // wrong happens and we need to finish in advance, the destructor
- // will take care of calling Close().
bool Close();
private:
diff --git a/simpleperf/record_file_writer.cpp b/simpleperf/record_file_writer.cpp
index 083a7462..6d0fdd9f 100644
--- a/simpleperf/record_file_writer.cpp
+++ b/simpleperf/record_file_writer.cpp
@@ -64,7 +64,8 @@ RecordFileWriter::RecordFileWriter(const std::string& filename, FILE* fp)
RecordFileWriter::~RecordFileWriter() {
if (record_fp_ != nullptr) {
- Close();
+ fclose(record_fp_);
+ unlink(filename_.c_str());
}
}