summary | refs | log | tree | commit | diff
path: root/simpleperf
diff options
context:
space:
mode:
author Yabin Cui <yabinc@google.com> 2016-07-08 13:56:48 -0700
committer Yabin Cui <yabinc@google.com> 2016-07-12 15:18:49 -0700
commit 6173592d3d086b8e493590d626fdbdb4e344c433 (patch)
tree 74412077b3158288e4a7a078bfe144e42d2710db /simpleperf
parent b17b6c3edf1c4027f38683dd97458a7aea176dc4 (diff)
download extras-6173592d3d086b8e493590d626fdbdb4e344c433.tar.gz
simpleperf: combine mapped buffer used for the same event and cpu.
Non-root users have a size limit on the buffers used for profiling. By combining the buffers used for the same event and cpu, we can reduce buffer cost when monitoring multiple threads, and thus avoid mmap() failures. Increase the default value of the -m option for non-system-wide profiling.
Bug: 28911532
Change-Id: I91148061eb54840c144cf72e7bb901e7b74897ec
Test: run simpleperf_unit_test.
Diffstat (limited to 'simpleperf')
-rw-r--r-- simpleperf/cmd_record.cpp 9
-rw-r--r-- simpleperf/cmd_record_test.cpp 4
-rw-r--r-- simpleperf/event_fd.cpp 29
-rw-r--r-- simpleperf/event_fd.h 23
-rw-r--r-- simpleperf/event_selection_set.cpp 31
-rw-r--r-- simpleperf/event_selection_set.h 3
6 files changed, 59 insertions, 40 deletions
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index c97f8968..9cec453f 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -115,7 +115,7 @@ class RecordCommand : public Command {
"-m mmap_pages Set the size of the buffer used to receiving sample data from\n"
" the kernel. It should be a power of 2. The default value for\n"
" system wide profiling is 256. The default value for non system\n"
-" wide profiling is 16.\n"
+" wide profiling is 128.\n"
"--no-dump-kernel-symbols Don't dump kernel symbols in perf.data. By default\n"
" kernel symbols will be dumped when needed.\n"
"--no-inherit Don't record created child threads/processes.\n"
@@ -267,11 +267,10 @@ bool RecordCommand::Run(const std::vector<std::string>& args) {
return false;
}
}
- if (!event_selection_set_.MmapEventFiles(perf_mmap_pages_)) {
+ std::vector<pollfd> pollfds;
+ if (!event_selection_set_.MmapEventFiles(perf_mmap_pages_, &pollfds)) {
return false;
}
- std::vector<pollfd> pollfds;
- event_selection_set_.PrepareToPollForEventFiles(&pollfds);
// 4. Create perf.data.
if (!CreateAndInitRecordFile()) {
@@ -532,7 +531,7 @@ bool RecordCommand::ParseOptions(const std::vector<std::string>& args,
if (mmap_pages != 0) {
perf_mmap_pages_ = mmap_pages;
} else {
- perf_mmap_pages_ = (system_wide_collection_ ? 256 : 16);
+ perf_mmap_pages_ = (system_wide_collection_ ? 256 : 128);
}
if (non_option_args != nullptr) {
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index fd86b4c7..9313cbe7 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -296,10 +296,10 @@ TEST(record_cmd, dump_symbols) {
}
TEST(record_cmd, group_option) {
- ASSERT_TRUE(RunRecordCmd({"--group", "cpu-cycles,cpu-clock"}));
+ ASSERT_TRUE(RunRecordCmd({"--group", "cpu-cycles,cpu-clock", "-m", "16"}));
ASSERT_TRUE(RunRecordCmd({"--group", "cpu-cycles,cpu-clock", "--group",
"cpu-cycles:u,cpu-clock:u", "--group",
- "cpu-cycles:k,cpu-clock:k"}));
+ "cpu-cycles:k,cpu-clock:k", "-m", "16"}));
}
TEST(record_cmd, symfs_option) { ASSERT_TRUE(RunRecordCmd({"--symfs", "/"})); }
diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp
index 09910768..2fd4d522 100644
--- a/simpleperf/event_fd.cpp
+++ b/simpleperf/event_fd.cpp
@@ -115,7 +115,7 @@ bool EventFd::ReadCounter(PerfCounter* counter) const {
return true;
}
-bool EventFd::MmapContent(size_t mmap_pages) {
+bool EventFd::CreateMappedBuffer(size_t mmap_pages, pollfd* poll_fd) {
CHECK(IsPowerOfTwo(mmap_pages));
size_t page_size = sysconf(_SC_PAGE_SIZE);
size_t mmap_len = (mmap_pages + 1) * page_size;
@@ -126,7 +126,8 @@ bool EventFd::MmapContent(size_t mmap_pages) {
if (is_perm_error) {
LOG(ERROR) << "It seems the kernel doesn't allow allocating enough "
<< "buffer for dumping samples, consider decreasing the number of "
- << "monitored threads(-t), or decreasing mmap pages(-m).";
+ << "monitored threads(-t), or decreasing mmap pages(-m), or "
+ << "decreasing the number of events(-e).";
}
return false;
}
@@ -138,10 +139,28 @@ bool EventFd::MmapContent(size_t mmap_pages) {
if (data_process_buffer_.size() < mmap_data_buffer_size_) {
data_process_buffer_.resize(mmap_data_buffer_size_);
}
+ memset(poll_fd, 0, sizeof(pollfd));
+ poll_fd->fd = perf_event_fd_;
+ poll_fd->events = POLLIN;
+ return true;
+}
+
+bool EventFd::ShareMappedBuffer(const EventFd& event_fd) {
+ CHECK(!HasMappedBuffer());
+ CHECK(event_fd.HasMappedBuffer());
+ int result = ioctl(perf_event_fd_, PERF_EVENT_IOC_SET_OUTPUT, event_fd.perf_event_fd_);
+ if (result != 0) {
+ PLOG(ERROR) << "failed to share mapped buffer of "
+ << event_fd.perf_event_fd_ << " with " << perf_event_fd_;
+ return false;
+ }
return true;
}
size_t EventFd::GetAvailableMmapData(char** pdata) {
+ if (!HasMappedBuffer()) {
+ return 0;
+ }
// The mmap_data_buffer is used as a ring buffer like below. The kernel continuously writes
// records to the buffer, and the user continuously read records out.
// _________________________________________
@@ -193,12 +212,6 @@ void EventFd::DiscardMmapData(size_t discard_size) {
mmap_metadata_page_->data_tail += discard_size;
}
-void EventFd::PrepareToPollForMmapData(pollfd* poll_fd) {
- memset(poll_fd, 0, sizeof(pollfd));
- poll_fd->fd = perf_event_fd_;
- poll_fd->events = POLLIN;
-}
-
bool IsEventAttrSupportedByKernel(perf_event_attr attr) {
auto event_fd = EventFd::OpenEventFile(attr, getpid(), -1, nullptr, false);
return event_fd != nullptr;
diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h
index c54c3e61..651f3fd4 100644
--- a/simpleperf/event_fd.h
+++ b/simpleperf/event_fd.h
@@ -62,17 +62,23 @@ class EventFd {
bool ReadCounter(PerfCounter* counter) const;
- // Call mmap() for this perf_event_file, so we can read sampled records from mapped area.
- // mmap_pages should be power of 2.
- bool MmapContent(size_t mmap_pages);
+ // Create mapped buffer used to receive records sent by the kernel.
+ // mmap_pages should be power of 2. If created successfully, fill pollfd,
+ // which is used to poll() on available mapped data.
+ bool CreateMappedBuffer(size_t mmap_pages, pollfd* poll_fd);
+
+ // Share the mapped buffer used by event_fd. The two EventFds should monitor
+ // the same event on the same cpu, but have different thread ids.
+ bool ShareMappedBuffer(const EventFd& event_fd);
+
+ bool HasMappedBuffer() const {
+ return mmap_data_buffer_size_ != 0;
+ }
// When the kernel writes new sampled records to the mapped area, we can get them by returning
// the start address and size of the data.
size_t GetAvailableMmapData(char** pdata);
- // Prepare pollfd for poll() to wait on available mmap_data.
- void PrepareToPollForMmapData(pollfd* poll_fd);
-
private:
EventFd(int perf_event_fd, const std::string& event_name, pid_t tid, int cpu)
: perf_event_fd_(perf_event_fd),
@@ -81,7 +87,10 @@ class EventFd {
tid_(tid),
cpu_(cpu),
mmap_addr_(nullptr),
- mmap_len_(0) {
+ mmap_len_(0),
+ mmap_metadata_page_(nullptr),
+ mmap_data_buffer_(nullptr),
+ mmap_data_buffer_size_(0) {
}
// Discard how much data we have read, so the kernel can reuse this part of mapped area to store
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index 1c832505..267f40d1 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -335,25 +335,24 @@ bool EventSelectionSet::ReadCounters(std::vector<CountersInfo>* counters) {
return true;
}
-void EventSelectionSet::PrepareToPollForEventFiles(
- std::vector<pollfd>* pollfds) {
+bool EventSelectionSet::MmapEventFiles(size_t mmap_pages, std::vector<pollfd>* pollfds) {
for (auto& group : groups_) {
for (auto& selection : group) {
+ // For each event, allocate a mapped buffer for each cpu.
+ std::map<int, EventFd*> cpu_map;
for (auto& event_fd : selection.event_fds) {
- pollfd poll_fd;
- event_fd->PrepareToPollForMmapData(&poll_fd);
- pollfds->push_back(poll_fd);
- }
- }
- }
-}
-
-bool EventSelectionSet::MmapEventFiles(size_t mmap_pages) {
- for (auto& group : groups_) {
- for (auto& selection : group) {
- for (auto& event_fd : selection.event_fds) {
- if (!event_fd->MmapContent(mmap_pages)) {
- return false;
+ auto it = cpu_map.find(event_fd->Cpu());
+ if (it != cpu_map.end()) {
+ if (!event_fd->ShareMappedBuffer(*(it->second))) {
+ return false;
+ }
+ } else {
+ pollfd poll_fd;
+ if (!event_fd->CreateMappedBuffer(mmap_pages, &poll_fd)) {
+ return false;
+ }
+ pollfds->push_back(poll_fd);
+ cpu_map.insert(std::make_pair(event_fd->Cpu(), event_fd.get()));
}
}
}
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index d393a3b8..87bdeab7 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -88,8 +88,7 @@ class EventSelectionSet {
bool OpenEventFilesForCpus(const std::vector<int>& cpus);
bool OpenEventFilesForThreadsOnCpus(const std::vector<pid_t>& threads, std::vector<int> cpus);
bool ReadCounters(std::vector<CountersInfo>* counters);
- void PrepareToPollForEventFiles(std::vector<pollfd>* pollfds);
- bool MmapEventFiles(size_t mmap_pages);
+ bool MmapEventFiles(size_t mmap_pages, std::vector<pollfd>* pollfds);
void PrepareToReadMmapEventData(std::function<bool (Record*)> callback);
bool ReadMmapEventData();
bool FinishReadMmapEventData();