summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYabin Cui <yabinc@google.com>2016-06-15 11:41:42 -0700
committerYabin Cui <yabinc@google.com>2016-06-20 17:45:08 -0700
commit6965d42c43f12fd2dfcca3c490b51edc67822586 (patch)
tree70ea518ecc0dfc4ca725d6be500232517fb8dd7e
parent90d56472a6981cbbeba5adfc293f920004cd9a22 (diff)
downloadextras-6965d42c43f12fd2dfcca3c490b51edc67822586.tar.gz
simpleperf: add kmem command to report slab allocation information.
Bug: 27403614 Change-Id: Id3015a4828ec32170ea1db3a1580b14a8bd159ba
-rw-r--r--simpleperf/Android.mk2
-rw-r--r--simpleperf/SampleDisplayer.h6
-rw-r--r--simpleperf/cmd_kmem.cpp711
-rw-r--r--simpleperf/cmd_kmem_test.cpp148
-rw-r--r--simpleperf/command.cpp2
-rw-r--r--simpleperf/dso.cpp6
-rw-r--r--simpleperf/get_test_data.h3
-rw-r--r--simpleperf/sample_tree.h4
-rw-r--r--simpleperf/testdata/perf_with_kmem_slab_callgraph.databin0 -> 9831776 bytes
-rw-r--r--simpleperf/thread_tree.cpp5
-rw-r--r--simpleperf/thread_tree.h1
-rw-r--r--simpleperf/tracing.h16
12 files changed, 895 insertions, 9 deletions
diff --git a/simpleperf/Android.mk b/simpleperf/Android.mk
index 56fe6ec7..847a4cb6 100644
--- a/simpleperf/Android.mk
+++ b/simpleperf/Android.mk
@@ -78,6 +78,7 @@ simpleperf_ldlibs_host_linux := -lrt
libsimpleperf_src_files := \
cmd_dumprecord.cpp \
cmd_help.cpp \
+ cmd_kmem.cpp \
cmd_report.cpp \
cmd_report_sample.cpp \
command.cpp \
@@ -192,6 +193,7 @@ include $(BUILD_HOST_EXECUTABLE)
# simpleperf_unit_test
# =========================================================
simpleperf_unit_test_src_files := \
+ cmd_kmem_test.cpp \
cmd_report_test.cpp \
cmd_report_sample_test.cpp \
command_test.cpp \
diff --git a/simpleperf/SampleDisplayer.h b/simpleperf/SampleDisplayer.h
index c1a058be..168c9e20 100644
--- a/simpleperf/SampleDisplayer.h
+++ b/simpleperf/SampleDisplayer.h
@@ -49,6 +49,12 @@ std::string DisplaySelfOverhead(const EntryT* sample, const InfoT* info) {
return android::base::StringPrintf("%" PRIu64, sample->display_part); \
}
+#define BUILD_DISPLAY_HEX64_FUNCTION(function_name, display_part) /* defines function_name(sample), which formats sample->display_part as 0x-prefixed hex */ \
+ template <typename EntryT> \
+ std::string function_name(const EntryT* sample) { \
+ return android::base::StringPrintf("0x%" PRIx64, sample->display_part);\
+}
+
BUILD_DISPLAY_UINT64_FUNCTION(DisplaySampleCount, sample_count);
template <typename EntryT>
diff --git a/simpleperf/cmd_kmem.cpp b/simpleperf/cmd_kmem.cpp
new file mode 100644
index 00000000..3c3023f5
--- /dev/null
+++ b/simpleperf/cmd_kmem.cpp
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "command.h"
+
+#include <unordered_map>
+
+#include <android-base/logging.h>
+#include <android-base/strings.h>
+
+#include "callchain.h"
+#include "event_attr.h"
+#include "event_type.h"
+#include "record_file.h"
+#include "sample_tree.h"
+#include "tracing.h"
+#include "utils.h"
+
+namespace {
+
+// One entry of the slab allocation report.
+struct SlabSample {
+  // The function making allocation.
+  const Symbol* symbol;
+  // The start address of the allocated space.
+  uint64_t ptr;
+  // Requested space size.
+  uint64_t bytes_req;
+  // Allocated space size.
+  uint64_t bytes_alloc;
+  // Count of allocations.
+  uint64_t sample_count;
+  // Flags used for allocation.
+  uint64_t gfp_flags;
+  // Count of allocations freed not on the cpu allocating them.
+  uint64_t cross_cpu_allocations;
+  // A callchain tree representing all callchains in this sample.
+  CallChainRoot<SlabSample> callchain;
+
+  // Initializes every counter field from the arguments; the callchain tree is
+  // default constructed.
+  SlabSample(const Symbol* sym, uint64_t addr, uint64_t req_bytes,
+             uint64_t alloc_bytes, uint64_t count, uint64_t flags,
+             uint64_t cross_cpu_count)
+      : symbol(sym),
+        ptr(addr),
+        bytes_req(req_bytes),
+        bytes_alloc(alloc_bytes),
+        sample_count(count),
+        gfp_flags(flags),
+        cross_cpu_allocations(cross_cpu_count) {}
+};
+
+struct SlabAccumulateInfo {  // sizes of one allocation: filled in CreateSample() and read in CreateCallChainSample()
+ uint64_t bytes_req;  // requested space size of the allocation
+ uint64_t bytes_alloc;  // allocated space size of the allocation
+};
+
+BUILD_COMPARE_VALUE_FUNCTION(ComparePtr, ptr);  // comparator on SlabSample::ptr
+BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesReq, bytes_req);  // NOTE(review): _REVERSE presumably orders larger values first - confirm against the macro definition
+BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareBytesAlloc, bytes_alloc);  // comparator on SlabSample::bytes_alloc
+BUILD_COMPARE_VALUE_FUNCTION(CompareGfpFlags, gfp_flags);  // comparator on SlabSample::gfp_flags
+BUILD_COMPARE_VALUE_FUNCTION_REVERSE(CompareCrossCpuAllocations,
+ cross_cpu_allocations);  // comparator on SlabSample::cross_cpu_allocations
+
+BUILD_DISPLAY_HEX64_FUNCTION(DisplayPtr, ptr);  // prints ptr as 0x-prefixed hex
+BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesReq, bytes_req);  // prints bytes_req as an unsigned integer
+BUILD_DISPLAY_UINT64_FUNCTION(DisplayBytesAlloc, bytes_alloc);  // prints bytes_alloc as an unsigned integer
+BUILD_DISPLAY_HEX64_FUNCTION(DisplayGfpFlags, gfp_flags);  // prints gfp_flags as 0x-prefixed hex
+BUILD_DISPLAY_UINT64_FUNCTION(DisplayCrossCpuAllocations,
+ cross_cpu_allocations);  // prints cross_cpu_allocations as an unsigned integer
+
+// Compares two samples by their fragment size (bytes_alloc - bytes_req). The
+// fragment of sample2 is passed to Compare() first, i.e. the operands are
+// swapped relative to the parameter order.
+static int CompareFragment(const SlabSample* sample1,
+                           const SlabSample* sample2) {
+  const uint64_t first_fragment = sample1->bytes_alloc - sample1->bytes_req;
+  const uint64_t second_fragment = sample2->bytes_alloc - sample2->bytes_req;
+  return Compare(second_fragment, first_fragment);
+}
+
+// Formats the fragment size (bytes_alloc - bytes_req) of a sample as an
+// unsigned decimal string.
+static std::string DisplayFragment(const SlabSample* sample) {
+  const uint64_t fragment = sample->bytes_alloc - sample->bytes_req;
+  return android::base::StringPrintf("%" PRIu64, fragment);
+}
+
+struct SlabSampleTree {  // result of SlabSampleTreeBuilder::GetSampleTree(): the samples plus summary counters
+ std::vector<SlabSample*> samples;  // samples to sort and display
+ uint64_t total_requested_bytes;  // sum of bytes_req over all allocations
+ uint64_t total_allocated_bytes;  // sum of bytes_alloc over all allocations
+ uint64_t nr_allocations;  // number of allocation samples
+ uint64_t nr_frees;  // number of free events
+ uint64_t nr_cross_cpu_allocations;  // allocations freed on a cpu other than the allocating one
+};
+
+struct SlabFormat {  // where to find the fields of one kmem tracepoint event inside a sample's raw data
+ enum {
+ KMEM_ALLOC,  // kmalloc / kmem_cache_alloc / *_node events
+ KMEM_FREE,  // kfree / kmem_cache_free events
+ } type;
+ TracingFieldPlace call_site;  // place of the "call_site" field
+ TracingFieldPlace ptr;  // place of the "ptr" field
+ TracingFieldPlace bytes_req;  // place of the "bytes_req" field; only filled for KMEM_ALLOC
+ TracingFieldPlace bytes_alloc;  // place of the "bytes_alloc" field; only filled for KMEM_ALLOC
+ TracingFieldPlace gfp_flags;  // place of the "gfp_flags" field; only filled for KMEM_ALLOC
+};
+
+// Builds the slab allocation report from kmem tracepoint sample records.
+// Allocation events are turned into SlabSample entries; free events are
+// matched against outstanding allocations by pointer in order to count frees
+// and frees happening on a cpu other than the allocating one.
+class SlabSampleTreeBuilder
+    : public SampleTreeBuilder<SlabSample, SlabAccumulateInfo> {
+ public:
+  // sample_comparator is forwarded to the base class. thread_tree is used to
+  // map kernel addresses to symbols and is not owned by the builder.
+  SlabSampleTreeBuilder(SampleComparator<SlabSample> sample_comparator,
+                        ThreadTree* thread_tree)
+      : SampleTreeBuilder(sample_comparator),
+        thread_tree_(thread_tree),
+        total_requested_bytes_(0),
+        total_allocated_bytes_(0),
+        nr_allocations_(0),
+        // nr_frees_ is incremented for every free event and copied into the
+        // report, so it must start from a defined value.
+        nr_frees_(0),
+        nr_cross_cpu_allocations_(0) {}
+
+  // Returns the samples built so far together with the summary counters.
+  SlabSampleTree GetSampleTree() const {
+    SlabSampleTree sample_tree;
+    sample_tree.samples = GetSamples();
+    sample_tree.total_requested_bytes = total_requested_bytes_;
+    sample_tree.total_allocated_bytes = total_allocated_bytes_;
+    sample_tree.nr_allocations = nr_allocations_;
+    sample_tree.nr_frees = nr_frees_;
+    sample_tree.nr_cross_cpu_allocations = nr_cross_cpu_allocations_;
+    return sample_tree;
+  }
+
+  // Registers the field layout used to decode the raw data of samples whose
+  // event id is one of event_ids. The builder keeps its own copy of format.
+  void AddSlabFormat(const std::vector<uint64_t>& event_ids,
+                     SlabFormat format) {
+    std::unique_ptr<SlabFormat> p(new SlabFormat(format));
+    for (auto id : event_ids) {
+      event_id_to_format_map_[id] = p.get();
+    }
+    formats_.push_back(std::move(p));
+  }
+
+ protected:
+  // Handles one sample record.
+  // - Allocation event: inserts a SlabSample, remembers the allocating cpu for
+  //   the pointer, fills acc_info and returns the inserted sample.
+  // - Free event: updates the free / cross cpu counters and returns nullptr.
+  // - Returns nullptr for user space records with a non-zero ip and for
+  //   records whose event id has no registered format.
+  SlabSample* CreateSample(const SampleRecord& r, bool in_kernel,
+                           SlabAccumulateInfo* acc_info) override {
+    if (!in_kernel) {
+      // Normally we don't parse records in user space because tracepoint
+      // events all happen in kernel. But if r.ip_data.ip == 0, it may be
+      // a kernel record failed to dump ip register and is still useful.
+      if (r.ip_data.ip != 0) {
+        return nullptr;
+      }
+      // It seems we are on a kernel which can't dump regset for tracepoint
+      // events because of lacking perf_arch_fetch_caller_regs(). We can't get
+      // callchain, but we can still do a normal report. Warn only once.
+      static bool first = true;
+      if (first) {
+        first = false;
+        if (accumulate_callchain_) {
+          LOG(WARNING) << "simpleperf may not get callchains for tracepoint"
+                       << " events because of lacking kernel support.";
+        }
+      }
+    }
+    uint64_t id = r.id_data.id;
+    auto format_it = event_id_to_format_map_.find(id);
+    if (format_it == event_id_to_format_map_.end()) {
+      return nullptr;
+    }
+    const char* raw_data = r.raw_data.data.data();
+    SlabFormat* format = format_it->second;
+    if (format->type == SlabFormat::KMEM_ALLOC) {
+      uint64_t call_site = format->call_site.ReadFromData(raw_data);
+      const Symbol* symbol = thread_tree_->FindKernelSymbol(call_site);
+      uint64_t ptr = format->ptr.ReadFromData(raw_data);
+      uint64_t bytes_req = format->bytes_req.ReadFromData(raw_data);
+      uint64_t bytes_alloc = format->bytes_alloc.ReadFromData(raw_data);
+      uint64_t gfp_flags = format->gfp_flags.ReadFromData(raw_data);
+      SlabSample* sample =
+          InsertSample(std::unique_ptr<SlabSample>(new SlabSample(
+              symbol, ptr, bytes_req, bytes_alloc, 1, gfp_flags, 0)));
+      // Overwrite any stale entry for this pointer: if the free event of a
+      // previous allocation at the same address was not seen, the record must
+      // describe the newest allocation, not the old one.
+      alloc_cpu_record_map_[ptr] = std::make_pair(r.cpu_data.cpu, sample);
+      acc_info->bytes_req = bytes_req;
+      acc_info->bytes_alloc = bytes_alloc;
+      return sample;
+    } else if (format->type == SlabFormat::KMEM_FREE) {
+      uint64_t ptr = format->ptr.ReadFromData(raw_data);
+      auto alloc_it = alloc_cpu_record_map_.find(ptr);
+      if (alloc_it != alloc_cpu_record_map_.end()) {
+        SlabSample* sample = alloc_it->second.second;
+        if (r.cpu_data.cpu != alloc_it->second.first) {
+          sample->cross_cpu_allocations++;
+          nr_cross_cpu_allocations_++;
+        }
+        alloc_cpu_record_map_.erase(alloc_it);
+      }
+      nr_frees_++;
+    }
+    return nullptr;
+  }
+
+  // Branch samples are not used by the kmem report.
+  SlabSample* CreateBranchSample(const SampleRecord&,
+                                 const BranchStackItemType&) override {
+    return nullptr;
+  }
+
+  // Creates the sample for one callchain entry of an allocation. Only kernel
+  // ips are kept; the sizes come from acc_info filled by CreateSample().
+  SlabSample* CreateCallChainSample(
+      const SlabSample* sample, uint64_t ip, bool in_kernel,
+      const std::vector<SlabSample*>& callchain,
+      const SlabAccumulateInfo& acc_info) override {
+    if (!in_kernel) {
+      return nullptr;
+    }
+    const Symbol* symbol = thread_tree_->FindKernelSymbol(ip);
+    return InsertCallChainSample(
+        std::unique_ptr<SlabSample>(
+            new SlabSample(symbol, sample->ptr, acc_info.bytes_req,
+                           acc_info.bytes_alloc, 1, sample->gfp_flags, 0)),
+        callchain);
+  }
+
+  // Slab samples are not associated with a thread.
+  const ThreadEntry* GetThreadOfSample(SlabSample*) override { return nullptr; }
+
+  void InsertCallChainForSample(SlabSample* sample,
+                                const std::vector<SlabSample*>& callchain,
+                                const SlabAccumulateInfo&) override {
+    // Decide the percentage of callchain by the sample_count, so use 1 as the
+    // period when calling AddCallChain().
+    sample->callchain.AddCallChain(callchain, 1);
+  }
+
+  // Adds one allocation sample to the report totals.
+  void UpdateSummary(const SlabSample* sample) override {
+    total_requested_bytes_ += sample->bytes_req;
+    total_allocated_bytes_ += sample->bytes_alloc;
+    nr_allocations_++;
+  }
+
+  // Accumulates the sizes and the hit count of sample2 into sample1.
+  void MergeSample(SlabSample* sample1, SlabSample* sample2) override {
+    sample1->bytes_req += sample2->bytes_req;
+    sample1->bytes_alloc += sample2->bytes_alloc;
+    sample1->sample_count += sample2->sample_count;
+  }
+
+ private:
+  ThreadTree* thread_tree_;
+  uint64_t total_requested_bytes_;
+  uint64_t total_allocated_bytes_;
+  uint64_t nr_allocations_;
+  uint64_t nr_frees_;
+  uint64_t nr_cross_cpu_allocations_;
+
+  // Maps a perf event id to the format used to decode its raw data. The
+  // pointed-to formats are owned by formats_.
+  std::unordered_map<uint64_t, SlabFormat*> event_id_to_format_map_;
+  std::vector<std::unique_ptr<SlabFormat>> formats_;
+  // Maps the address of a not yet freed allocation to the cpu which allocated
+  // it and the sample it was accounted to.
+  std::unordered_map<uint64_t, std::pair<uint32_t, SlabSample*>>
+      alloc_cpu_record_map_;
+};
+
+using SlabSampleTreeSorter = SampleTreeSorter<SlabSample>;  // sorter instantiated for slab samples
+using SlabSampleTreeDisplayer = SampleTreeDisplayer<SlabSample, SlabSampleTree>;  // displayer instantiated for slab samples and their summary
+
+struct EventAttrWithName {  // one event attr read from the record file, see ReadEventAttrsFromRecordFile()
+ perf_event_attr attr;  // copy of the attr stored in the record file
+ std::string name;  // event name; replaced by the tracing data name for tracepoint events
+ std::vector<uint64_t> event_ids;  // ids of the samples belonging to this attr
+};
+
+class KmemCommand : public Command {  // implements "simpleperf kmem record" and "simpleperf kmem report"
+ public:
+ KmemCommand()
+ : Command(
+ "kmem", "collect kernel memory allocation information",
+ // clang-format off
+"Usage: kmem (record [record options] | report [report options])\n"
+"kmem record\n"
+"-g Enable call graph recording. Same as '--call-graph fp'.\n"
+"--slab Collect slab allocation information. Default option.\n"
+"Other record options provided by simpleperf record command are also available.\n"
+"kmem report\n"
+"--children Print the accumulated allocation info appeared in the callchain.\n"
+" Can be used on perf.data recorded with `--call-graph fp` option.\n"
+"-g [callee|caller] Print call graph for perf.data recorded with\n"
+" `--call-graph fp` option. If callee mode is used, the graph\n"
+" shows how functions are called from others. Otherwise, the\n"
+" graph shows how functions call others. Default is callee\n"
+" mode. The percentage shown in the graph is determined by\n"
+" the hit count of the callchain.\n"
+"-i Specify path of record file, default is perf.data\n"
+"-o report_file_name Set report file name, default is stdout.\n"
+"--slab Report slab allocation information. Default option.\n"
+"--slab-sort key1,key2,...\n"
+" Select the keys to sort and print slab allocation information.\n"
+" Should be used with --slab option. Possible keys include:\n"
+" hit -- the allocation count.\n"
+" caller -- the function calling allocation.\n"
+" ptr -- the address of the allocated space.\n"
+" bytes_req -- the total requested space size.\n"
+" bytes_alloc -- the total allocated space size.\n"
+" fragment -- the extra allocated space size\n"
+" (bytes_alloc - bytes_req).\n"
+" gfp_flags -- the flags used for allocation.\n"
+" pingpong -- the count of allocations that are freed not on\n"
+" the cpu allocating them.\n"
+" The default slab sort keys are:\n"
+" hit,caller,bytes_req,bytes_alloc,fragment,pingpong.\n"
+ // clang-format on
+ ),
+ is_record_(false),
+ use_slab_(false),
+ accumulate_callchain_(false),
+ print_callgraph_(false),
+ callgraph_show_callee_(false),
+ record_filename_("perf.data"),
+ record_file_arch_(GetBuildArch()) {}
+
+ bool Run(const std::vector<std::string>& args);  // parses args, then records or reports; returns false on failure
+
+ private:
+ bool ParseOptions(const std::vector<std::string>& args,
+ std::vector<std::string>* left_args);  // left_args receives the arguments to pass on to the record command
+ bool RecordKmemInfo(const std::vector<std::string>& record_args);  // runs the record command with kmem trace events
+ bool ReportKmemInfo();  // reads the record file and prints the report
+ bool PrepareToBuildSampleTree();  // creates builder, sorter and displayer from slab_sort_keys_
+ void ReadEventAttrsFromRecordFile();  // fills event_attrs_
+ bool ReadFeaturesFromRecordFile();  // reads arch, cmdline and tracing data features
+ bool ReadSampleTreeFromRecordFile();  // processes all records and sorts the resulting samples
+ bool ProcessRecord(std::unique_ptr<Record> record);  // callback invoked for each record of the data section
+ void ProcessTracingData(const std::vector<char>& data);  // registers slab formats for the kmem tracepoint events
+ bool PrintReport();  // writes the report to report_filename_ or stdout
+ void PrintReportContext(FILE* fp);  // prints cmdline, arch and event list
+ void PrintSlabReportContext(FILE* fp);  // prints the slab summary counters
+
+ bool is_record_;  // true for "kmem record", false for "kmem report"
+ bool use_slab_;  // set by --slab; Run() forces it to true since slab is the default
+ std::vector<std::string> slab_sort_keys_;  // keys given by --slab-sort
+ bool accumulate_callchain_;  // set by --children or -g
+ bool print_callgraph_;  // set by -g
+ bool callgraph_show_callee_;  // true for callee mode of -g, false for caller mode
+
+ std::string record_filename_;  // input file of report, set by -i
+ std::unique_ptr<RecordFileReader> record_file_reader_;
+ std::vector<EventAttrWithName> event_attrs_;
+ std::string record_cmdline_;  // cmdline feature of the record file, empty if absent
+ ArchType record_file_arch_;  // arch feature of the record file, defaults to the build arch
+
+ ThreadTree thread_tree_;
+ SlabSampleTree slab_sample_tree_;  // assigned in ReadSampleTreeFromRecordFile()
+ std::unique_ptr<SlabSampleTreeBuilder> slab_sample_tree_builder_;
+ std::unique_ptr<SlabSampleTreeSorter> slab_sample_tree_sorter_;
+ std::unique_ptr<SlabSampleTreeDisplayer> slab_sample_tree_displayer_;
+
+ std::string report_filename_;  // output file set by -o; empty means stdout
+};
+
+// Entry point of the kmem command: parses the options and dispatches to the
+// record or the report implementation. Returns false on any failure.
+bool KmemCommand::Run(const std::vector<std::string>& args) {
+  std::vector<std::string> remaining_args;
+  if (!ParseOptions(args, &remaining_args)) {
+    return false;
+  }
+  // Slab information is the default option, so it is enabled whether or not
+  // --slab was given.
+  use_slab_ = true;
+  return is_record_ ? RecordKmemInfo(remaining_args) : ReportKmemInfo();
+}
+
+// Parses the arguments following "kmem". args[0] selects the subcommand.
+// For "record", leading options are translated/copied into left_args and the
+// rest of the arguments is appended unchanged. For "report", the options are
+// stored in the corresponding members. Returns false after logging an error
+// when the arguments are invalid.
+bool KmemCommand::ParseOptions(const std::vector<std::string>& args,
+                               std::vector<std::string>* left_args) {
+  if (args.empty()) {
+    LOG(ERROR) << "No subcommand specified";
+    return false;
+  }
+  const std::string& subcommand = args[0];
+  const bool wants_record = (subcommand == "record");
+  if (!wants_record && subcommand != "report") {
+    LOG(ERROR) << "Unknown subcommand for " << Name() << ": " << subcommand
+               << ". Try `simpleperf help " << Name() << "`";
+    return false;
+  }
+
+  if (wants_record) {
+    if (!IsRoot()) {
+      LOG(ERROR) << "simpleperf kmem record command needs root privilege";
+      return false;
+    }
+    is_record_ = true;
+    // Only the leading run of arguments starting with '-' is inspected.
+    size_t pos = 1;
+    while (pos < args.size() && !args[pos].empty() && args[pos][0] == '-') {
+      const std::string& option = args[pos++];
+      if (option == "--slab") {
+        use_slab_ = true;
+      } else if (option == "-g") {
+        left_args->push_back("--call-graph");
+        left_args->push_back("fp");
+      } else {
+        left_args->push_back(option);
+      }
+    }
+    left_args->insert(left_args->end(), args.begin() + pos, args.end());
+    return true;
+  }
+
+  is_record_ = false;
+  for (size_t i = 1; i < args.size(); ++i) {
+    const std::string& option = args[i];
+    if (option == "--children") {
+      accumulate_callchain_ = true;
+    } else if (option == "-g") {
+      print_callgraph_ = true;
+      accumulate_callchain_ = true;
+      callgraph_show_callee_ = true;
+      // An optional mode follows -g when the next argument isn't an option.
+      const bool has_mode = i + 1 < args.size() && args[i + 1][0] != '-';
+      if (has_mode) {
+        const std::string& mode = args[++i];
+        if (mode == "caller") {
+          callgraph_show_callee_ = false;
+        } else if (mode != "callee") {
+          LOG(ERROR) << "Unknown argument with -g option: " << mode;
+          return false;
+        }
+      }
+    } else if (option == "-i") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      record_filename_ = args[i];
+    } else if (option == "-o") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      report_filename_ = args[i];
+    } else if (option == "--slab") {
+      use_slab_ = true;
+    } else if (option == "--slab-sort") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      slab_sort_keys_ = android::base::Split(args[i], ",");
+    } else {
+      ReportUnknownOption(args, i);
+      return false;
+    }
+  }
+  return true;
+}
+
+// Runs the record command system wide (-a) on every kmem trace event accepted
+// by ParseEventType(), followed by the user supplied record_args. Fails when
+// no event is accepted or the record command can't be created.
+bool KmemCommand::RecordKmemInfo(const std::vector<std::string>& record_args) {
+  std::vector<std::string> cmd_args;
+  if (use_slab_) {
+    const std::vector<std::string> slab_events = {
+        "kmem:kmalloc",      "kmem:kmem_cache_alloc",
+        "kmem:kmalloc_node", "kmem:kmem_cache_alloc_node",
+        "kmem:kfree",        "kmem:kmem_cache_free"};
+    for (const auto& event_name : slab_events) {
+      if (ParseEventType(event_name)) {
+        cmd_args.push_back("-e");
+        cmd_args.push_back(event_name);
+      }
+    }
+  }
+  if (cmd_args.empty()) {
+    LOG(ERROR) << "Kernel allocation related trace events are not supported.";
+    return false;
+  }
+  cmd_args.push_back("-a");
+  cmd_args.insert(cmd_args.end(), record_args.begin(), record_args.end());
+
+  std::unique_ptr<Command> record_cmd = CreateCommandInstance("record");
+  if (record_cmd == nullptr) {
+    LOG(ERROR) << "record command isn't available";
+    return false;
+  }
+  return record_cmd->Run(cmd_args);
+}
+
+// Runs the report pipeline: prepare the sample tree helpers, open the record
+// file, read attrs, features and samples, then print the report. Stops and
+// returns false at the first failing step.
+bool KmemCommand::ReportKmemInfo() {
+  if (!PrepareToBuildSampleTree()) {
+    return false;
+  }
+  record_file_reader_ = RecordFileReader::CreateInstance(record_filename_);
+  if (record_file_reader_ == nullptr) {
+    return false;
+  }
+  ReadEventAttrsFromRecordFile();
+  return ReadFeaturesFromRecordFile() && ReadSampleTreeFromRecordFile() &&
+         PrintReport();
+}
+
+// Creates the sample tree builder, sorter and displayer for the slab report
+// according to slab_sort_keys_ (the default keys are used when none were
+// given). Keys are split in two groups:
+//   - caller, ptr, gfp_flags go to `comparator`, which is given to the sample
+//     tree builder;
+//   - hit, bytes_req, bytes_alloc, fragment, pingpong go to
+//     `sort_comparator`, which is used to order the report.
+// Returns false after logging an error when a key is unknown.
+bool KmemCommand::PrepareToBuildSampleTree() {
+  if (use_slab_) {
+    if (slab_sort_keys_.empty()) {
+      slab_sort_keys_ = {"hit",         "caller",   "bytes_req",
+                         "bytes_alloc", "fragment", "pingpong"};
+    }
+    SampleComparator<SlabSample> comparator;
+    SampleComparator<SlabSample> sort_comparator;
+    SampleDisplayer<SlabSample, SlabSampleTree> displayer;
+    std::string accumulated_name = accumulate_callchain_ ? "Accumulated_" : "";
+
+    if (print_callgraph_) {
+      displayer.AddExclusiveDisplayFunction(DisplayCallgraph);
+    }
+
+    for (const auto& key : slab_sort_keys_) {
+      if (key == "hit") {
+        sort_comparator.AddCompareFunction(CompareSampleCount);
+        displayer.AddDisplayFunction(accumulated_name + "Hit",
+                                     DisplaySampleCount);
+      } else if (key == "caller") {
+        comparator.AddCompareFunction(CompareSymbol);
+        displayer.AddDisplayFunction("Caller", DisplaySymbol);
+      } else if (key == "ptr") {
+        comparator.AddCompareFunction(ComparePtr);
+        displayer.AddDisplayFunction("Ptr", DisplayPtr);
+      } else if (key == "bytes_req") {
+        sort_comparator.AddCompareFunction(CompareBytesReq);
+        displayer.AddDisplayFunction(accumulated_name + "BytesReq",
+                                     DisplayBytesReq);
+      } else if (key == "bytes_alloc") {
+        sort_comparator.AddCompareFunction(CompareBytesAlloc);
+        displayer.AddDisplayFunction(accumulated_name + "BytesAlloc",
+                                     DisplayBytesAlloc);
+      } else if (key == "fragment") {
+        sort_comparator.AddCompareFunction(CompareFragment);
+        displayer.AddDisplayFunction(accumulated_name + "Fragment",
+                                     DisplayFragment);
+      } else if (key == "gfp_flags") {
+        comparator.AddCompareFunction(CompareGfpFlags);
+        displayer.AddDisplayFunction("GfpFlags", DisplayGfpFlags);
+      } else if (key == "pingpong") {
+        sort_comparator.AddCompareFunction(CompareCrossCpuAllocations);
+        displayer.AddDisplayFunction("Pingpong", DisplayCrossCpuAllocations);
+      } else {
+        LOG(ERROR) << "Unknown sort key for slab allocation: " << key;
+        return false;
+      }
+    }
+
+    // Build the helpers once, after every key has been processed. Doing this
+    // inside the loop above would append `comparator` to `sort_comparator`
+    // once per key (interleaving it with the sort keys and so changing their
+    // precedence) and would recreate the builder, sorter and displayer for
+    // each key.
+    slab_sample_tree_builder_.reset(
+        new SlabSampleTreeBuilder(comparator, &thread_tree_));
+    slab_sample_tree_builder_->SetCallChainSampleOptions(
+        accumulate_callchain_, print_callgraph_, !callgraph_show_callee_,
+        false);
+    sort_comparator.AddComparator(comparator);
+    slab_sample_tree_sorter_.reset(new SlabSampleTreeSorter(sort_comparator));
+    slab_sample_tree_displayer_.reset(new SlabSampleTreeDisplayer(displayer));
+  }
+  return true;
+}
+
+// Appends one EventAttrWithName to event_attrs_ for every attr of the record
+// file, naming it with GetEventNameByAttr().
+void KmemCommand::ReadEventAttrsFromRecordFile() {
+  const std::vector<AttrWithId> file_attrs = record_file_reader_->AttrSection();
+  for (const AttrWithId& file_attr : file_attrs) {
+    EventAttrWithName entry;
+    entry.attr = *file_attr.attr;
+    entry.event_ids = file_attr.ids;
+    entry.name = GetEventNameByAttr(entry.attr);
+    event_attrs_.push_back(entry);
+  }
+}
+
+// Reads the arch, cmdline and tracing data features of the record file.
+// Returns false when the recorded arch is unsupported or the tracing data
+// section can't be read. Missing features are skipped.
+bool KmemCommand::ReadFeaturesFromRecordFile() {
+  const std::string arch_name =
+      record_file_reader_->ReadFeatureString(PerfFileFormat::FEAT_ARCH);
+  if (!arch_name.empty()) {
+    record_file_arch_ = GetArchType(arch_name);
+    if (record_file_arch_ == ARCH_UNSUPPORTED) {
+      return false;
+    }
+  }
+
+  const std::vector<std::string> cmdline_args =
+      record_file_reader_->ReadCmdlineFeature();
+  if (!cmdline_args.empty()) {
+    record_cmdline_ = android::base::Join(cmdline_args, ' ');
+  }
+
+  if (!record_file_reader_->HasFeature(PerfFileFormat::FEAT_TRACING_DATA)) {
+    return true;
+  }
+  std::vector<char> tracing_data;
+  if (!record_file_reader_->ReadFeatureSection(
+          PerfFileFormat::FEAT_TRACING_DATA, &tracing_data)) {
+    return false;
+  }
+  ProcessTracingData(tracing_data);
+  return true;
+}
+
+// Feeds every record of the data section to ProcessRecord(), then fetches and
+// sorts the slab sample tree. Returns false when reading the data section
+// fails.
+bool KmemCommand::ReadSampleTreeFromRecordFile() {
+  auto record_callback = [this](std::unique_ptr<Record> record) {
+    return ProcessRecord(std::move(record));
+  };
+  if (!record_file_reader_->ReadDataSection(record_callback)) {
+    return false;
+  }
+  if (!use_slab_) {
+    return true;
+  }
+  slab_sample_tree_ = slab_sample_tree_builder_->GetSampleTree();
+  slab_sample_tree_sorter_->Sort(slab_sample_tree_.samples, print_callgraph_);
+  return true;
+}
+
+// Handles one record of the data section: every record updates the thread
+// tree, sample records go to the slab sample tree builder and tracing data
+// records are parsed for tracepoint formats. Always returns true.
+bool KmemCommand::ProcessRecord(std::unique_ptr<Record> record) {
+  thread_tree_.Update(*record);
+  const auto record_type = record->type();
+  if (record_type == PERF_RECORD_TRACING_DATA) {
+    const auto& tracing_record = *static_cast<TracingDataRecord*>(record.get());
+    ProcessTracingData(tracing_record.data);
+  } else if (record_type == PERF_RECORD_SAMPLE && use_slab_) {
+    const auto* sample_record = static_cast<const SampleRecord*>(record.get());
+    slab_sample_tree_builder_->ProcessSampleRecord(*sample_record);
+  }
+  return true;
+}
+
+// Parses tracing data and, for each tracepoint event attr, replaces its name
+// by the one found in the tracing data. When slab reporting is enabled, the
+// field places of the kmem alloc/free events are registered in the slab
+// sample tree builder for the ids of the attr.
+void KmemCommand::ProcessTracingData(const std::vector<char>& data) {
+  Tracing tracing(data);
+  for (auto& event_attr : event_attrs_) {
+    if (event_attr.attr.type != PERF_TYPE_TRACEPOINT) {
+      continue;
+    }
+    const uint64_t trace_event_id = event_attr.attr.config;
+    event_attr.name = tracing.GetTracingEventNameHavingId(trace_event_id);
+    TracingFormat format = tracing.GetTracingFormatHavingId(trace_event_id);
+    if (!use_slab_) {
+      continue;
+    }
+    const bool is_alloc_event =
+        format.name == "kmalloc" || format.name == "kmem_cache_alloc" ||
+        format.name == "kmalloc_node" ||
+        format.name == "kmem_cache_alloc_node";
+    const bool is_free_event =
+        format.name == "kfree" || format.name == "kmem_cache_free";
+    if (is_alloc_event) {
+      SlabFormat slab_format;
+      slab_format.type = SlabFormat::KMEM_ALLOC;
+      format.GetField("call_site", slab_format.call_site);
+      format.GetField("ptr", slab_format.ptr);
+      format.GetField("bytes_req", slab_format.bytes_req);
+      format.GetField("bytes_alloc", slab_format.bytes_alloc);
+      format.GetField("gfp_flags", slab_format.gfp_flags);
+      slab_sample_tree_builder_->AddSlabFormat(event_attr.event_ids,
+                                               slab_format);
+    } else if (is_free_event) {
+      // Free events only carry the call site and the freed pointer.
+      SlabFormat slab_format;
+      slab_format.type = SlabFormat::KMEM_FREE;
+      format.GetField("call_site", slab_format.call_site);
+      format.GetField("ptr", slab_format.ptr);
+      slab_sample_tree_builder_->AddSlabFormat(event_attr.event_ids,
+                                               slab_format);
+    }
+  }
+}
+
+// Writes the report to report_filename_, or to stdout when no file name was
+// given. Returns false when the report file can't be opened. A file opened
+// here is closed when the function returns.
+bool KmemCommand::PrintReport() {
+  FILE* out = stdout;
+  std::unique_ptr<FILE, decltype(&fclose)> owned_file(nullptr, fclose);
+  if (!report_filename_.empty()) {
+    owned_file.reset(fopen(report_filename_.c_str(), "w"));
+    if (!owned_file) {
+      PLOG(ERROR) << "failed to open " << report_filename_;
+      return false;
+    }
+    out = owned_file.get();
+  }
+
+  PrintReportContext(out);
+  if (!use_slab_) {
+    return true;
+  }
+  fprintf(out, "\n\n");
+  PrintSlabReportContext(out);
+  slab_sample_tree_displayer_->DisplaySamples(out, slab_sample_tree_.samples,
+                                              &slab_sample_tree_);
+  return true;
+}
+
+// Prints the general report header: the record cmdline (when known), the arch
+// and one line per event attr.
+void KmemCommand::PrintReportContext(FILE* fp) {
+  if (!record_cmdline_.empty()) {
+    fprintf(fp, "Cmdline: %s\n", record_cmdline_.c_str());
+  }
+  fprintf(fp, "Arch: %s\n", GetArchString(record_file_arch_).c_str());
+  for (const auto& event_attr : event_attrs_) {
+    const perf_event_attr& raw_attr = event_attr.attr;
+    fprintf(fp, "Event: %s (type %u, config %llu)\n", event_attr.name.c_str(),
+            raw_attr.type, raw_attr.config);
+  }
+}
+
+// Prints the slab summary: total requested/allocated bytes, the fragment
+// (allocated - requested) with its share of the allocated bytes, the number of
+// allocations and frees, and the cross cpu allocation/free count with its
+// share of the allocations. Percentages are 0 when their divisor is 0.
+void KmemCommand::PrintSlabReportContext(FILE* fp) {
+  const SlabSampleTree& tree = slab_sample_tree_;
+  const uint64_t fragment =
+      tree.total_allocated_bytes - tree.total_requested_bytes;
+  const double fragment_percentage =
+      tree.total_allocated_bytes != 0
+          ? 100.0 * fragment / tree.total_allocated_bytes
+          : 0.0;
+  const double cross_cpu_percentage =
+      tree.nr_allocations != 0
+          ? 100.0 * tree.nr_cross_cpu_allocations / tree.nr_allocations
+          : 0.0;
+
+  fprintf(fp, "Slab allocation information:\n");
+  fprintf(fp, "Total requested bytes: %" PRIu64 "\n",
+          tree.total_requested_bytes);
+  fprintf(fp, "Total allocated bytes: %" PRIu64 "\n",
+          tree.total_allocated_bytes);
+  fprintf(fp, "Total fragment: %" PRIu64 ", %f%%\n", fragment,
+          fragment_percentage);
+  fprintf(fp, "Total allocations: %" PRIu64 "\n", tree.nr_allocations);
+  fprintf(fp, "Total frees: %" PRIu64 "\n", tree.nr_frees);
+  fprintf(fp, "Total cross cpu allocation/free: %" PRIu64 ", %f%%\n",
+          tree.nr_cross_cpu_allocations, cross_cpu_percentage);
+  fprintf(fp, "\n");
+}
+
+} // namespace
+
+// Registers a factory creating KmemCommand instances under the name "kmem".
+void RegisterKmemCommand() {
+  auto create_kmem_command = [] {
+    return std::unique_ptr<Command>(new KmemCommand());
+  };
+  RegisterCommand("kmem", create_kmem_command);
+}
diff --git a/simpleperf/cmd_kmem_test.cpp b/simpleperf/cmd_kmem_test.cpp
new file mode 100644
index 00000000..dd18858e
--- /dev/null
+++ b/simpleperf/cmd_kmem_test.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <android-base/file.h>
+#include <android-base/strings.h>
+#include <android-base/test_utils.h>
+
+#include <memory>
+
+#include "command.h"
+#include "environment.h"
+#include "event_selection_set.h"
+#include "get_test_data.h"
+#include "record.h"
+#include "record_file.h"
+#include "test_util.h"
+
+// Returns a new instance of the command registered as "kmem".
+static std::unique_ptr<Command> KmemCmd() {
+  std::unique_ptr<Command> kmem_cmd = CreateCommandInstance("kmem");
+  return kmem_cmd;
+}
+
+struct ReportResult {  // output of a kmem report run, filled by KmemReportRawFile()
+ bool success;  // true only when the report ran and all checks in KmemReportRawFile() passed
+ std::string content;  // whole report text
+ std::vector<std::string> lines;  // trimmed, non-empty lines of content
+};
+
+// Runs `kmem report -i perf_data -o <temporary file>` plus additional_args and
+// stores the report in result. result->success is only set to true when the
+// command succeeds and the report has at least two non-empty lines; a failed
+// gtest assertion returns early with success == false.
+static void KmemReportRawFile(const std::string& perf_data,
+                              const std::vector<std::string>& additional_args,
+                              ReportResult* result) {
+  result->success = false;
+  TemporaryFile tmp_file;
+  std::vector<std::string> cmd_args;
+  cmd_args.push_back("report");
+  cmd_args.push_back("-i");
+  cmd_args.push_back(perf_data);
+  cmd_args.push_back("-o");
+  cmd_args.push_back(tmp_file.path);
+  cmd_args.insert(cmd_args.end(), additional_args.begin(),
+                  additional_args.end());
+  ASSERT_TRUE(KmemCmd()->Run(cmd_args));
+  ASSERT_TRUE(android::base::ReadFileToString(tmp_file.path, &result->content));
+  ASSERT_TRUE(!result->content.empty());
+
+  // Keep only the trimmed, non-empty lines of the report.
+  result->lines.clear();
+  for (const auto& raw_line : android::base::Split(result->content, "\n")) {
+    std::string trimmed = android::base::Trim(raw_line);
+    if (trimmed.empty()) {
+      continue;
+    }
+    result->lines.push_back(trimmed);
+  }
+  ASSERT_GE(result->lines.size(), 2u);
+  result->success = true;
+}
+
+// Same as KmemReportRawFile(), but perf_data names a file resolved through
+// GetTestData().
+static void KmemReportFile(const std::string& perf_data,
+                           const std::vector<std::string>& additional_args,
+                           ReportResult* result) {
+  const std::string test_data_path = GetTestData(perf_data);
+  KmemReportRawFile(test_data_path, additional_args, result);
+}
+
+#if defined(__linux__)
+
+// Runs `kmem record <v...> -o <file> sleep SLEEP_SEC` and returns the result
+// of the command. The output goes to output_file, or to a temporary file which
+// lives until the function returns when output_file is nullptr.
+static bool RunKmemRecordCmd(std::vector<std::string> v,
+                             const char* output_file = nullptr) {
+  std::unique_ptr<TemporaryFile> tmpfile;
+  if (output_file == nullptr) {
+    tmpfile.reset(new TemporaryFile);
+    output_file = tmpfile->path;
+  }
+  const std::string out_file = output_file;
+  v.insert(v.begin(), "record");
+  v.insert(v.end(), {"-o", out_file, "sleep", SLEEP_SEC});
+  return KmemCmd()->Run(v);
+}
+
+TEST(kmem_cmd, record_slab) {  // kmem record --slab succeeds
+ TEST_IN_ROOT(ASSERT_TRUE(RunKmemRecordCmd({"--slab"})));  // NOTE(review): TEST_IN_ROOT presumably skips the statement without root - confirm in test_util.h
+}
+
+TEST(kmem_cmd, record_fp_callchain_sampling) {  // both spellings of frame pointer call graph recording succeed
+ TEST_IN_ROOT(ASSERT_TRUE(RunKmemRecordCmd({"--slab", "-g"})));  // -g is translated to --call-graph fp by ParseOptions()
+ TEST_IN_ROOT(ASSERT_TRUE(RunKmemRecordCmd({"--slab", "--call-graph", "fp"})));  // explicit form, passed through to the record command
+}
+
+TEST(kmem_cmd, record_and_report) {  // a freshly recorded file can be reported
+ TemporaryFile tmp_file;  // receives the recorded perf data
+ TEST_IN_ROOT({
+ ASSERT_TRUE(RunKmemRecordCmd({"--slab"}, tmp_file.path));
+ ReportResult result;
+ KmemReportRawFile(tmp_file.path, {}, &result);  // raw variant: the path is not a test data name
+ ASSERT_TRUE(result.success);
+ });
+}
+
+TEST(kmem_cmd, record_and_report_callgraph) {  // a file recorded with -g can be reported with -g
+ TemporaryFile tmp_file;  // receives the recorded perf data
+ TEST_IN_ROOT({
+ ASSERT_TRUE(RunKmemRecordCmd({"--slab", "-g"}, tmp_file.path));
+ ReportResult result;
+ KmemReportRawFile(tmp_file.path, {"-g"}, &result);  // report with call graph printing enabled
+ ASSERT_TRUE(result.success);
+ });
+}
+
+#endif
+
+TEST(kmem_cmd, report) {  // default report of the checked-in test data
+ ReportResult result;
+ KmemReportFile(PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD, {}, &result);
+ ASSERT_TRUE(result.success);
+ ASSERT_NE(result.content.find("kmem:kmalloc"), std::string::npos);  // event name appears in the report
+ ASSERT_NE(result.content.find("__alloc_skb"), std::string::npos);  // a symbol expected in the report of this test data
+}
+
+TEST(kmem_cmd, report_all_sort_options) {  // every --slab-sort key is accepted
+ ReportResult result;
+ KmemReportFile(
+ PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD,
+ {"--slab-sort",
+ "hit,caller,ptr,bytes_req,bytes_alloc,fragment,gfp_flags,pingpong"},
+ &result);
+ ASSERT_TRUE(result.success);
+ ASSERT_NE(result.content.find("Ptr"), std::string::npos);  // column only shown for the ptr key
+ ASSERT_NE(result.content.find("GfpFlags"), std::string::npos);  // column only shown for the gfp_flags key
+}
+
+TEST(kmem_cmd, report_callgraph) {  // report of the checked-in test data with call graph printing
+ ReportResult result;
+ KmemReportFile(PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD, {"-g"}, &result);
+ ASSERT_TRUE(result.success);
+ ASSERT_NE(result.content.find("kmem:kmalloc"), std::string::npos);  // event name appears in the report
+ ASSERT_NE(result.content.find("__alloc_skb"), std::string::npos);  // a symbol expected in the report of this test data
+ ASSERT_NE(result.content.find("system_call_fastpath"), std::string::npos);  // checked only in the -g report
+}
diff --git a/simpleperf/command.cpp b/simpleperf/command.cpp
index 4db88b77..a5ae2a89 100644
--- a/simpleperf/command.cpp
+++ b/simpleperf/command.cpp
@@ -72,6 +72,7 @@ const std::vector<std::string> GetAllCommandNames() {
extern void RegisterDumpRecordCommand();
extern void RegisterHelpCommand();
extern void RegisterListCommand();
+extern void RegisterKmemCommand();
extern void RegisterRecordCommand();
extern void RegisterReportCommand();
extern void RegisterReportSampleCommand();
@@ -82,6 +83,7 @@ class CommandRegister {
CommandRegister() {
RegisterDumpRecordCommand();
RegisterHelpCommand();
+ RegisterKmemCommand();
RegisterReportCommand();
RegisterReportSampleCommand();
#if defined(__linux__)
diff --git a/simpleperf/dso.cpp b/simpleperf/dso.cpp
index 4453817a..45ae124d 100644
--- a/simpleperf/dso.cpp
+++ b/simpleperf/dso.cpp
@@ -262,8 +262,7 @@ bool Dso::LoadKernel() {
if (all_zero) {
LOG(WARNING)
<< "Symbol addresses in /proc/kallsyms on device are all zero. "
- "`echo 0 >/proc/sys/kernel/kptr_restrict` or use root privilege "
- "if possible.";
+ "`echo 0 >/proc/sys/kernel/kptr_restrict` if possible.";
symbols_.clear();
return false;
}
@@ -296,8 +295,7 @@ bool Dso::LoadKernel() {
}
if (all_zero) {
LOG(WARNING) << "Symbol addresses in /proc/kallsyms are all zero. "
- "`echo 0 >/proc/sys/kernel/kptr_restrict` or use root "
- "privilege if possible.";
+ "`echo 0 >/proc/sys/kernel/kptr_restrict` if possible.";
symbols_.clear();
return false;
}
diff --git a/simpleperf/get_test_data.h b/simpleperf/get_test_data.h
index f5ec5e74..f5cdb353 100644
--- a/simpleperf/get_test_data.h
+++ b/simpleperf/get_test_data.h
@@ -74,4 +74,7 @@ static const std::string PERF_DATA_WITH_KERNEL_SYMBOL = "perf_with_kernel_symbol
// perf_with_symbols.data is generated by `sudo simpleperf record --dump-symbols sleep 1`.
static const std::string PERF_DATA_WITH_SYMBOLS = "perf_with_symbols.data";
+// perf_with_kmem_slab_callgraph.data is generated by `simpleperf kmem record --slab --call-graph fp sleep 0.0001`.
+static const std::string PERF_DATA_WITH_KMEM_SLAB_CALLGRAPH_RECORD = "perf_with_kmem_slab_callgraph.data";
+
#endif // SIMPLE_PERF_GET_TEST_DATA_H_
diff --git a/simpleperf/sample_tree.h b/simpleperf/sample_tree.h
index c35a91a2..86becbe3 100644
--- a/simpleperf/sample_tree.h
+++ b/simpleperf/sample_tree.h
@@ -53,9 +53,9 @@ class SampleTreeBuilder {
public:
SampleTreeBuilder(SampleComparator<EntryT> comparator)
: sample_set_(comparator),
+ accumulate_callchain_(false),
callchain_sample_set_(comparator),
use_branch_address_(false),
- accumulate_callchain_(false),
build_callchain_(false),
use_caller_as_callchain_root_(false),
strict_unwind_arch_check_(false) {}
@@ -246,6 +246,7 @@ class SampleTreeBuilder {
}
std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
+ bool accumulate_callchain_;
private:
// If a CallChainSample is filtered out, it is stored in callchain_sample_set_
@@ -254,7 +255,6 @@ class SampleTreeBuilder {
std::vector<std::unique_ptr<EntryT>> sample_storage_;
bool use_branch_address_;
- bool accumulate_callchain_;
bool build_callchain_;
bool use_caller_as_callchain_root_;
bool strict_unwind_arch_check_;
diff --git a/simpleperf/testdata/perf_with_kmem_slab_callgraph.data b/simpleperf/testdata/perf_with_kmem_slab_callgraph.data
new file mode 100644
index 00000000..255ba2f9
--- /dev/null
+++ b/simpleperf/testdata/perf_with_kmem_slab_callgraph.data
Binary files differ
diff --git a/simpleperf/thread_tree.cpp b/simpleperf/thread_tree.cpp
index 42104f39..58b02006 100644
--- a/simpleperf/thread_tree.cpp
+++ b/simpleperf/thread_tree.cpp
@@ -212,6 +212,11 @@ const Symbol* ThreadTree::FindSymbol(const MapEntry* map, uint64_t ip) {
return symbol;
}
+const Symbol* ThreadTree::FindKernelSymbol(uint64_t ip) {  // Returns the result of FindSymbol() for ip, using a map looked up without any thread.
+ const MapEntry* map = FindMap(nullptr, ip, true);  // nullptr thread; the `true` flag presumably selects the kernel maps (confirm against FindMap's 3-argument overload).
+ return FindSymbol(map, ip);  // Whatever FindSymbol yields for that map is returned unchanged.
+}
+
void ThreadTree::ClearThreadAndMap() {
thread_tree_.clear();
thread_comm_storage_.clear();
diff --git a/simpleperf/thread_tree.h b/simpleperf/thread_tree.h
index f6853503..b58c5061 100644
--- a/simpleperf/thread_tree.h
+++ b/simpleperf/thread_tree.h
@@ -83,6 +83,7 @@ class ThreadTree {
// Find map for an ip address when we don't know whether it is in kernel.
const MapEntry* FindMap(const ThreadEntry* thread, uint64_t ip);
const Symbol* FindSymbol(const MapEntry* map, uint64_t ip);
+ const Symbol* FindKernelSymbol(uint64_t ip);
const MapEntry* UnknownMap() const {
return &unknown_map_;
}
diff --git a/simpleperf/tracing.h b/simpleperf/tracing.h
index bd3c1bfe..86ec9815 100644
--- a/simpleperf/tracing.h
+++ b/simpleperf/tracing.h
@@ -22,6 +22,7 @@
#include <android-base/logging.h>
#include "event_type.h"
+#include "utils.h"
struct TracingField {
std::string name;
@@ -31,16 +32,25 @@ struct TracingField {
bool is_signed;
};
+struct TracingFieldPlace {  // Offset/size pair locating one field inside a raw data buffer.
+ uint32_t offset;  // Added to raw_data in ReadFromData(); presumably a byte offset (confirm against ConvertBytesToValue).
+ uint32_t size;  // Passed as the length argument to ConvertBytesToValue(); presumably in bytes.
+
+ uint64_t ReadFromData(const char* raw_data) {  // Returns ConvertBytesToValue() applied to `size` units starting at raw_data + offset.
+ return ConvertBytesToValue(raw_data + offset, size);  // No bounds check here: the caller must supply a buffer covering offset + size.
+ }
+};
+
struct TracingFormat {
std::string system_name;
std::string name;
uint64_t id;
std::vector<TracingField> fields;
- void GetField(const std::string& name, uint32_t& offset, uint32_t& size) {
+ void GetField(const std::string& name, TracingFieldPlace& place) {
const TracingField& field = GetField(name);
- offset = field.offset;
- size = field.elem_size;
+ place.offset = field.offset;
+ place.size = field.elem_size;
}
private: