diff options
Diffstat (limited to 'simpleperf/scripts/pprof_proto_generator.py')
-rw-r--r-- | simpleperf/scripts/pprof_proto_generator.py | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/simpleperf/scripts/pprof_proto_generator.py b/simpleperf/scripts/pprof_proto_generator.py new file mode 100644 index 00000000..5f8d143b --- /dev/null +++ b/simpleperf/scripts/pprof_proto_generator.py @@ -0,0 +1,549 @@ +#!/usr/bin/env python +# +# Copyright (C) 2017 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be + used by pprof. + + Example: + python app_profiler.py + python pprof_proto_generator.py + pprof -text pprof.profile +""" + +from __future__ import print_function +import argparse +import os +import os.path +import profile_pb2 +import re +import shutil +import subprocess +import sys +import time + +from annotate import Addr2Line +from simpleperf_report_lib import * +from utils import * + + +def load_pprof_profile(filename): + profile = profile_pb2.Profile() + with open(filename, "rb") as f: + profile.ParseFromString(f.read()) + return profile + + +def store_pprof_profile(filename, profile): + with open(filename, 'wb') as f: + f.write(profile.SerializeToString()) + + +class PprofProfilePrinter(object): + + def __init__(self, profile): + self.profile = profile + self.string_table = profile.string_table + + def show(self): + p = self.profile + sub_space = ' ' + print('Profile {') + print('%d sample_types' % len(p.sample_type)) + for i in range(len(p.sample_type)): + print('sample_type[%d] = ' % i, end='') + self.show_value_type(p.sample_type[i]) + print('%d samples' % len(p.sample)) + for i in range(len(p.sample)): + print('sample[%d]:' % i) + self.show_sample(p.sample[i], sub_space) + print('%d mappings' % len(p.mapping)) + for i in range(len(p.mapping)): + print('mapping[%d]:' % i) + self.show_mapping(p.mapping[i], sub_space) + print('%d locations' % len(p.location)) + for i in range(len(p.location)): + print('location[%d]:' % i) + self.show_location(p.location[i], sub_space) + for i in range(len(p.function)): + print('function[%d]:' % i) + self.show_function(p.function[i], sub_space) + print('%d strings' % len(p.string_table)) + for i in range(len(p.string_table)): + print('string[%d]: %s' % (i, p.string_table[i])) + print('drop_frames: %s' % self.string(p.drop_frames)) + print('keep_frames: %s' % self.string(p.keep_frames)) + print('time_nanos: %u' % p.time_nanos) + print('duration_nanos: %u' % p.duration_nanos) + print('period_type: ', end='') + self.show_value_type(p.period_type) + print('period: %u' % p.period) + for i in range(len(p.comment)): + print('comment[%d] = %s' % (i, self.string(p.comment[i]))) + print('default_sample_type: %d' % p.default_sample_type) + print('} // Profile') + print() + + def show_value_type(self, value_type, space=''): + print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' % + (space, value_type.type, value_type.unit, + self.string(value_type.type), self.string(value_type.unit))) + + def show_sample(self, sample, space=''): + sub_space = space + ' ' + for i in range(len(sample.location_id)): + print('%slocation_id[%d]: id %d' % (space, i, sample.location_id[i])) + self.show_location_id(sample.location_id[i], sub_space) + for i in range(len(sample.value)): + print('%svalue[%d] = %d' % (space, i, sample.value[i])) + for i in range(len(sample.label)): + print('%slabel[%d] = ', (space, i)) + + def show_location_id(self, location_id, space=''): + location = self.profile.location[location_id - 1] + self.show_location(location, space) + + def show_location(self, location, space=''): + sub_space = space + ' ' + print('%sid: %d' % (space, location.id)) + print('%smapping_id: %d' % (space, location.mapping_id)) + self.show_mapping_id(location.mapping_id, sub_space) + print('%saddress: %x' % (space, location.address)) + for i in range(len(location.line)): + print('%sline[%d]:' % (space, i)) + self.show_line(location.line[i], sub_space) + + def show_mapping_id(self, mapping_id, space=''): + mapping = self.profile.mapping[mapping_id - 1] + self.show_mapping(mapping, space) + + def show_mapping(self, mapping, space=''): + print('%sid: %d' % (space, mapping.id)) + print('%smemory_start: %x' % (space, mapping.memory_start)) + print('%smemory_limit: %x' % (space, mapping.memory_limit)) + print('%sfile_offset: %x' % (space, mapping.file_offset)) + print('%sfilename: %s(%d)' % (space, self.string(mapping.filename), + mapping.filename)) + print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id), + mapping.build_id)) + print('%shas_functions: %s' % (space, mapping.has_functions)) + print('%shas_filenames: %s' % (space, mapping.has_filenames)) + print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers)) + print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames)) + + def show_line(self, line, space=''): + sub_space = space + ' ' + print('%sfunction_id: %d' % (space, line.function_id)) + self.show_function_id(line.function_id, sub_space) + print('%sline: %d' % (space, line.line)) + + def show_function_id(self, function_id, space=''): + function = self.profile.function[function_id - 1] + self.show_function(function, space) + + def show_function(self, function, space=''): + print('%sid: %d' % (space, function.id)) + print('%sname: %s' % (space, self.string(function.name))) + print('%ssystem_name: %s' % (space, self.string(function.system_name))) + print('%sfilename: %s' % (space, self.string(function.filename))) + print('%sstart_line: %d' % (space, function.start_line)) + + def show_label(self, label, space=''): + print('%sLabel(%s =', space, self.string(label.key), end='') + if label.HasField('str'): + print('%s)' % self.get_string(label.str)) + else: + print('%d)' % label.num) + + def string(self, id): + return self.string_table[id] + + +class Sample(object): + + def __init__(self): + self.location_ids = [] + self.values = {} + + def add_location_id(self, location_id): + self.location_ids.append(location_id) + + def add_value(self, id, value): + self.values[id] = self.values.get(id, 0) + value + + def add_values(self, values): + for id in values.keys(): + value = values[id] + self.add_value(id, value) + + @property + def key(self): + return tuple(self.location_ids) + + +class Location(object): + + def __init__(self, mapping_id, address, vaddr_in_dso): + self.id = -1 # unset + self.mapping_id = mapping_id + self.address = address + self.vaddr_in_dso = vaddr_in_dso + self.lines = [] + + @property + def key(self): + return (self.mapping_id, self.address) + + +class Line(object): + + def __init__(self): + self.function_id = 0 + self.line = 0 + + +class Mapping(object): + + def __init__(self, start, end, pgoff, filename_id, build_id_id): + self.id = -1 # unset + self.memory_start = start + self.memory_limit = end + self.file_offset = pgoff + self.filename_id = filename_id + self.build_id_id = build_id_id + + @property + def key(self): + return ( + self.memory_start, + self.memory_limit, + self.file_offset, + self.filename_id, + self.build_id_id) + + +class Function(object): + + def __init__(self, name_id, dso_name_id, vaddr_in_dso): + self.id = -1 # unset + self.name_id = name_id + self.dso_name_id = dso_name_id + self.vaddr_in_dso = vaddr_in_dso + self.source_filename_id = 0 + self.start_line = 0 + + @property + def key(self): + return (self.name_id, self.dso_name_id) + + +class PprofProfileGenerator(object): + + def __init__(self, config): + self.config = config + self.lib = ReportLib() + + if config.get('binary_cache_dir'): + self.lib.SetSymfs(config['binary_cache_dir']) + if config.get('record_file'): + self.lib.SetRecordFile(config['record_file']) + if config.get('kallsyms'): + self.lib.SetKallsymsFile(config['kallsyms']) + self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None + if config.get('pid_filters'): + self.pid_filter = {int(x) for x in config['pid_filters']} + else: + self.pid_filter = None + if config.get('tid_filters'): + self.tid_filter = {int(x) for x in config['tid_filters']} + else: + self.tid_filter = None + self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None + + def gen(self): + self.profile = profile_pb2.Profile() + self.profile.string_table.append('') + self.string_table = {} + self.sample_types = {} + self.sample_map = {} + self.sample_list = [] + self.location_map = {} + self.location_list = [] + self.mapping_map = {} + self.mapping_list = [] + self.function_map = {} + self.function_list = [] + + # 1. Process all samples in perf.data, aggregate samples. + while True: + report_sample = self.lib.GetNextSample() + if report_sample is None: + self.lib.Close() + break + event = self.lib.GetEventOfCurrentSample() + symbol = self.lib.GetSymbolOfCurrentSample() + callchain = self.lib.GetCallChainOfCurrentSample() + + if not self._filter_report_sample(report_sample): + continue + + sample_type_id = self.get_sample_type_id(event.name) + sample = Sample() + sample.add_value(sample_type_id, 1) + sample.add_value(sample_type_id + 1, report_sample.period) + if self._filter_symbol(symbol): + location_id = self.get_location_id(symbol.vaddr_in_file, symbol) + sample.add_location_id(location_id) + for i in range(callchain.nr): + entry = callchain.entries[i] + if self._filter_symbol(symbol): + location_id = self.get_location_id(entry.ip, entry.symbol) + sample.add_location_id(location_id) + if sample.location_ids: + self.add_sample(sample) + + # 2. Generate line info for locations and functions. + self.gen_source_lines() + + # 3. Produce samples/locations/functions in profile + for sample in self.sample_list: + self.gen_profile_sample(sample) + for mapping in self.mapping_list: + self.gen_profile_mapping(mapping) + for location in self.location_list: + self.gen_profile_location(location) + for function in self.function_list: + self.gen_profile_function(function) + + return self.profile + + def _filter_report_sample(self, sample): + """Return true if the sample can be used.""" + if self.comm_filter: + if sample.thread_comm not in self.comm_filter: + return False + if self.pid_filter: + if sample.pid not in self.pid_filter: + return False + if self.tid_filter: + if sample.tid not in self.tid_filter: + return False + return True + + def _filter_symbol(self, symbol): + if not self.dso_filter or symbol.dso_name in self.dso_filter: + return True + return False + + def get_string_id(self, str): + if len(str) == 0: + return 0 + id = self.string_table.get(str) + if id is not None: + return id + id = len(self.string_table) + 1 + self.string_table[str] = id + self.profile.string_table.append(str) + return id + + def get_string(self, string_id): + return self.profile.string_table[string_id] + + def get_sample_type_id(self, name): + id = self.sample_types.get(name) + if id is not None: + return id + id = len(self.profile.sample_type) + sample_type = self.profile.sample_type.add() + sample_type.type = self.get_string_id('event_' + name + '_samples') + sample_type.unit = self.get_string_id('count') + sample_type = self.profile.sample_type.add() + sample_type.type = self.get_string_id('event_' + name + '_count') + sample_type.unit = self.get_string_id('count') + self.sample_types[name] = id + return id + + def get_location_id(self, ip, symbol): + mapping_id = self.get_mapping_id(symbol.mapping[0], symbol.dso_name) + location = Location(mapping_id, ip, symbol.vaddr_in_file) + # Default line info only contains the function name + line = Line() + line.function_id = self.get_function_id(symbol.symbol_name, symbol.dso_name, + symbol.symbol_addr) + location.lines.append(line) + + exist_location = self.location_map.get(location.key) + if exist_location: + return exist_location.id + # location_id starts from 1 + location.id = len(self.location_list) + 1 + self.location_list.append(location) + self.location_map[location.key] = location + return location.id + + def get_mapping_id(self, report_mapping, filename): + filename_id = self.get_string_id(filename) + build_id = self.lib.GetBuildIdForPath(filename) + if build_id and build_id[0:2] == "0x": + build_id = build_id[2:] + build_id_id = self.get_string_id(build_id) + mapping = Mapping(report_mapping.start, report_mapping.end, + report_mapping.pgoff, filename_id, build_id_id) + exist_mapping = self.mapping_map.get(mapping.key) + if exist_mapping: + return exist_mapping.id + # mapping_id starts from 1 + mapping.id = len(self.mapping_list) + 1 + self.mapping_list.append(mapping) + self.mapping_map[mapping.key] = mapping + return mapping.id + + def get_mapping(self, mapping_id): + return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None + + def get_function_id(self, name, dso_name, vaddr_in_file): + if name == 'unknown': + return 0 + function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file) + exist_function = self.function_map.get(function.key) + if exist_function: + return exist_function.id + # function_id starts from 1 + function.id = len(self.function_list) + 1 + self.function_list.append(function) + self.function_map[function.key] = function + return function.id + + def get_function(self, function_id): + return self.function_list[function_id - 1] if function_id > 0 else None + + def add_sample(self, sample): + exist_sample = self.sample_map.get(sample.key) + if exist_sample: + exist_sample.add_values(sample.values) + else: + self.sample_list.append(sample) + self.sample_map[sample.key] = sample + + def gen_source_lines(self): + # 1. Create Addr2line instance + addr2line = Addr2Line(self.config['addr2line_path'], self.config['binary_cache_dir']) + + # 2. Put all needed addresses to it. + for location in self.location_list: + mapping = self.get_mapping(location.mapping_id) + dso_name = self.get_string(mapping.filename_id) + addr2line.add_addr(dso_name, location.vaddr_in_dso) + for function in self.function_list: + dso_name = self.get_string(function.dso_name_id) + addr2line.add_addr(dso_name, function.vaddr_in_dso) + + # 3. Generate source lines. + addr2line.convert_addrs_to_lines() + + # 4. Annotate locations and functions. + for location in self.location_list: + mapping = self.get_mapping(location.mapping_id) + dso_name = self.get_string(mapping.filename_id) + sources = addr2line.get_sources(dso_name, location.vaddr_in_dso) + source_id = 0 + for source in sources: + if source.file and source.function and source.line: + if source_id == 0: + # Clear default line info + location.lines = [] + location.lines.append(self.add_line(source, dso_name)) + source_id += 1 + + for function in self.function_list: + dso_name = self.get_string(function.dso_name_id) + if function.vaddr_in_dso: + sources = addr2line.get_sources(dso_name, function.vaddr_in_dso) + source = sources[0] if sources else None + if source and source.file: + function.source_filename_id = self.get_string_id(source.file) + if source.line: + function.start_line = source.line + + def add_line(self, source, dso_name): + line = Line() + function_id = self.get_function_id(source.function, dso_name, 0) + function = self.get_function(function_id) + function.source_filename_id = self.get_string_id(source.file) + line.function_id = function_id + line.line = source.line + return line + + def gen_profile_sample(self, sample): + profile_sample = self.profile.sample.add() + profile_sample.location_id.extend(sample.location_ids) + sample_type_count = len(self.sample_types) * 2 + values = [0] * sample_type_count + for id in sample.values.keys(): + values[id] = sample.values[id] + profile_sample.value.extend(values) + + def gen_profile_mapping(self, mapping): + profile_mapping = self.profile.mapping.add() + profile_mapping.id = mapping.id + profile_mapping.memory_start = mapping.memory_start + profile_mapping.memory_limit = mapping.memory_limit + profile_mapping.file_offset = mapping.file_offset + profile_mapping.filename = mapping.filename_id + profile_mapping.build_id = mapping.build_id_id + profile_mapping.has_filenames = True + profile_mapping.has_functions = True + profile_mapping.has_line_numbers = True + profile_mapping.has_inline_frames = True + + def gen_profile_location(self, location): + profile_location = self.profile.location.add() + profile_location.id = location.id + profile_location.mapping_id = location.mapping_id + profile_location.address = location.address + for i in range(len(location.lines)): + line = profile_location.line.add() + line.function_id = location.lines[i].function_id + line.line = location.lines[i].line + + def gen_profile_function(self, function): + profile_function = self.profile.function.add() + profile_function.id = function.id + profile_function.name = function.name_id + profile_function.system_name = function.name_id + profile_function.filename = function.source_filename_id + profile_function.start_line = function.start_line + + +def main(): + parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.') + parser.add_argument('--show', nargs=1, help='print existing profile.pprof') + parser.add_argument('--config', nargs=1, default='pprof_proto_generator.config', + help='Set config file, default is gen_pprof_proto.config.') + args = parser.parse_args(sys.argv[1:]) + if args.show: + profile = load_pprof_profile(args.show[0]) + printer = PprofProfilePrinter(profile) + printer.show() + return + config = load_config(args.config) + generator = PprofProfileGenerator(config) + profile = generator.gen() + store_pprof_profile(config['output_file'], profile) + + +if __name__ == '__main__': + main() |