From 440dfbb95a145ad67d422b3ac87c30d131d07ef2 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Mon, 3 May 2021 16:47:30 -0700 Subject: simpleperf: create and use build_id_list in binary_cache. When looking for a binary in binary_cache, we use the path <binary_cache_dir>/<path_on_device>. This is not suitable for the kernel vmlinux, which doesn't have a path on device. To support vmlinux, this CL makes the following changes: 1. In binary_cache_builder.py, create a build_id_list file, which supports finding a binary via its build id. 2. Add BinaryFinder class as a wrapper for finding binaries in binary_cache. It uses both build_id_list and path on device. 3. Use BinaryFinder in report scripts. Also add several related tests. Bug: 186566291 Test: run scripts/test/test.py. Change-Id: I74fd7dacdc4b42431797b7c2e293179e32ccb94b --- simpleperf/scripts/annotate.py | 15 ++- simpleperf/scripts/binary_cache_builder.py | 39 ++++-- simpleperf/scripts/pprof_proto_generator.py | 22 ++-- simpleperf/scripts/report_html.py | 87 +++++++------ simpleperf/scripts/simpleperf_utils.py | 139 ++++++++++++++------- .../scripts/test/binary_cache_builder_test.py | 41 ++++++ simpleperf/scripts/test/do_test.py | 1 + simpleperf/scripts/test/report_html_test.py | 88 ++++++++++++- .../runtest_two_functions_arm64_perf.data | Bin 0 -> 2950 bytes .../simpleperf_runtest_two_functions_arm64 | Bin 71800 -> 15944 bytes ..._runtest_two_functions_arm64_without_debug_info | Bin 0 -> 11472 bytes simpleperf/scripts/test/tools_test.py | 85 ++++++++----- 12 files changed, 378 insertions(+), 139 deletions(-) create mode 100644 simpleperf/scripts/test/script_testdata/runtest_two_functions_arm64_perf.data create mode 100755 simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64_without_debug_info (limited to 'simpleperf') diff --git a/simpleperf/scripts/annotate.py b/simpleperf/scripts/annotate.py index b05caa43..1970297f 100755 --- a/simpleperf/scripts/annotate.py +++ b/simpleperf/scripts/annotate.py @@ -25,8 +25,9 @@ import os.path import 
shutil from simpleperf_report_lib import ReportLib -from simpleperf_utils import (Addr2Nearestline, extant_dir, flatten_arg_list, is_windows, - log_exit, log_info, log_warning, SourceFileSearcher) +from simpleperf_utils import ( + Addr2Nearestline, BinaryFinder, extant_dir, flatten_arg_list, is_windows, log_exit, log_info, + log_warning, ReadElf, SourceFileSearcher) class SourceLine(object): @@ -53,10 +54,11 @@ class Addr2Line(object): """ def __init__(self, ndk_path, binary_cache_path, source_dirs): - self.addr2line = Addr2Nearestline(ndk_path, binary_cache_path, True) + binary_finder = BinaryFinder(binary_cache_path, ReadElf(ndk_path)) + self.addr2line = Addr2Nearestline(ndk_path, binary_finder, True) self.source_searcher = SourceFileSearcher(source_dirs) - def add_addr(self, dso_path, func_addr, addr): + def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int): self.addr2line.add_addr(dso_path, func_addr, addr) def convert_addrs_to_lines(self): @@ -213,9 +215,10 @@ class SourceFileAnnotator(object): symbols.append(callchain.entries[i].symbol) for symbol in symbols: if self._filter_symbol(symbol): - self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr, + build_id = lib.GetBuildIdForPath(symbol.dso_name) + self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr, symbol.vaddr_in_file) - self.addr2line.add_addr(symbol.dso_name, symbol.symbol_addr, + self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr, symbol.symbol_addr) def _filter_sample(self, sample): diff --git a/simpleperf/scripts/binary_cache_builder.py b/simpleperf/scripts/binary_cache_builder.py index 512fed98..362b8941 100755 --- a/simpleperf/scripts/binary_cache_builder.py +++ b/simpleperf/scripts/binary_cache_builder.py @@ -21,13 +21,16 @@ from __future__ import print_function import argparse +from dataclasses import dataclass import os import os.path +from pathlib import Path import shutil +from typing import List, Optional, Union from 
simpleperf_report_lib import ReportLib from simpleperf_utils import (AdbHelper, extant_dir, extant_file, flatten_arg_list, log_info, - log_warning, ReadElf, set_log_level) + log_warning, ReadElf, set_log_level, str_to_bytes) def is_jit_symfile(dso_name): @@ -37,7 +40,7 @@ def is_jit_symfile(dso_name): class BinaryCacheBuilder(object): """Collect all binaries needed by perf.data in binary_cache.""" - def __init__(self, ndk_path, disable_adb_root): + def __init__(self, ndk_path: Optional[str], disable_adb_root: bool): self.adb = AdbHelper(enable_switch_to_root=not disable_adb_root) self.readelf = ReadElf(ndk_path) self.binary_cache_dir = 'binary_cache' @@ -45,13 +48,15 @@ class BinaryCacheBuilder(object): os.makedirs(self.binary_cache_dir) self.binaries = {} - def build_binary_cache(self, perf_data_path, symfs_dirs): - self._collect_used_binaries(perf_data_path) + def build_binary_cache(self, perf_data_path: str, symfs_dirs: List[Union[Path, str]]): + self.collect_used_binaries(perf_data_path) self.copy_binaries_from_symfs_dirs(symfs_dirs) - self.pull_binaries_from_device() - self._pull_kernel_symbols() + if self.adb.is_device_available(): + self.pull_binaries_from_device() + self._pull_kernel_symbols() + self.create_build_id_list() - def _collect_used_binaries(self, perf_data_path): + def collect_used_binaries(self, perf_data_path): """read perf.data, collect all used binaries and their build id (if available).""" # A dict mapping from binary name to build_id binaries = {} @@ -73,10 +78,11 @@ class BinaryCacheBuilder(object): if dso_name not in binaries: if is_jit_symfile(dso_name): continue - binaries[dso_name] = lib.GetBuildIdForPath(dso_name) + name = 'vmlinux' if dso_name == '[kernel.kallsyms]' else dso_name + binaries[name] = lib.GetBuildIdForPath(dso_name) self.binaries = binaries - def copy_binaries_from_symfs_dirs(self, symfs_dirs): + def copy_binaries_from_symfs_dirs(self, symfs_dirs: List[Union[Path, str]]): """collect all files in symfs_dirs.""" if not 
symfs_dirs: return @@ -202,6 +208,21 @@ class BinaryCacheBuilder(object): self.adb.run(['shell', 'echo', '0', '>/proc/sys/kernel/kptr_restrict']) self.adb.run(['pull', '/proc/kallsyms', file_path]) + def create_build_id_list(self): + """ Create build_id_list. So report scripts can find a binary by its build_id instead of + path. + """ + build_id_list_path = os.path.join(self.binary_cache_dir, 'build_id_list') + with open(build_id_list_path, 'wb') as fh: + for root, _, files in os.walk(self.binary_cache_dir): + for filename in files: + path = os.path.join(root, filename) + relative_path = path[len(self.binary_cache_dir) + 1:] + build_id = self._read_build_id(path) + if build_id: + line = f'{build_id}={relative_path}\n' + fh.write(str_to_bytes(line)) + def main(): parser = argparse.ArgumentParser(description=""" diff --git a/simpleperf/scripts/pprof_proto_generator.py b/simpleperf/scripts/pprof_proto_generator.py index acac093b..c25cdae5 100755 --- a/simpleperf/scripts/pprof_proto_generator.py +++ b/simpleperf/scripts/pprof_proto_generator.py @@ -24,13 +24,12 @@ pprof -text pprof.profile """ -from __future__ import print_function import argparse import os import os.path from simpleperf_report_lib import ReportLib -from simpleperf_utils import (Addr2Nearestline, extant_dir, find_real_dso_path, find_tool_path, +from simpleperf_utils import (Addr2Nearestline, BinaryFinder, extant_dir, find_tool_path, flatten_arg_list, log_info, log_exit, ReadElf) try: import profile_pb2 @@ -277,6 +276,7 @@ class PprofProfileGenerator(object): # Map from dso_name in perf.data to (binary path, build_id). self.binary_map = {} self.read_elf = ReadElf(self.config['ndk_path']) + self.binary_finder = BinaryFinder(config['binary_cache_dir'], self.read_elf) def load_record_file(self, record_file): self.lib = ReportLib() @@ -433,17 +433,10 @@ class PprofProfileGenerator(object): # perf.data. build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name) # Try elf_path in binary cache. 
- elf_path = find_real_dso_path(dso_name, self.config['binary_cache_dir']) + elf_path = self.binary_finder.find_binary(dso_name, build_id_in_perf_data) if elf_path: - elf_build_id = self.read_elf.get_build_id(elf_path, False) - if build_id_in_perf_data: - match = build_id_in_perf_data == self.read_elf.pad_build_id(elf_build_id) - else: - # odex files generated by ART on Android O don't contain build id. - match = not elf_build_id - if match: - build_id = elf_build_id - binary_path = elf_path + build_id = elf_build_id + binary_path = str(elf_path) # When there is no matching elf_path, try converting build_id in perf.data. if not build_id and build_id_in_perf_data.startswith('0x'): @@ -492,8 +485,9 @@ class PprofProfileGenerator(object): log_info("Can't generate line information because can't find llvm-symbolizer.") return # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to - # pass binary_cache_dir to addr2line. - addr2line = Addr2Nearestline(self.config['ndk_path'], None, True) + # pass binary_cache_dir to BinaryFinder. + binary_finder = BinaryFinder(None, self.read_elf) + addr2line = Addr2Nearestline(self.config['ndk_path'], binary_finder, True) # 2. Put all needed addresses to it. 
for location in self.location_list: diff --git a/simpleperf/scripts/report_html.py b/simpleperf/scripts/report_html.py index 582aa0c5..c542d42a 100755 --- a/simpleperf/scripts/report_html.py +++ b/simpleperf/scripts/report_html.py @@ -17,15 +17,17 @@ import argparse import collections +from dataclasses import dataclass import datetime import json import os import sys -from typing import List, Optional +from typing import Callable, Dict, List, Optional from simpleperf_report_lib import ReportLib -from simpleperf_utils import (Addr2Nearestline, get_script_dir, log_exit, log_info, Objdump, - open_report_in_browser, SourceFileSearcher) +from simpleperf_utils import ( + Addr2Nearestline, BinaryFinder, get_script_dir, log_exit, log_info, Objdump, + open_report_in_browser, ReadElf, SourceFileSearcher) MAX_CALLSTACK_LENGTH = 750 @@ -386,23 +388,31 @@ class CallNode(object): cur_child.merge(child) +@dataclass +class LibInfo: + name: str + build_id: str + + class LibSet(object): """ Collection of shared libraries used in perf.data. """ def __init__(self): - self.lib_name_to_id = {} - self.lib_id_to_name = [] - - def get_lib_id(self, lib_name): - lib_id = self.lib_name_to_id.get(lib_name) - if lib_id is None: - lib_id = len(self.lib_id_to_name) - self.lib_name_to_id[lib_name] = lib_id - self.lib_id_to_name.append(lib_name) + self.lib_name_to_id: Dict[str, int] = {} + self.libs: List[LibInfo] = [] + + def get_lib_id(self, lib_name: str) -> Optional[int]: + return self.lib_name_to_id.get(lib_name) + + def add_lib(self, lib_name: str, build_id: str) -> int: + """ Return lib_id of the newly added lib. 
""" + lib_id = len(self.libs) + self.libs.append(LibInfo(lib_name, build_id)) + self.lib_name_to_id[lib_name] = lib_id return lib_id - def get_lib_name(self, lib_id): - return self.lib_id_to_name[lib_id] + def get_lib(self, lib_id: int) -> LibInfo: + return self.libs[lib_id] class Function(object): @@ -422,8 +432,8 @@ class FunctionSet(object): """ Collection of functions used in perf.data. """ def __init__(self): - self.name_to_func = {} - self.id_to_func = {} + self.name_to_func: Dict[Tuple[int, str], Function] = {} + self.id_to_func: Dict[int, Function] = {} def get_func_id(self, lib_id, symbol): key = (lib_id, symbol.symbol_name) @@ -586,6 +596,7 @@ class RecordData(object): self.total_samples = 0 self.source_files = SourceFileSet() self.gen_addr_hit_map_in_record_info = False + self.binary_finder = BinaryFinder(binary_cache_path, ReadElf(ndk_path)) def load_record_file(self, record_file, show_art_frames): lib = ReportLib() @@ -621,11 +632,16 @@ class RecordData(object): thread.sample_count += 1 lib_id = self.libs.get_lib_id(symbol.dso_name) + if lib_id is None: + lib_id = self.libs.add_lib(symbol.dso_name, lib.GetBuildIdForPath(symbol.dso_name)) func_id = self.functions.get_func_id(lib_id, symbol) callstack = [(lib_id, func_id, symbol.vaddr_in_file)] for i in range(callchain.nr): symbol = callchain.entries[i].symbol lib_id = self.libs.get_lib_id(symbol.dso_name) + if lib_id is None: + lib_id = self.libs.add_lib( + symbol.dso_name, lib.GetBuildIdForPath(symbol.dso_name)) func_id = self.functions.get_func_id(lib_id, symbol) callstack.append((lib_id, func_id, symbol.vaddr_in_file)) if len(callstack) > MAX_CALLSTACK_LENGTH: @@ -680,32 +696,33 @@ class RecordData(object): 2. Find line for each addr in FunctionScope.addr_hit_map. 3. Collect needed source code in SourceFileSet. 
""" - addr2line = Addr2Nearestline(self.ndk_path, self.binary_cache_path, False) + addr2line = Addr2Nearestline(self.ndk_path, self.binary_finder, False) # Request line range for each function. for function in self.functions.id_to_func.values(): if function.func_name == 'unknown': continue - lib_name = self.libs.get_lib_name(function.lib_id) - if filter_lib(lib_name): - addr2line.add_addr(lib_name, function.start_addr, function.start_addr) - addr2line.add_addr(lib_name, function.start_addr, + lib_info = self.libs.get_lib(function.lib_id) + if filter_lib(lib_info.name): + addr2line.add_addr(lib_info.name, lib_info.build_id, + function.start_addr, function.start_addr) + addr2line.add_addr(lib_info.name, lib_info.build_id, function.start_addr, function.start_addr + function.addr_len - 1) # Request line for each addr in FunctionScope.addr_hit_map. for event in self.events.values(): for lib in event.libraries: - lib_name = self.libs.get_lib_name(lib.lib_id) - if filter_lib(lib_name): + lib_info = self.libs.get_lib(lib.lib_id) + if filter_lib(lib_info.name): for function in lib.functions.values(): func_addr = self.functions.id_to_func[function.func_id].start_addr for addr in function.addr_hit_map: - addr2line.add_addr(lib_name, func_addr, addr) + addr2line.add_addr(lib_info.name, lib_info.build_id, func_addr, addr) addr2line.convert_addrs_to_lines() # Set line range for each function. for function in self.functions.id_to_func.values(): if function.func_name == 'unknown': continue - dso = addr2line.get_dso(self.libs.get_lib_name(function.lib_id)) + dso = addr2line.get_dso(self.libs.get_lib(function.lib_id).name) if not dso: continue start_source = addr2line.get_addr_source(dso, function.start_addr) @@ -723,7 +740,7 @@ class RecordData(object): # Build FunctionScope.line_hit_map. 
for event in self.events.values(): for lib in event.libraries: - dso = addr2line.get_dso(self.libs.get_lib_name(lib.lib_id)) + dso = addr2line.get_dso(self.libs.get_lib(lib.lib_id).name) if not dso: continue for function in lib.functions.values(): @@ -742,22 +759,22 @@ class RecordData(object): # Collect needed source code in SourceFileSet. self.source_files.load_source_code(source_dirs) - def add_disassembly(self, filter_lib): + def add_disassembly(self, filter_lib: Callable[[str], bool]): """ Collect disassembly information: 1. Use objdump to collect disassembly for each function in FunctionSet. 2. Set flag to dump addr_hit_map when generating record info. """ - objdump = Objdump(self.ndk_path, self.binary_cache_path) - cur_lib_name = None + objdump = Objdump(self.ndk_path, self.binary_finder) + cur_lib_name: Optional[str] = None dso_info = None for function in sorted(self.functions.id_to_func.values(), key=lambda a: a.lib_id): if function.func_name == 'unknown': continue - lib_name = self.libs.get_lib_name(function.lib_id) - if lib_name != cur_lib_name: - cur_lib_name = lib_name - if filter_lib(lib_name): - dso_info = objdump.get_dso_info(lib_name) + lib = self.libs.get_lib(function.lib_id) + if lib.name != cur_lib_name: + cur_lib_name = lib.name + if filter_lib(lib.name): + dso_info = objdump.get_dso_info(lib.name, lib.build_id) else: dso_info = None if dso_info: @@ -810,7 +827,7 @@ class RecordData(object): return thread_names def _gen_lib_list(self): - return [modify_text_for_html(x) for x in self.libs.lib_id_to_name] + return [modify_text_for_html(lib.name) for lib in self.libs.libs] def _gen_function_map(self): func_map = {} diff --git a/simpleperf/scripts/simpleperf_utils.py b/simpleperf/scripts/simpleperf_utils.py index 580db888..156d0e83 100644 --- a/simpleperf/scripts/simpleperf_utils.py +++ b/simpleperf/scripts/simpleperf_utils.py @@ -18,6 +18,7 @@ """utils.py: export utility functions. 
""" +from __future__ import annotations import argparse import logging import os @@ -28,6 +29,7 @@ import shutil import subprocess import sys import time +from typing import Dict, List, Optional, Union def get_script_dir(): @@ -315,6 +317,12 @@ class AdbHelper(object): self.enable_switch_to_root = enable_switch_to_root self.serial_number = None + def is_device_available(self) -> bool: + result, _ = self.run_and_return_output( + ['shell', 'whoami'], + log_output=False, log_stderr=False) + return result == True + def run(self, adb_args): return self.run_and_return_output(adb_args)[0] @@ -397,7 +405,7 @@ class AdbHelper(object): log_fatal('unsupported architecture: %s' % output.strip()) return '' - def get_android_version(self): + def get_android_version(self) -> int: """ Get Android version on device, like 7 is for Android N, 8 is for Android O.""" build_version = self.get_property('ro.build.version.release') android_version = 0 @@ -446,22 +454,53 @@ def open_report_in_browser(report_path): webbrowser.open_new_tab(report_path) -def is_elf_file(path): - if os.path.isfile(path): - with open(path, 'rb') as fh: - return fh.read(4) == b'\x7fELF' - return False - +class BinaryFinder: + def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: ReadElf): + if isinstance(binary_cache_dir, str): + binary_cache_dir = Path(binary_cache_dir) + self.binary_cache_dir = binary_cache_dir + self.readelf = readelf + self.build_id_map = self._load_build_id_map() + + def _load_build_id_map(self) -> Dict[str, Path]: + build_id_map: Dict[str, Path] = {} + if self.binary_cache_dir: + build_id_list_file = self.binary_cache_dir / 'build_id_list' + if build_id_list_file.is_file(): + with open(self.binary_cache_dir / 'build_id_list', 'rb') as fh: + for line in fh.readlines(): + # lines are in format "=". 
+ items = bytes_to_str(line).strip().split('=') + if len(items) == 2: + build_id_map[items[0]] = self.binary_cache_dir / items[1] + return build_id_map + + def find_binary(self, dso_path_in_record_file: str, + expected_build_id: Optional[str]) -> Optional[Path]: + """ If expected_build_id is None, don't check build id. + Otherwise, the build id of the found binary should match the expected one.""" + # Find binary from build id map. + if expected_build_id: + path = self.build_id_map.get(expected_build_id) + if path and self._check_path(path, expected_build_id): + return path + # Find binary by path in binary cache. + if self.binary_cache_dir: + path = self.binary_cache_dir / dso_path_in_record_file[1:] + if self._check_path(path, expected_build_id): + return path + # Find binary by its absolute path. + path = Path(dso_path_in_record_file) + if self._check_path(path, expected_build_id): + return path + return None -def find_real_dso_path(dso_path_in_record_file, binary_cache_path): - """ Given the path of a shared library in perf.data, find its real path in the file system. """ - if binary_cache_path: - tmp_path = os.path.join(binary_cache_path, dso_path_in_record_file[1:]) - if is_elf_file(tmp_path): - return tmp_path - if is_elf_file(dso_path_in_record_file): - return dso_path_in_record_file - return None + def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool: + if not self.readelf.is_elf_file(path): + return False + if expected_build_id is not None: + return self.readelf.get_build_id(path) == expected_build_id + return True class Addr2Nearestline(object): @@ -496,7 +535,8 @@ class Addr2Nearestline(object): addrs: a map from address to Addr object in this dso. 
""" - def __init__(self): + def __init__(self, build_id: str): + self.build_id = build_id self.addrs = {} class Addr(object): @@ -510,13 +550,15 @@ class Addr2Nearestline(object): self.func_addr = func_addr self.source_lines = None - def __init__(self, ndk_path, binary_cache_path, with_function_name): + def __init__( + self, ndk_path: Optional[str], + binary_finder: BinaryFinder, with_function_name: bool): self.symbolizer_path = find_tool_path('llvm-symbolizer', ndk_path) if not self.symbolizer_path: log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.") self.readelf = ReadElf(ndk_path) self.dso_map = {} # map from dso_path to Dso. - self.binary_cache_path = binary_cache_path + self.binary_finder = binary_finder self.with_function_name = with_function_name # Saving file names for each addr takes a lot of memory. So we store file ids in Addr, # and provide data structures connecting file id and file name here. @@ -525,19 +567,19 @@ class Addr2Nearestline(object): self.func_name_to_id = {} self.func_id_to_name = [] - def add_addr(self, dso_path, func_addr, addr): + def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int): dso = self.dso_map.get(dso_path) if dso is None: - dso = self.dso_map[dso_path] = self.Dso() + dso = self.dso_map[dso_path] = self.Dso(build_id) if addr not in dso.addrs: dso.addrs[addr] = self.Addr(func_addr) def convert_addrs_to_lines(self): - for dso_path in self.dso_map: - self._convert_addrs_in_one_dso(dso_path, self.dso_map[dso_path]) + for dso_path, dso in self.dso_map.items(): + self._convert_addrs_in_one_dso(dso_path, dso) - def _convert_addrs_in_one_dso(self, dso_path, dso): - real_path = find_real_dso_path(dso_path, self.binary_cache_path) + def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso): + real_path = self.binary_finder.find_binary(dso_path, dso.build_id) if not real_path: if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']: log_debug("Can't find dso 
%s" % dso_path) @@ -553,10 +595,10 @@ class Addr2Nearestline(object): self._collect_line_info(dso, real_path, range(-addr_step * 5, -addr_step * 128 - 1, -addr_step)) - def _check_debug_line_section(self, real_path): + def _check_debug_line_section(self, real_path: Path): return '.debug_line' in self.readelf.get_sections(real_path) - def _get_addr_step(self, real_path): + def _get_addr_step(self, real_path: Path): arch = self.readelf.get_arch(real_path) if arch == 'arm64': return 4 @@ -564,7 +606,8 @@ class Addr2Nearestline(object): return 2 return 1 - def _collect_line_info(self, dso, real_path, addr_shifts): + def _collect_line_info( + self, dso: Addr2Nearestline.Dso, real_path: Path, addr_shifts: List[int]): """ Use addr2line to get line info in a dso, with given addr shifts. """ # 1. Collect addrs to send to addr2line. addr_set = set() @@ -637,7 +680,7 @@ class Addr2Nearestline(object): if shifted_addr == addr_obj.func_addr: break - def _build_symbolizer_args(self, binary_path): + def _build_symbolizer_args(self, binary_path: Path) -> List[str]: args = [self.symbolizer_path, '--print-address', '--inlining', '--obj=%s' % binary_path] if self.with_function_name: args += ['--functions=linkage', '--demangle'] @@ -753,20 +796,21 @@ class SourceFileSearcher(object): class Objdump(object): """ A wrapper of objdump to disassemble code. 
""" - def __init__(self, ndk_path, binary_cache_path): + def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder): self.ndk_path = ndk_path - self.binary_cache_path = binary_cache_path + self.binary_finder = binary_finder self.readelf = ReadElf(ndk_path) self.objdump_paths = {} - def get_dso_info(self, dso_path): - real_path = find_real_dso_path(dso_path, self.binary_cache_path) + def get_dso_info(self, dso_path: str, expected_build_id: Optional[str] + ) -> Optional[Tuple[str, str]]: + real_path = self.binary_finder.find_binary(dso_path, expected_build_id) if not real_path: return None arch = self.readelf.get_arch(real_path) if arch == 'unknown': return None - return (real_path, arch) + return (str(real_path), arch) def disassemble_code(self, dso_info, start_addr, addr_len): """ Disassemble [start_addr, start_addr + addr_len] of dso_path. @@ -821,11 +865,18 @@ class ReadElf(object): if not self.readelf_path: log_exit("Can't find llvm-readelf. Please set ndk path with --ndk_path option.") - def get_arch(self, elf_file_path): + @staticmethod + def is_elf_file(path: Union[Path, str]): + if os.path.isfile(path): + with open(path, 'rb') as fh: + return fh.read(4) == b'\x7fELF' + return False + + def get_arch(self, elf_file_path: Union[Path, str]): """ Get arch of an elf file. """ - if is_elf_file(elf_file_path): + if self.is_elf_file(elf_file_path): try: - output = subprocess.check_output([self.readelf_path, '-h', elf_file_path]) + output = subprocess.check_output([self.readelf_path, '-h', str(elf_file_path)]) output = bytes_to_str(output) if output.find('AArch64') != -1: return 'arm64' @@ -839,11 +890,11 @@ class ReadElf(object): pass return 'unknown' - def get_build_id(self, elf_file_path, with_padding=True): + def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str: """ Get build id of an elf file. 
""" - if is_elf_file(elf_file_path): + if self.is_elf_file(elf_file_path): try: - output = subprocess.check_output([self.readelf_path, '-n', elf_file_path]) + output = subprocess.check_output([self.readelf_path, '-n', str(elf_file_path)]) output = bytes_to_str(output) result = re.search(r'Build ID:\s*(\S+)', output) if result: @@ -864,12 +915,12 @@ class ReadElf(object): build_id = build_id[:40] return '0x' + build_id - def get_sections(self, elf_file_path): + def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]: """ Get sections of an elf file. """ - section_names = [] - if is_elf_file(elf_file_path): + section_names: List[str] = [] + if self.is_elf_file(elf_file_path): try: - output = subprocess.check_output([self.readelf_path, '-SW', elf_file_path]) + output = subprocess.check_output([self.readelf_path, '-SW', str(elf_file_path)]) output = bytes_to_str(output) for line in output.split('\n'): # Parse line like:" [ 1] .note.android.ident NOTE 0000000000400190 ...". diff --git a/simpleperf/scripts/test/binary_cache_builder_test.py b/simpleperf/scripts/test/binary_cache_builder_test.py index c17fbe49..5bbbf32a 100644 --- a/simpleperf/scripts/test/binary_cache_builder_test.py +++ b/simpleperf/scripts/test/binary_cache_builder_test.py @@ -16,7 +16,9 @@ import filecmp import os +from pathlib import Path import shutil +import tempfile from binary_cache_builder import BinaryCacheBuilder from simpleperf_utils import ReadElf, remove, find_tool_path @@ -65,3 +67,42 @@ class TestBinaryCacheBuilder(TestBase): self.assertTrue(filecmp.cmp(target_file, source_file)) binary_cache_builder.pull_binaries_from_device() self.assertTrue(filecmp.cmp(target_file, source_file)) + + def test_prefer_binary_with_debug_info(self): + binary_cache_builder = BinaryCacheBuilder(TestHelper.ndk_path, False) + binary_cache_builder.collect_used_binaries( + TestHelper.testdata_path('runtest_two_functions_arm64_perf.data')) + + # Create a symfs_dir, which contains elf file with and 
without debug info. + with tempfile.TemporaryDirectory() as tmp_dir: + shutil.copy( + TestHelper.testdata_path( + 'simpleperf_runtest_two_functions_arm64_without_debug_info'), + Path(tmp_dir) / 'simpleperf_runtest_two_functions_arm64') + + debug_dir = Path(tmp_dir) / 'debug' + debug_dir.mkdir() + shutil.copy(TestHelper.testdata_path( + 'simpleperf_runtest_two_functions_arm64'), debug_dir) + # Check if the elf file with debug info is chosen. + binary_cache_builder.copy_binaries_from_symfs_dirs([tmp_dir]) + elf_path = (Path(binary_cache_builder.binary_cache_dir) / 'data' / + 'local' / 'tmp' / 'simpleperf_runtest_two_functions_arm64') + self.assertTrue(elf_path.is_file()) + self.assertIn('.debug_info', binary_cache_builder.readelf.get_sections(elf_path)) + + def test_create_build_id_list(self): + symfs_dir = TestHelper.testdata_dir + binary_cache_builder = BinaryCacheBuilder(TestHelper.ndk_path, False) + binary_cache_builder.collect_used_binaries( + TestHelper.testdata_path('runtest_two_functions_arm64_perf.data')) + binary_cache_builder.copy_binaries_from_symfs_dirs([symfs_dir]) + elf_path = (Path(binary_cache_builder.binary_cache_dir) / 'data' / + 'local' / 'tmp' / 'simpleperf_runtest_two_functions_arm64') + self.assertTrue(elf_path.is_file()) + + binary_cache_builder.create_build_id_list() + build_id_list_path = Path(binary_cache_builder.binary_cache_dir) / 'build_id_list' + self.assertTrue(build_id_list_path.is_file()) + with open(build_id_list_path, 'r') as fh: + self.assertIn('simpleperf_runtest_two_functions_arm64', fh.read()) diff --git a/simpleperf/scripts/test/do_test.py b/simpleperf/scripts/test/do_test.py index 182f6446..b2886b65 100755 --- a/simpleperf/scripts/test/do_test.py +++ b/simpleperf/scripts/test/do_test.py @@ -135,6 +135,7 @@ def build_testdata(testdata_dir: Path): script_test_dir / 'testdata', script_dir.parent / 'testdata', script_dir.parent / 'demo', + script_dir.parent / 'runtest', ] for source_dir in source_dirs: diff --git 
a/simpleperf/scripts/test/report_html_test.py b/simpleperf/scripts/test/report_html_test.py index 0a9dadc1..c1f62bb6 100644 --- a/simpleperf/scripts/test/report_html_test.py +++ b/simpleperf/scripts/test/report_html_test.py @@ -18,6 +18,7 @@ import collections import json from typing import Any, Dict, List +from binary_cache_builder import BinaryCacheBuilder from . test_utils import TestBase, TestHelper @@ -81,7 +82,10 @@ class TestReportHtml(TestBase): self.assertIn(original_methodname, json.dumps(record_data)) def get_record_data(self, options: List[str]) -> Dict[str, Any]: - self.run_cmd(['report_html.py'] + options) + args = ['report_html.py'] + options + if TestHelper.ndk_path: + args += ['--ndk_path', TestHelper.ndk_path] + self.run_cmd(args) with open('report.html', 'r') as fh: data = fh.read() start_str = 'type="application/json"' @@ -95,3 +99,85 @@ class TestReportHtml(TestBase): self.assertNotEqual(end_pos, -1) json_data = data[start_pos:end_pos] return json.loads(json_data) + + def test_add_source_code(self): + """ Test --add_source_code option. """ + testdata_file = TestHelper.testdata_path('runtest_two_functions_arm64_perf.data') + + # Build binary_cache. + binary_cache_builder = BinaryCacheBuilder(TestHelper.ndk_path, False) + binary_cache_builder.build_binary_cache(testdata_file, [TestHelper.testdata_dir]) + + # Generate report.html. + source_dir = TestHelper.testdata_dir + record_data = self.get_record_data( + ['-i', testdata_file, '--add_source_code', '--source_dirs', str(source_dir)]) + + # Check source code info in samples. 
+ source_code_list = [] + thread = record_data['sampleInfo'][0]['processes'][0]['threads'][0] + for lib in thread['libs']: + for function in lib['functions']: + for source_code_info in function.get('s') or []: + source_file = record_data['sourceFiles'][source_code_info['f']] + file_path = source_file['path'] + line_number = source_code_info['l'] + line_content = source_file['code'][str(line_number)] + event_count = source_code_info['e'] + subtree_event_count = source_code_info['s'] + s = (f'{file_path}:{line_number}:{line_content}:' + + f'{event_count}:{subtree_event_count}') + source_code_list.append(s) + check_items = ['two_functions.cpp:9: *p = i;\n:590184:590184', + 'two_functions.cpp:16: *p = i;\n:591577:591577', + 'two_functions.cpp:22: Function1();\n:0:590184', + 'two_functions.cpp:23: Function2();\n:0:591577'] + for item in check_items: + found = False + for source_code in source_code_list: + if item in source_code: + found = True + break + self.assertTrue(found, item) + + def test_add_disassembly(self): + """ Test --add_disassembly option. """ + testdata_file = TestHelper.testdata_path('runtest_two_functions_arm64_perf.data') + + # Build binary_cache. + binary_cache_builder = BinaryCacheBuilder(TestHelper.ndk_path, False) + binary_cache_builder.build_binary_cache(testdata_file, [TestHelper.testdata_dir]) + + # Generate report.html. + record_data = self.get_record_data(['-i', testdata_file, '--add_disassembly']) + + # Check disassembly in samples. 
+ disassembly_list = [] + thread = record_data['sampleInfo'][0]['processes'][0]['threads'][0] + for lib in thread['libs']: + lib_name = record_data['libList'][lib['libId']] + for function in lib['functions']: + for addr_info in function.get('a') or []: + addr = addr_info['a'] + event_count = addr_info['e'] + subtree_event_count = addr_info['s'] + function_data = record_data['functionMap'][str(function['f'])] + function_name = function_data['f'] + for dis_line, dis_addr in function_data.get('d') or []: + if addr == dis_addr: + addr_str = '0x%x' % addr + s = (f'{lib_name}:{function_name}:{addr_str}:' + + f'{event_count}:{subtree_event_count}') + disassembly_list.append(s) + + check_items = ['simpleperf_runtest_two_functions_arm64:Function1():0x1094:590184:590184', + 'simpleperf_runtest_two_functions_arm64:Function2():0x1104:591577:591577', + 'simpleperf_runtest_two_functions_arm64:main:0x113c:0:590184', + 'simpleperf_runtest_two_functions_arm64:main:0x1140:0:591577'] + for item in check_items: + found = False + for disassembly in disassembly_list: + if item in disassembly: + found = True + break + self.assertTrue(found, item) diff --git a/simpleperf/scripts/test/script_testdata/runtest_two_functions_arm64_perf.data b/simpleperf/scripts/test/script_testdata/runtest_two_functions_arm64_perf.data new file mode 100644 index 00000000..e8c2c4a5 Binary files /dev/null and b/simpleperf/scripts/test/script_testdata/runtest_two_functions_arm64_perf.data differ diff --git a/simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64 b/simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64 index 49d76130..b8c47208 100755 Binary files a/simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64 and b/simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64 differ diff --git a/simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64_without_debug_info 
b/simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64_without_debug_info new file mode 100755 index 00000000..7b0e1b02 Binary files /dev/null and b/simpleperf/scripts/test/script_testdata/simpleperf_runtest_two_functions_arm64_without_debug_info differ diff --git a/simpleperf/scripts/test/tools_test.py b/simpleperf/scripts/test/tools_test.py index c70c6dc4..28b80b93 100644 --- a/simpleperf/scripts/test/tools_test.py +++ b/simpleperf/scripts/test/tools_test.py @@ -15,8 +15,10 @@ # limitations under the License. import os +from pathlib import Path -from simpleperf_utils import (is_elf_file, Addr2Nearestline, Objdump, ReadElf, +from binary_cache_builder import BinaryCacheBuilder +from simpleperf_utils import (Addr2Nearestline, BinaryFinder, Objdump, ReadElf, SourceFileSearcher, is_windows, remove) from . test_utils import TestBase, TestHelper @@ -27,22 +29,19 @@ class TestTools(TestBase): self.run_addr2nearestline_test(False) def run_addr2nearestline_test(self, with_function_name): - binary_cache_path = TestHelper.testdata_dir test_map = { '/simpleperf_runtest_two_functions_arm64': [ { - 'func_addr': 0x668, - 'addr': 0x668, + 'func_addr': 0x112c, + 'addr': 0x112c, 'source': 'system/extras/simpleperf/runtest/two_functions.cpp:20', 'function': 'main', }, { - 'func_addr': 0x668, - 'addr': 0x6a4, - 'source': """system/extras/simpleperf/runtest/two_functions.cpp:7 - system/extras/simpleperf/runtest/two_functions.cpp:22""", - 'function': """Function1() - main""", + 'func_addr': 0x104c, + 'addr': 0x105c, + 'source': "system/extras/simpleperf/runtest/two_functions.cpp:7", + 'function': "Function1()", }, ], '/simpleperf_runtest_two_functions_arm': [ @@ -96,11 +95,12 @@ class TestTools(TestBase): } ], } - addr2line = Addr2Nearestline(TestHelper.ndk_path, binary_cache_path, with_function_name) + binary_finder = BinaryFinder(TestHelper.testdata_dir, ReadElf(TestHelper.ndk_path)) + addr2line = Addr2Nearestline(TestHelper.ndk_path, binary_finder, 
with_function_name) for dso_path in test_map: test_addrs = test_map[dso_path] for test_addr in test_addrs: - addr2line.add_addr(dso_path, test_addr['func_addr'], test_addr['addr']) + addr2line.add_addr(dso_path, None, test_addr['func_addr'], test_addr['addr']) addr2line.convert_addrs_to_lines() for dso_path in test_map: dso = addr2line.get_dso(dso_path) @@ -137,15 +137,14 @@ class TestTools(TestBase): (dso_path, test_addr['addr'], expected_source, actual_source)) def test_objdump(self): - binary_cache_path = TestHelper.testdata_dir test_map = { '/simpleperf_runtest_two_functions_arm64': { - 'start_addr': 0x668, - 'len': 116, + 'start_addr': 0x112c, + 'len': 28, 'expected_items': [ ('main():', 0), ('system/extras/simpleperf/runtest/two_functions.cpp:20', 0), - ('694: add x20, x20, #1758', 0x694), + ('1134: add x29, sp, #16', 0x1134), ], }, '/simpleperf_runtest_two_functions_arm': { @@ -176,10 +175,11 @@ class TestTools(TestBase): ], }, } - objdump = Objdump(TestHelper.ndk_path, binary_cache_path) + binary_finder = BinaryFinder(TestHelper.testdata_dir, ReadElf(TestHelper.ndk_path)) + objdump = Objdump(TestHelper.ndk_path, binary_finder) for dso_path in test_map: dso = test_map[dso_path] - dso_info = objdump.get_dso_info(dso_path) + dso_info = objdump.get_dso_info(dso_path, None) self.assertIsNotNone(dso_info, dso_path) disassemble_code = objdump.disassemble_code(dso_info, dso['start_addr'], dso['len']) self.assertTrue(disassemble_code, dso_path) @@ -202,15 +202,9 @@ class TestTools(TestBase): test_map = { 'simpleperf_runtest_two_functions_arm64': { 'arch': 'arm64', - 'build_id': '0xe8ecb3916d989dbdc068345c30f0c24300000000', - 'sections': ['.interp', '.note.android.ident', '.note.gnu.build-id', '.dynsym', - '.dynstr', '.gnu.hash', '.gnu.version', '.gnu.version_r', '.rela.dyn', - '.rela.plt', '.plt', '.text', '.rodata', '.eh_frame', '.eh_frame_hdr', - '.preinit_array', '.init_array', '.fini_array', '.dynamic', '.got', - '.got.plt', '.data', '.bss', '.comment', 
'.debug_str', '.debug_loc', - '.debug_abbrev', '.debug_info', '.debug_ranges', '.debug_macinfo', - '.debug_pubnames', '.debug_pubtypes', '.debug_line', - '.note.gnu.gold-version', '.symtab', '.strtab', '.shstrtab'], + 'build_id': '0xb4f1b49b0fe9e34e78fb14e5374c930c00000000', + 'sections': ['.note.gnu.build-id', '.dynsym', '.text', '.rodata', '.eh_frame', + '.eh_frame_hdr', '.debug_info', '.debug_line', '.symtab'], }, 'simpleperf_runtest_two_functions_arm': { 'arch': 'arm', @@ -231,7 +225,9 @@ class TestTools(TestBase): if 'build_id' in dso_info: self.assertEqual(dso_info['build_id'], readelf.get_build_id(path), dso_path) if 'sections' in dso_info: - self.assertEqual(dso_info['sections'], readelf.get_sections(path), dso_path) + sections = readelf.get_sections(path) + for section in dso_info['sections']: + self.assertIn(section, sections) self.assertEqual(readelf.get_arch('not_exist_file'), 'unknown') self.assertEqual(readelf.get_build_id('not_exist_file'), '') self.assertEqual(readelf.get_sections('not_exist_file'), []) @@ -263,11 +259,40 @@ class TestTools(TestBase): searcher.get_real_path('MainActivity.kt')) def test_is_elf_file(self): - self.assertTrue(is_elf_file(TestHelper.testdata_path( + self.assertTrue(ReadElf.is_elf_file(TestHelper.testdata_path( 'simpleperf_runtest_two_functions_arm'))) with open('not_elf', 'wb') as fh: fh.write(b'\x90123') try: - self.assertFalse(is_elf_file('not_elf')) + self.assertFalse(ReadElf.is_elf_file('not_elf')) finally: remove('not_elf') + + def test_binary_finder(self): + # Create binary_cache. 
+ binary_cache_builder = BinaryCacheBuilder(TestHelper.ndk_path, False) + elf_name = 'simpleperf_runtest_two_functions_arm' + elf_path = TestHelper.testdata_path(elf_name) + readelf = ReadElf(TestHelper.ndk_path) + build_id = readelf.get_build_id(elf_path) + self.assertGreater(len(build_id), 0) + binary_cache_builder.binaries[elf_name] = build_id + binary_cache_builder.copy_binaries_from_symfs_dirs([TestHelper.testdata_dir]) + binary_cache_builder.create_build_id_list() + + # Test BinaryFinder. + path_in_binary_cache = Path(binary_cache_builder.binary_cache_dir, elf_name) + binary_finder = BinaryFinder(binary_cache_builder.binary_cache_dir, readelf) + # Find binary using build id. + path = binary_finder.find_binary('[not_exist_file]', build_id) + self.assertEqual(path, path_in_binary_cache) + # Find binary using path. + path = binary_finder.find_binary('/' + elf_name, None) + self.assertEqual(path, path_in_binary_cache) + # Find binary using absolute path. + path = binary_finder.find_binary(str(path_in_binary_cache), None) + self.assertEqual(path, path_in_binary_cache) + + # The binary should have a matching build id. + path = binary_finder.find_binary('/' + elf_name, 'wrong_build_id') + self.assertIsNone(path) -- cgit v1.2.3