summary | refs | log | tree | commit | diff
path: root/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'utils.py')
-rw-r--r-- utils.py 85
1 files changed, 52 insertions, 33 deletions
diff --git a/utils.py b/utils.py
index 9891e60..e0d40b2 100644
--- a/utils.py
+++ b/utils.py
@@ -148,15 +148,16 @@ EXPECTED_TOOLS = {
'adb': {
'is_binutils': False,
'test_option': 'version',
- 'path_in_ndk': '../platform-tools/adb',
+ 'path_in_ndk': lambda _: '../platform-tools/adb',
},
'readelf': {
'is_binutils': True,
'accept_tool_without_arch': True,
},
- 'addr2line': {
- 'is_binutils': True,
- 'accept_tool_without_arch': True
+ 'llvm-symbolizer': {
+ 'is_binutils': False,
+ 'path_in_ndk':
+ lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
},
'objdump': {
'is_binutils': True,
@@ -196,7 +197,7 @@ def find_tool_path(toolname, ndk_path=None, arch=None):
toolname_with_arch, path_in_ndk = _get_binutils_path_in_ndk(toolname, arch, platform)
else:
toolname_with_arch = toolname
- path_in_ndk = tool_info['path_in_ndk']
+ path_in_ndk = tool_info['path_in_ndk'](platform)
path_in_ndk = path_in_ndk.replace('/', os.sep)
# 1. Find tool in the given ndk path.
@@ -379,8 +380,6 @@ def is_elf_file(path):
def find_real_dso_path(dso_path_in_record_file, binary_cache_path):
""" Given the path of a shared library in perf.data, find its real path in the file system. """
- if dso_path_in_record_file[0] != '/' or dso_path_in_record_file == '//anon':
- return None
if binary_cache_path:
tmp_path = os.path.join(binary_cache_path, dso_path_in_record_file[1:])
if is_elf_file(tmp_path):
@@ -391,7 +390,7 @@ def find_real_dso_path(dso_path_in_record_file, binary_cache_path):
class Addr2Nearestline(object):
- """ Use addr2line to convert (dso_path, func_addr, addr) to (source_file, line) pairs.
+ """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).
For instructions generated by C++ compilers without a matching statement in source code
(like stack corruption check, switch optimization, etc.), addr2line can't generate
line information. However, we want to assign the instruction to the nearest line before
@@ -435,9 +434,9 @@ class Addr2Nearestline(object):
self.source_lines = None
def __init__(self, ndk_path, binary_cache_path, with_function_name):
- self.addr2line_path = find_tool_path('addr2line', ndk_path)
- if not self.addr2line_path:
- log_exit("Can't find addr2line. Please set ndk path with --ndk_path option.")
+ self.symbolizer_path = find_tool_path('llvm-symbolizer', ndk_path)
+ if not self.symbolizer_path:
+ log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
self.readelf = ReadElf(ndk_path)
self.dso_map = {} # map from dso_path to Dso.
self.binary_cache_path = binary_cache_path
@@ -504,12 +503,11 @@ class Addr2Nearestline(object):
break
if not addr_set:
return
- addr_request = '\n'.join(['%x' % addr for addr in sorted(addr_set)])
+ addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)])
# 2. Use addr2line to collect line info.
try:
- option = '-ai' + ('fC' if self.with_function_name else '')
- subproc = subprocess.Popen([self.addr2line_path, option, '-e', real_path],
+ subproc = subprocess.Popen(self._build_symbolizer_args(real_path),
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
(stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request))
stdoutdata = bytes_to_str(stdoutdata)
@@ -520,6 +518,9 @@ class Addr2Nearestline(object):
need_function_name = self.with_function_name
cur_function_name = None
for line in stdoutdata.strip().split('\n'):
+ line = line.strip()
+ if not line:
+ continue
if line[:2] == '0x':
# a new address
cur_line_list = addr_map[int(line, 16)] = []
@@ -528,27 +529,16 @@ class Addr2Nearestline(object):
need_function_name = False
else:
need_function_name = self.with_function_name
- # a file:line.
if cur_line_list is None:
continue
- # Handle lines like "C:\Users\...\file:32".
- items = line.rsplit(':', 1)
- if len(items) != 2:
- continue
- if '?' in line:
- # if ? in line, it doesn't have a valid line info.
+ file_path, line_number = self._parse_source_location(line)
+ if not file_path or not line_number:
# An addr can have a list of (file, line), when the addr belongs to an inlined
# function. Sometimes only part of the list has ? mark. In this case, we think
# the line info is valid if the first line doesn't have ? mark.
if not cur_line_list:
cur_line_list = None
continue
- (file_path, line_number) = items
- line_number = line_number.split()[0] # Remove comments after line number
- try:
- line_number = int(line_number)
- except ValueError:
- continue
file_id = self._get_file_id(file_path)
if self.with_function_name:
func_id = self._get_func_id(cur_function_name)
@@ -570,6 +560,29 @@ class Addr2Nearestline(object):
if shifted_addr == addr_obj.func_addr:
break
+ def _build_symbolizer_args(self, binary_path):
+ args = [self.symbolizer_path, '-print-address', '-inlining', '-obj=%s' % binary_path]
+ if self.with_function_name:
+ args += ['-functions=linkage', '-demangle']
+ else:
+ args.append('-functions=none')
+ return args
+
+ def _parse_source_location(self, line):
+ file_path, line_number = None, None
+ # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
+ # Filename may contain ':' like "C:\Users\...\file".
+ items = line.rsplit(':', 2)
+ if len(items) == 3:
+ file_path, line_number = items[:2]
+ if not file_path or ('?' in file_path) or not line_number or ('?' in line_number):
+ return None, None
+ try:
+ line_number = int(line_number)
+ except ValueError:
+ return None, None
+ return file_path, line_number
+
def _get_file_id(self, file_path):
file_id = self.file_name_to_id.get(file_path)
if file_id is None:
@@ -740,7 +753,7 @@ class ReadElf(object):
pass
return 'unknown'
- def get_build_id(self, elf_file_path):
+ def get_build_id(self, elf_file_path, with_padding=True):
""" Get build id of an elf file. """
if is_elf_file(elf_file_path):
try:
@@ -749,16 +762,22 @@ class ReadElf(object):
result = re.search(r'Build ID:\s*(\S+)', output)
if result:
build_id = result.group(1)
- if len(build_id) < 40:
- build_id += '0' * (40 - len(build_id))
- else:
- build_id = build_id[:40]
- build_id = '0x' + build_id
+ if with_padding:
+ build_id = self.pad_build_id(build_id)
return build_id
except subprocess.CalledProcessError:
pass
return ""
+ @staticmethod
+ def pad_build_id(build_id):
+ """ Pad build id to 40 hex numbers (20 bytes). """
+ if len(build_id) < 40:
+ build_id += '0' * (40 - len(build_id))
+ else:
+ build_id = build_id[:40]
+ return '0x' + build_id
+
def get_sections(self, elf_file_path):
""" Get sections of an elf file. """
section_names = []