#!/usr/bin/python # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. # TODO(petkov): Integrate this utility into the build system in a more # consistent way -- e.g., create an ebuild that pulls the utility from a # mirrored upstream repo with a patch or upstream the patch. from __future__ import print_function import optparse import os import re import sys import json def format_bytes(bytes): """Pretty-print a number of bytes.""" if bytes > 1e6: bytes = bytes / 1.0e6 return '%.1fm' % bytes if bytes > 1e3: bytes = bytes / 1.0e3 return '%.1fk' % bytes return str(bytes) def symbol_type_to_human(type): """Convert a symbol type as printed by nm into a human-readable name.""" return { 'b': 'bss', 'd': 'data', 'r': 'read-only data', 't': 'code', 'w': 'weak symbol', 'v': 'weak symbol' }[type] def parse_du(input): """Parse du output. Argument: an iterable over lines of 'du -B 1' output.' Yields: (size, path) """ # Match lines with |size| |path| line_re = re.compile(r'^([0-9]+)\s+(.*)$') for line in input: line = line.rstrip() match = line_re.match(line) if match: size, path = match.groups()[0:2] size = int(size) yield size, path def parse_nm(input): """Parse nm output. Argument: an iterable over lines of nm output. Yields: (symbol name, symbol type, symbol size, source file path). Path may be None if nm couldn't figure out the source file. """ # Match lines with size + symbol + optional filename. sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$') # Match lines with addr but no size. addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$') # Match lines that don't have an address at all -- typically external symbols. noaddr_re = re.compile(r'^ + (.) (.*)$') for line in input: line = line.rstrip() match = sym_re.match(line) if match: size, type, sym = match.groups()[0:3] size = int(size, 16) type = type.lower() if type == 'v': type = 'w' # just call them all weak if type == 'b': continue # skip all BSS for now path = match.group(4) yield sym, type, size, path continue match = addr_re.match(line) if match: type, sym = match.groups()[0:2] # No size == we don't care. continue match = noaddr_re.match(line) if match: type, sym = match.groups() if type in ('U', 'w'): # external or weak symbol continue print('unparsed:', repr(line), file=sys.stderr) def treeify_du(dulines, strip_prefix=None): dirs = {} for size, path in dulines: if strip_prefix and path.startswith(strip_prefix): path = path[len(strip_prefix):] elif path.startswith('/'): path = path[1:] parts = path.split('/') key = parts.pop() tree = dirs for part in parts: if part not in tree: tree[part] = {} tree = tree[part] if key not in tree: tree[key] = size else: # du reports the total for each directory (which may include files # contained in the directory itself). tree[key][None] = size return dirs def filter_syms(types, symbols): for sym, type, size, path in symbols: if type in types: yield sym, type, size, path def treeify_syms(symbols, strip_prefix=None): dirs = {} for sym, type, size, path in symbols: if path: path = os.path.normpath(path) if strip_prefix and path.startswith(strip_prefix): path = path[len(strip_prefix):] elif path.startswith('/usr/include'): path = path.replace('/usr/include', 'usrinclude') elif path.startswith('/'): path = path[1:] parts = None # TODO: make segmenting by namespace work. if False and '::' in sym: if sym.startswith('vtable for '): sym = sym[len('vtable for '):] parts = sym.split('::') parts.append('[vtable]') else: parts = sym.split('::') parts[0] = '::' + parts[0] elif path and '/' in path: parts = path.split('/') if parts: key = parts.pop() tree = dirs try: for part in parts: assert part != '', path if part not in tree: tree[part] = {} tree = tree[part] tree[key] = tree.get(key, 0) + size except: print(sym, parts, key, file=sys.stderr) raise else: key = 'symbols without paths' if key not in dirs: dirs[key] = {} tree = dirs[key] subkey = 'misc' if (sym.endswith('::__FUNCTION__') or sym.endswith('::__PRETTY_FUNCTION__')): subkey = '__FUNCTION__' elif sym.startswith('CSWTCH.'): subkey = 'CSWTCH' elif '::' in sym: subkey = sym[0:sym.find('::') + 2] else: print('unbucketed (no path?):', sym, type, size, path, file=sys.stderr) tree[subkey] = tree.get(subkey, 0) + size return dirs def jsonify_tree(tree, name): children = [] total = 0 subtree_total = None for key, val in tree.iteritems(): if key is None: subtree_total = val continue if isinstance(val, dict): subtree = jsonify_tree(val, key) total += subtree['data']['$area'] children.append(subtree) else: total += val children.append({ 'name': key + ' ' + format_bytes(val), 'data': { '$area': val } }) # Process du sub-tree totals by creating a '.' child with appropriate area. if subtree_total: dot_total = subtree_total - total if dot_total > 0: children.append({'name': '. ' + format_bytes(dot_total), 'data': { '$area': dot_total }}) total = subtree_total children.sort(key=lambda child: -child['data']['$area']) return { 'name': name + ' ' + format_bytes(total), 'data': { '$area': total, }, 'children': children, } def dump_du(dufile, strip_prefix): dirs = treeify_du(parse_du(dufile), strip_prefix) print('var kTree =', json.dumps(jsonify_tree(dirs, '/'), indent=2)) def dump_nm(nmfile, strip_prefix): dirs = treeify_syms(parse_nm(nmfile), strip_prefix) print('var kTree =', json.dumps(jsonify_tree(dirs, '/'), indent=2)) def parse_objdump(input): """Parse objdump -h output.""" sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)') sections = [] debug_sections = [] for line in input: line = line.strip() match = sec_re.match(line) if match: name, size = match.groups() if name.startswith('.'): name = name[1:] if name.startswith('debug_'): name = name[len('debug_'):] debug_sections.append((name, int(size, 16))) else: sections.append((name, int(size, 16))) continue return sections, debug_sections def jsonify_sections(name, sections): children = [] total = 0 for section, size in sections: children.append({ 'name': section + ' ' + format_bytes(size), 'data': { '$area': size } }) total += size children.sort(key=lambda child: -child['data']['$area']) return { 'name': name + ' ' + format_bytes(total), 'data': { '$area': total }, 'children': children } def dump_sections(): sections, debug_sections = parse_objdump(open('objdump.out')) sections = jsonify_sections('sections', sections) debug_sections = jsonify_sections('debug', debug_sections) print('var kTree =', json.dumps({ 'name': 'top', 'data': { '$area': sections['data']['$area'] + debug_sections['data']['$area'] }, 'children': [ debug_sections, sections ]})) usage="""%prog [options] MODE Modes are: du: output 'du' json suitable for a treemap syms: output symbols json suitable for a treemap dump: print symbols sorted by size (pipe to head for best output) sections: output binary sections json suitable for a treemap du output passsed to --du-output should be from running a command like the following: du -B 1 /path/to/root > du.out nm output passed to --nm-output should from running a command like the following (note, can take a long time -- 30 minutes): nm -C -S -l /path/to/binary > nm.out objdump output passed to --objdump-output should be from a command like: objdump -h /path/to/binary > objdump.out""" parser = optparse.OptionParser(usage=usage) parser.add_option('--du-output', action='store', dest='dupath', metavar='PATH', default='du.out', help='path to nm output [default=nm.out]') parser.add_option('--nm-output', action='store', dest='nmpath', metavar='PATH', default='nm.out', help='path to nm output [default=nm.out]') parser.add_option('--objdump-output', action='store', dest='objdump', metavar='PATH', default='objdump.out', help='path to objdump output [default=objdump.out]') parser.add_option('--strip-prefix', metavar='PATH', action='store', help='strip PATH prefix from paths; e.g. /path/to/src/root') parser.add_option('--filter', action='store', help='include only symbols/files matching FILTER') opts, args = parser.parse_args() if len(args) != 1: parser.print_usage() sys.exit(1) mode = args[0] if mode == 'du': dufile = open(opts.dupath, 'r') dump_du(dufile, strip_prefix=opts.strip_prefix) elif mode == 'syms': nmfile = open(opts.nmpath, 'r') dump_nm(nmfile, strip_prefix=opts.strip_prefix) elif mode == 'sections': dump_sections() elif mode == 'dump': nmfile = open(opts.nmpath, 'r') syms = list(parse_nm(nmfile)) # a list of (sym, type, size, path); sort by size. syms.sort(key=lambda x: -x[2]) total = 0 for sym, type, size, path in syms: if type in ('b', 'w'): continue # skip bss and weak symbols if path is None: path = '' if opts.filter and not (opts.filter in sym or opts.filter in path): continue print('%6s %s (%s) %s' % (format_bytes(size), sym, symbol_type_to_human(type), path)) total += size print('%6s %s' % (format_bytes(total), 'total'), end='') else: print('unknown mode') parser.print_usage()