diff options
Diffstat (limited to 'binary_search_tool/bisect_driver.py')
-rw-r--r-- | binary_search_tool/bisect_driver.py | 334 |
1 files changed, 334 insertions, 0 deletions
# Copyright 2016 Google Inc. All Rights Reserved.
#
# This script is used to help the compiler wrapper in the Android build system
# bisect for bad object files.
"""Utilities for bisection of Android object files.

This module contains a set of utilities to allow bisection between
two sets (good and bad) of object files. Mostly used to find compiler
bugs.

Reference page:
https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper

Design doc:
https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
"""

from __future__ import print_function

import contextlib
import fcntl
import os
import shutil
import subprocess
import sys

VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING', None) == '1'
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE', None) == '1'


class Error(Exception):
    """The general compiler wrapper error class."""


@contextlib.contextmanager
def lock_file(path, mode):
    """Open a file and hold an advisory lock on it for the `with` body.

    The lock only affects other processes that also take the lock through
    this function. When the file is opened purely for reading (modes 'r'
    and 'rb') the lock is shared, so many readers may proceed concurrently;
    any other mode takes an exclusive lock, so only one process writes at a
    time. The call blocks until the lock can be acquired.

    This is a context manager: the body of the caller's `with` statement
    runs at the `yield`; afterwards the file is flushed, unlocked and
    closed. The lock is released even if the body raises.

    Args:
      path: path to file being locked
      mode: mode to open file with ('w', 'r', etc.)

    Yields:
      The open file object.
    """
    with open(path, mode) as f:
        # Share the lock if just reading, make lock exclusive if writing.
        if f.mode in ('r', 'rb'):
            lock_type = fcntl.LOCK_SH
        else:
            lock_type = fcntl.LOCK_EX

        # Acquire outside the try block: if acquiring fails there is no
        # lock to release.
        fcntl.lockf(f, lock_type)
        try:
            yield f
            # Push buffered writes out while the lock is still held.
            f.flush()
        finally:
            fcntl.lockf(f, fcntl.LOCK_UN)


def log_to_file(path, execargs, link_from=None, link_to=None):
    """Common logging function.

    Append the current working directory and the command line to the log
    file at `path`, followed by a "from -> to" line when both `link_from`
    and `link_to` are given.

    Args:
      path: path of the log file (opened in append mode under a lock).
      execargs: compiler execution arguments (list of strings).
      link_from: optional source of a from-to relationship between files.
      link_to: optional destination of the from-to relationship.
    """
    with lock_file(path, 'a') as log:
        log.write('cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs)))
        if link_from and link_to:
            log.write('%s -> %s\n' % (link_from, link_to))


def exec_and_return(execargs):
    """Execute a process and return its exit status.

    Runs the command given by execargs, waits for it to finish and returns
    its return code. stdout and stderr are inherited, not inspected.

    Args:
      execargs: command line to execute (list of strings).

    Returns:
      The return code of the process.
    """
    return subprocess.call(execargs)


def which_cache(obj_file):
    """Determine which cache an object belongs to.

    The binary search tool creates two files for each search iteration
    listing the full set of bad objects and full set of good objects. We
    use this to determine where an object file should be linked from (good
    or bad). The bad set file is named by the BISECT_BAD_SET environment
    variable.

    Args:
      obj_file: path of the object file to look up.

    Returns:
      BAD_CACHE if obj_file is a line of the bad set file, otherwise
      GOOD_CACHE (this includes the case where grep itself fails, e.g.
      because the bad set file cannot be read).

    Raises:
      Error: if BISECT_BAD_SET is not set in the environment.
    """
    bad_set_file = os.environ.get('BISECT_BAD_SET')
    if bad_set_file is None:
        raise Error('BISECT_BAD_SET is not set. Unable to determine which '
                    'cache %s belongs to.' % obj_file)

    # -F: treat the path as a fixed string, not a regex (otherwise '.' in
    # "foo.o" would match any character). '--' stops option parsing so the
    # path can never be mistaken for a grep option.
    ret = subprocess.call(
        ['grep', '-x', '-q', '-F', '--', obj_file, bad_set_file])
    if ret == 0:
        return BAD_CACHE
    return GOOD_CACHE


def makedirs(path):
    """Try to create directories in path.

    Does nothing if the directory already exists.

    Args:
      path: directory path to create, including missing parents.

    Raises:
      OSError: if creation failed and path is not an existing directory.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Another process may have created it first; only re-raise when
        # the directory really is not there.
        if not os.path.isdir(path):
            raise


def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      Absolute object path from execution args (-o argument). If no object
      is being outputted, if '-o' has no value following it, or if the
      output doesn't end in ".o", then return empty string.
    """
    try:
        i = execargs.index('-o')
        obj_path = execargs[i + 1]
    except (ValueError, IndexError):
        # No '-o' at all, or '-o' is the last argument. In the latter case
        # let the real compiler report the malformed command line.
        return ''

    if not obj_path.endswith('.o'):
        # TODO: what suffixes do we need to contemplate
        # TODO: add this as a warning
        # TODO: need to handle -r compilations
        return ''

    return os.path.abspath(obj_path)


def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      Absolute path of the dependency file: the value of -MF when given,
      otherwise the object path (-o argument) with a ".d" extension. If no
      dependency file is being outputted (neither -MD nor -MMD present),
      '-MF' has no value following it, or no object path can be
      determined, then return empty string.
    """
    if '-MD' not in execargs and '-MMD' not in execargs:
        return ''

    # If -MF given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if '-MF' in execargs:
        i = execargs.index('-MF')
        try:
            dep_path = execargs[i + 1]
        except IndexError:
            # '-MF' is the last argument; the compiler will reject this.
            return ''
        return os.path.abspath(dep_path)

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    # Strip the trailing ".o" and replace it with ".d".
    return full_obj_path[:-2] + '.d'


def get_dwo_path(execargs):
    """Get the dwo file path for the dwo file in the list of arguments.

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      Absolute dwo file path derived from the object path, when the
      -gsplit-dwarf argument is present. If no dwo file is being outputted
      then return empty string.
    """
    if '-gsplit-dwarf' not in execargs:
        return ''

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    # Strip the trailing ".o" and replace it with ".dwo".
    return full_obj_path[:-2] + '.dwo'


def in_object_list(obj_name, list_filename):
    """Check if object file name exist in file with object list.

    Args:
      obj_name: object file name to look for.
      list_filename: path of a file with one object name per line.

    Returns:
      True if some line of the file, stripped of surrounding whitespace,
      equals obj_name. False otherwise, or if obj_name is empty.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, 'r') as list_file:
        return any(line.strip() == obj_name for line in list_file)


def get_side_effects(execargs):
    """Determine side effects generated by compiler.

    Args:
      execargs: compiler execution arguments (list of strings).

    Returns:
      List of paths of objects that the compiler generates as side effects
      (dependency file first, then dwo file; absent ones are omitted).
    """
    # Dependency files and dwo files are the side effects that get cached.
    candidates = (get_dep_path(execargs), get_dwo_path(execargs))
    return [path for path in candidates if path]


def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Cache compiler output file (.o/.d/.dwo).

    Copies abs_file_path into the given cache, mirroring its absolute path
    below the cache directory, and records the copy in the cache's
    _POPULATE_LOG. The log entry is written even when the source file does
    not exist, in which case nothing is copied.

    Args:
      execargs: compiler execution arguments (used for logging).
      bisect_dir: bisection directory.
      cache: name of the cache to store into (good/bad).
      abs_file_path: absolute path of the file to cache.
    """
    # os.path.join fails with absolute paths, use + instead
    bisect_path = os.path.join(bisect_dir, cache) + abs_file_path
    bisect_path_dir = os.path.dirname(bisect_path)
    makedirs(bisect_path_dir)
    pop_log = os.path.join(bisect_dir, cache, '_POPULATE_LOG')
    log_to_file(pop_log, execargs, abs_file_path, bisect_path)

    try:
        if os.path.exists(abs_file_path):
            shutil.copy2(abs_file_path, bisect_path)
    except Exception:
        print('Could not cache file %s' % abs_file_path, file=sys.stderr)
        raise


def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Replaces abs_file_path with a hard link to its cached copy.

    Args:
      bisect_dir: bisection directory.
      cache: name of the cache to restore from (good/bad).
      abs_file_path: absolute path of the file to restore.

    Raises:
      Error: if the file is not present in the cache.
    """
    # os.path.join fails with absolute paths, use + instead
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        raise Error('%s is missing from %s cache! Unsure how to proceed. '
                    'Make will now crash.' % (cached_path, cache))

    # os.link refuses to overwrite, so clear the destination first.
    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    os.link(cached_path, abs_file_path)


def bisect_populate(execargs, bisect_dir, population_name):
    """Add necessary information to the bisect cache for the given execution.

    Extract the necessary information for bisection from the compiler
    execution arguments and put it into the bisection cache. This
    includes copying the created object file, adding the object
    file path to the cache list and keeping a log of the execution.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.
      population_name: name of the cache being populated (good/bad).

    Returns:
      The compiler's non-zero return code if the compilation failed,
      otherwise None.
    """
    retval = exec_and_return(execargs)
    if retval:
        return retval

    full_obj_path = get_obj_path(execargs)
    # If not a normal compiler call then just exit
    if not full_obj_path:
        return

    cache_file(execargs, bisect_dir, population_name, full_obj_path)

    population_dir = os.path.join(bisect_dir, population_name)
    with lock_file(os.path.join(population_dir, '_LIST'), 'a') as object_list:
        object_list.write('%s\n' % full_obj_path)

    for side_effect in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, side_effect)


def bisect_triage(execargs, bisect_dir):
    """Provide the object for this compiler call from the good or bad cache.

    Calls that do not output an object file are simply passed to the
    compiler. For bisected objects, the cache to use is looked up with
    which_cache and the object and its side effects are restored from
    that cache instead of being compiled, unless WRAPPER_SAFE_MODE is on,
    in which case the compiler is run first and its object file is then
    overwritten from the cache.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.

    Returns:
      The compiler's return code when the call is passed straight to the
      compiler, or when the compiler fails in safe mode; otherwise None.

    Raises:
      Error: if the object is not in the object list and
        BISECT_CONTINUE_ON_MISSING is not enabled, or if a needed file is
        missing from the cache.
    """
    full_obj_path = get_obj_path(execargs)
    obj_list = os.path.join(bisect_dir, LIST_FILE)

    # If the output isn't an object file just call compiler
    if not full_obj_path:
        return exec_and_return(execargs)

    # If this isn't a bisected object just call compiler
    # This shouldn't happen!
    if not in_object_list(full_obj_path, obj_list):
        if CONTINUE_ON_MISSING:
            log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
            log_to_file(log_file, execargs, '? compiler', full_obj_path)
            return exec_and_return(execargs)
        raise Error('%s is missing from cache! To ignore export '
                    'BISECT_CONTINUE_ON_MISSING=1. See documentation for '
                    'more details on this option.' % full_obj_path)

    cache = which_cache(full_obj_path)

    # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    # result from the good/bad cache. This option is safe and covers all
    # compiler side effects, but is very slow!
    if WRAPPER_SAFE_MODE:
        retval = exec_and_return(execargs)
        if retval:
            return retval
        os.remove(full_obj_path)
        restore_file(bisect_dir, cache, full_obj_path)
        return

    # Generate compiler side effects. Trick Make into thinking compiler was
    # actually executed.
    for side_effect in get_side_effects(execargs):
        restore_file(bisect_dir, cache, side_effect)

    # If generated object file happened to be pruned/cleaned by Make then
    # link it over from cache again.
    if not os.path.exists(full_obj_path):
        restore_file(bisect_dir, cache, full_obj_path)


def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Call appropriate bisection stage according to value in bisect_stage.

    Args:
      bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
      bisect_dir: bisection directory.
      execargs: compiler execution arguments.

    Returns:
      Whatever the selected stage returns: the compiler's return code when
      the stage reports one, otherwise None.

    Raises:
      ValueError: if bisect_stage is not a known stage.
    """
    # Propagate the stage result so the wrapper can exit with the
    # compiler's status instead of silently dropping it.
    if bisect_stage == 'POPULATE_GOOD':
        return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    elif bisect_stage == 'POPULATE_BAD':
        return bisect_populate(execargs, bisect_dir, BAD_CACHE)
    elif bisect_stage == 'TRIAGE':
        return bisect_triage(execargs, bisect_dir)
    raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)