aboutsummaryrefslogtreecommitdiff
path: root/binary_search_tool/bisect_driver.py
diff options
context:
space:
mode:
Diffstat (limited to 'binary_search_tool/bisect_driver.py')
-rw-r--r--binary_search_tool/bisect_driver.py334
1 files changed, 334 insertions, 0 deletions
diff --git a/binary_search_tool/bisect_driver.py b/binary_search_tool/bisect_driver.py
new file mode 100644
index 00000000..0b3fb1d4
--- /dev/null
+++ b/binary_search_tool/bisect_driver.py
@@ -0,0 +1,334 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# This script is used to help the compiler wrapper in the Android build system
+# bisect for bad object files.
+"""Utilities for bisection of Android object files.
+
+This module contains a set of utilities to allow bisection between
+two sets (good and bad) of object files. Mostly used to find compiler
+bugs.
+
+Reference page:
+https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper
+
+Design doc:
+https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
+"""
+
+from __future__ import print_function
+
+import contextlib
+import fcntl
+import os
+import shutil
+import subprocess
+import sys
+
# Bisection stages understood by bisect_driver().
VALID_MODES = ['POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE']

# Names of the two object caches kept inside the bisection directory.
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'

# Path (relative to the bisection directory) of the file that lists the
# objects recorded while populating the good cache.
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

# Behaviour switches read once at import time; each one is enabled only when
# its environment variable is exactly the string '1'.
CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING') == '1'
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE') == '1'
+
+
class Error(Exception):
    """General error raised by the compiler wrapper."""
+
+
@contextlib.contextmanager
def lock_file(path, mode):
    """Open a file and hold an advisory lock on it for the 'with' body.

    The lock only affects other processes that also take the lock through
    this function. Files opened read-only get a shared lock (many readers may
    hold it concurrently); any mode that can write ('w', 'a', 'x' or '+')
    gets an exclusive lock.

    This function is a contextmanager, meaning it's meant to be used with the
    "with" statement. The body of the "with" statement runs at the "yield";
    when the body ends (normally or with an exception) buffered data is
    flushed and only then is the lock released, so no write can reach the
    file after another process has acquired the lock.

    Args:
      path: path to file being locked
      mode: mode to open file with ('w', 'r', etc.)

    Yields:
      The open, locked file object.
    """
    with open(path, mode) as f:
        # Classify by capability rather than by exact string so that modes
        # such as 'rt' are also treated as read-only. An exclusive lock
        # cannot be taken on a descriptor that is not open for writing.
        can_write = any(flag in f.mode for flag in 'wax+')
        lock_type = fcntl.LOCK_EX if can_write else fcntl.LOCK_SH

        fcntl.lockf(f, lock_type)
        try:
            yield f
        finally:
            try:
                # Flush while the lock is still held, even on the error path;
                # otherwise the implicit flush at close happens unlocked.
                f.flush()
            finally:
                fcntl.lockf(f, fcntl.LOCK_UN)
+
+
def log_to_file(path, execargs, link_from=None, link_to=None):
    """Append one execution record to a log file.

    Writes the current working directory and the space-joined execargs on one
    line. When both link_from and link_to are given, a second line records
    the from-to relationship between the two files.

    Args:
      path: path of the log file; it is opened in append mode under a lock.
      execargs: compiler execution arguments.
      link_from: optional source of the file relationship.
      link_to: optional destination of the file relationship.
    """
    with lock_file(path, 'a') as log:
        cwd = os.getcwd()
        command = ' '.join(execargs)
        log.write('cd: %s; %s\n' % (cwd, command))

        has_link = bool(link_from and link_to)
        if has_link:
            log.write('%s -> %s\n' % (link_from, link_to))
+
+
def exec_and_return(execargs):
    """Run a command and hand back its exit status.

    The command's stdout and stderr are neither captured nor inspected.

    Args:
      execargs: argument list of the process to run.

    Returns:
      The return code reported by subprocess.call.
    """
    status = subprocess.call(execargs)
    return status
+
+
def which_cache(obj_file):
    """Determine which cache an object belongs to.

    The binary search tool creates two files for each search iteration listing
    the full set of bad objects and full set of good objects. We use this to
    determine where an object file should be linked from (good or bad).

    The file named by the BISECT_BAD_SET environment variable is searched for
    a line that is exactly obj_file. If BISECT_BAD_SET is unset, None is
    passed on to subprocess.call.

    Args:
      obj_file: path of the object file to look up.

    Returns:
      BAD_CACHE if grep finds the line, GOOD_CACHE for any other grep exit
      status (including grep failing to read the file).
    """
    bad_set_file = os.environ.get('BISECT_BAD_SET')
    # -F: treat the path as a literal string, not a regular expression, so
    #     characters such as '.' or '+' in the path cannot match other lines.
    # -x: the whole line must match.  -q: no output, exit status only.
    # -e / --: keep the pattern and file name from being parsed as options.
    ret = subprocess.call(
        ['grep', '-F', '-x', '-q', '-e', obj_file, '--', bad_set_file])
    if ret == 0:
        return BAD_CACHE
    return GOOD_CACHE
+
+
def makedirs(path):
    """Create path with any missing parents; an existing directory is fine.

    Args:
      path: directory path to create.

    Raises:
      OSError: creation failed and path is not an existing directory.
    """
    try:
        os.makedirs(path)
    except OSError:
        if os.path.isdir(path):
            return
        raise
+
+
def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute object path from execution args (-o argument). Returns the
      empty string if there is no -o argument, if -o is the last argument
      (no operand follows it), or if the output doesn't end in ".o".
    """
    try:
        # IndexError covers a dangling '-o' at the end of the command line;
        # returning '' leaves it to the compiler to report the bad invocation.
        obj_path = execargs[execargs.index('-o') + 1]
    except (ValueError, IndexError):
        return ''

    if not obj_path.endswith('.o'):
        # TODO: what suffixes do we need to contemplate
        # TODO: add this as a warning
        # TODO: need to handle -r compilations
        return ''

    return os.path.abspath(obj_path)
+
+
def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    A dependency file is only considered when -MD or -MMD is present. Its
    path is the operand of -MF when -MF is given; otherwise it is the object
    path (-o argument) with the ".o" suffix replaced by ".d".

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute path of the dependency file. Returns the empty string if no
      dependency file is being outputted, if -MF is the last argument (no
      operand follows it), or if the path has to be derived from the object
      path and there is no object path.
    """
    if '-MD' not in execargs and '-MMD' not in execargs:
        return ''

    # If -MF given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if '-MF' in execargs:
        operand_index = execargs.index('-MF') + 1
        if operand_index >= len(execargs):
            # Dangling '-MF': malformed command line, nothing to track.
            return ''
        return os.path.abspath(execargs[operand_index])

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ''

    return full_obj_path[:-len('.o')] + '.d'
+
+
def get_dwo_path(execargs):
    """Work out the .dwo file path implied by the execution arguments.

    Args:
      execargs: compiler execution arguments.

    Returns:
      The absolute object path with its ".o" suffix replaced by ".dwo" when
      -gsplit-dwarf is among the arguments and an object path is found;
      otherwise the empty string.
    """
    if '-gsplit-dwarf' in execargs:
        obj_path = get_obj_path(execargs)
        if obj_path:
            stem = obj_path[:-2]
            return stem + '.dwo'

    return ''
+
+
def in_object_list(obj_name, list_filename):
    """Tell whether obj_name appears as a line of the object list file.

    Lines are compared after stripping surrounding whitespace. The list file
    is opened for reading under a lock.

    Args:
      obj_name: object file name to look for; an empty name is never found
        and the list file is not opened.
      list_filename: path of the file holding one object name per line.

    Returns:
      True if some line equals obj_name, False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, 'r') as list_file:
        return any(entry.strip() == obj_name for entry in list_file)
+
+
def get_side_effects(execargs):
    """Collect the extra files the compiler generates besides the object.

    Args:
      execargs: compiler execution arguments.

    Returns:
      List of paths of side-effect files: the dependency file first, then
      the dwo file, each included only when its path is non-empty.
    """
    candidates = (
        get_dep_path(execargs),  # dependency files
        get_dwo_path(execargs),  # dwo files
    )
    return [found for found in candidates if found]
+
+
def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Copy one compiler output file (.o/.d/.dwo) into a cache.

    The destination directory is created if needed and the operation is
    recorded in the cache's _POPULATE_LOG before copying. Nothing is copied
    when abs_file_path does not exist.

    Args:
      execargs: compiler execution arguments (recorded in the log).
      bisect_dir: bisection directory.
      cache: name of the cache directory inside bisect_dir.
      abs_file_path: absolute path of the file to cache.
    """
    # abs_file_path is absolute, so it is appended with + rather than
    # os.path.join to keep the cache prefix in front of it.
    destination = os.path.join(bisect_dir, cache) + abs_file_path
    makedirs(os.path.dirname(destination))

    populate_log = os.path.join(bisect_dir, cache, '_POPULATE_LOG')
    log_to_file(populate_log, execargs, abs_file_path, destination)

    if not os.path.exists(abs_file_path):
        return

    try:
        shutil.copy2(abs_file_path, destination)
    except Exception:
        # Report which file failed, then let the original error propagate.
        print('Could not cache file %s' % abs_file_path, file=sys.stderr)
        raise
+
+
def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Any existing file at abs_file_path is removed and replaced by a hard
    link to the cached copy.

    Args:
      bisect_dir: bisection directory.
      cache: name of the cache directory inside bisect_dir.
      abs_file_path: absolute path the file should be restored to.

    Raises:
      Error: the file has no copy in the given cache.
    """
    # abs_file_path is absolute, so it is appended with + rather than
    # os.path.join to keep the cache prefix in front of it.
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        # Message reads "<file> is missing from <cache name> cache": the
        # missing path goes first, the cache name second.
        raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
                     'will now crash.' % (cached_path, cache)))

    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    os.link(cached_path, abs_file_path)
+
+
def bisect_populate(execargs, bisect_dir, population_name):
    """Run the compiler and record its outputs in the bisect cache.

    The compiler is executed first. If it succeeds and produces an object
    file, the object is copied into the cache, its path is appended to the
    cache's _LIST file, and every side-effect file is copied as well.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.
      population_name: name of the cache being populated (good/bad).

    Returns:
      The compiler's non-zero exit status if compilation failed; None
      otherwise.
    """
    status = exec_and_return(execargs)
    if status:
        return status

    obj_path = get_obj_path(execargs)
    if not obj_path:
        # Not a normal compiler call: nothing to cache.
        return None

    cache_file(execargs, bisect_dir, population_name, obj_path)

    list_path = os.path.join(os.path.join(bisect_dir, population_name), '_LIST')
    with lock_file(list_path, 'a') as object_list:
        object_list.write('%s\n' % obj_path)

    for extra_output in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, extra_output)

    return None
+
+
def bisect_triage(execargs, bisect_dir):
    """Serve an object from the good or bad cache instead of compiling it.

    Calls that do not output an object file are passed straight to the
    compiler. For a listed object, which_cache() picks the cache and the
    object (plus side-effect files) is linked over from it.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.

    Returns:
      The compiler's exit status on the paths where the compiler is run and
      its result is returned; None otherwise.

    Raises:
      Error: the object is not in the object list and CONTINUE_ON_MISSING is
        not enabled.
    """
    obj_path = get_obj_path(execargs)
    list_path = os.path.join(bisect_dir, LIST_FILE)

    # The output isn't an object file: just call the compiler.
    if not obj_path:
        return exec_and_return(execargs)

    # The object was never recorded for bisection. This shouldn't happen!
    if not in_object_list(obj_path, list_path):
        if not CONTINUE_ON_MISSING:
            raise Error(('%s is missing from cache! To ignore export '
                         'BISECT_CONTINUE_ON_MISSING=1. See documentation for '
                         'more details on this option.' % obj_path))
        missing_log = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
        log_to_file(missing_log, execargs, '? compiler', obj_path)
        return exec_and_return(execargs)

    chosen_cache = which_cache(obj_path)

    # Safe mode: really run the compiler so every side effect is produced,
    # then overwrite the object with the cached one. Safe but very slow.
    if WRAPPER_SAFE_MODE:
        status = exec_and_return(execargs)
        if status:
            return status
        os.remove(obj_path)
        restore_file(bisect_dir, chosen_cache, obj_path)
        return None

    # Generate compiler side effects. Trick Make into thinking the compiler
    # was actually executed.
    for extra_output in get_side_effects(execargs):
        restore_file(bisect_dir, chosen_cache, extra_output)

    # If the object happened to be pruned/cleaned by Make, link it over from
    # the cache again.
    if not os.path.exists(obj_path):
        restore_file(bisect_dir, chosen_cache, obj_path)

    return None
+
+
def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Call appropriate bisection stage according to value in bisect_stage.

    Args:
      bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
      bisect_dir: bisection directory.
      execargs: compiler execution arguments.

    Returns:
      Whatever the selected stage returns, so that a failing compiler's exit
      status reaches the caller instead of being dropped.

    Raises:
      ValueError: bisect_stage is not a known stage.
    """
    if bisect_stage == 'POPULATE_GOOD':
        return bisect_populate(execargs, bisect_dir, GOOD_CACHE)
    if bisect_stage == 'POPULATE_BAD':
        return bisect_populate(execargs, bisect_dir, BAD_CACHE)
    if bisect_stage == 'TRIAGE':
        return bisect_triage(execargs, bisect_dir)
    raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)