aboutsummaryrefslogtreecommitdiff
path: root/binary_search_tool/bisect_driver.py
blob: 8feb1a37a6982ce674ef4821573e5157e4a393e1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
# -*- coding: utf-8 -*-
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
#
# This script is used to help the compiler wrapper in the ChromeOS and
# Android build systems bisect for bad object files.

"""Utilities for bisection of ChromeOS and Android object files.

This module contains a set of utilities to allow bisection between
two sets (good and bad) of object files. Mostly used to find compiler
bugs.

Reference page:
https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper

Design doc:
https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
"""


import contextlib
import fcntl
import os
import shutil
import stat
import subprocess
import sys


# Stage names understood by bisect_driver().
VALID_MODES = ("POPULATE_GOOD", "POPULATE_BAD", "TRIAGE")

# Names of the two cache sub-directories kept under the bisection directory.
GOOD_CACHE = "good"
BAD_CACHE = "bad"

# Location, relative to the bisection directory, of the list of cached objects.
LIST_FILE = os.path.join(GOOD_CACHE, "_LIST")

# Opt-in switches: each one is enabled only when its environment variable is
# set to exactly "1"; unset or any other value leaves it disabled.
CONTINUE_ON_MISSING = os.environ.get("BISECT_CONTINUE_ON_MISSING") == "1"
CONTINUE_ON_REDUNDANCY = os.environ.get("BISECT_CONTINUE_ON_REDUNDANCY") == "1"
WRAPPER_SAFE_MODE = os.environ.get("BISECT_WRAPPER_SAFE_MODE") == "1"


class Error(Exception):
    """The general compiler wrapper error class.

    Raised by this module when bisection cannot proceed, for example when an
    object file is missing from a cache.
    """


@contextlib.contextmanager
def lock_file(path, mode):
    """Open a file and hold an advisory lock on it for the "with" body.

    The lock only affects other processes that also lock the file (e.g. through
    this function). Files opened read-only take a shared lock, so many readers
    may proceed concurrently; any mode that allows writing takes an exclusive
    lock, so only one process may write at a time.

    This function is a contextmanager, meaning it's meant to be used with the
    "with" statement in Python. The body of the "with" statement runs at the
    "yield"; buffered data is flushed and the lock released when the body
    finishes, whether it finishes normally or by raising.

    Args:
      path: path to file being locked
      mode: mode to open file with ('w', 'r', etc.)

    Yields:
      The opened, locked file object.
    """
    with open(path, mode) as f:
        fd = f.fileno()

        # Apply FD_CLOEXEC argument to fd. This ensures that the file descriptor
        # won't be leaked to any child processes.
        current_args = fcntl.fcntl(fd, fcntl.F_GETFD)
        fcntl.fcntl(fd, fcntl.F_SETFD, current_args | fcntl.FD_CLOEXEC)

        # Decide from the file's capabilities rather than from the spelling of
        # the mode string: "r", "rb" and "rt" are all read-only, and an
        # exclusive lock cannot be taken on a descriptor that is not open for
        # writing.
        lock_type = fcntl.LOCK_EX if f.writable() else fcntl.LOCK_SH

        try:
            fcntl.lockf(f, lock_type)
            yield f
        finally:
            try:
                # Push buffered writes out while the lock is still held, even if
                # the body raised; otherwise they would only reach the file when
                # it is closed, after the lock has been dropped.
                f.flush()
            finally:
                fcntl.lockf(f, fcntl.LOCK_UN)


def log_to_file(path, execargs, link_from=None, link_to=None):
    """Append a record of one compiler invocation to a log file.

    The record always contains the current working directory and the
    space-joined command line. When both link_from and link_to are given, a
    second "from -> to" line is appended.

    Args:
      path: log file to append to (locked while writing).
      execargs: compiler execution arguments.
      link_from: optional source side of the from-to relationship.
      link_to: optional destination side of the from-to relationship.
    """
    with lock_file(path, "a") as log:
        cwd = os.getcwd()
        command_line = " ".join(execargs)
        log.write("cd: %s; %s\n" % (cwd, command_line))

        # The relationship line is only meaningful when both ends are known.
        if not (link_from and link_to):
            return
        log.write("%s -> %s\n" % (link_from, link_to))


def exec_and_return(execargs):
    """Run a command to completion and hand back its exit status.

    The command's stdout and stderr are neither captured nor inspected.

    Args:
      execargs: the command and its arguments, as a list.

    Returns:
      The exit status reported by subprocess.call.
    """
    exit_status = subprocess.call(execargs)
    return exit_status


def which_cache(obj_file):
    """Pick the cache (good or bad) an object file should be taken from.

    The file named by the BISECT_BAD_SET environment variable lists the objects
    that belong to the bad set for the current search iteration. An object
    listed there comes from the bad cache; every other object comes from the
    good cache.

    Args:
      obj_file: path of the object file to classify.

    Returns:
      BAD_CACHE if obj_file is listed in the bad set file, else GOOD_CACHE.
    """
    bad_objects_list = os.environ.get("BISECT_BAD_SET")
    is_bad = in_object_list(obj_file, bad_objects_list)
    return BAD_CACHE if is_bad else GOOD_CACHE


def makedirs(path):
    """Create the directory at path, including any missing parents.

    Succeeds silently if the directory already exists.

    Args:
      path: directory to create.

    Raises:
      OSError: if the directory cannot be created and does not already exist
        as a directory (e.g. path names a regular file).
    """
    os.makedirs(path, exist_ok=True)


def get_obj_path(execargs):
    """Get the object path for the object file in the list of arguments.

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute object path from execution args (-o argument). The -o argument
      is only considered when -c is also present. An empty string is returned
      when no usable object file is being produced: -c or -o is absent, -o has
      no value, the output is "-" or /dev/null, the output ends in .tmp, or
      the output looks like an Automake/Autoconf/CMake configuration probe.
    """
    if "-c" not in execargs:
        return ""

    try:
        obj_path = execargs[execargs.index("-o") + 1]
    except (ValueError, IndexError):
        # No -o at all, or -o is the last argument and has no value. In the
        # latter case the compiler itself should report the error.
        return ""

    # Ignore args that do not create a file.
    if not obj_path or obj_path in ("-", "/dev/null"):
        return ""
    # Ignore files ending in .tmp.
    if obj_path.endswith(".tmp"):
        return ""

    abs_obj_path = os.path.abspath(obj_path)
    # Ignore configuration files generated by Automake/Autoconf/CMake etc.
    # Both spellings are checked for CMakeTmp: the directory may appear only in
    # the working directory, or only in the path as written.
    if (
        obj_path.endswith(("conftest.o", "CMakeFiles/test.o"))
        or "CMakeTmp" in obj_path
        or "CMakeTmp" in abs_obj_path
    ):
        return ""

    return abs_obj_path


def get_dep_path(execargs):
    """Get the dep file path for the dep file in the list of arguments.

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute path of the dependency file. This is the -MF argument when
      given, otherwise the object path with its extension replaced by .d. If
      no dependency file is being outputted (neither -MD nor -MMD is present,
      -MF has no value, or there is no usable object path) then return empty
      string.
    """
    if "-MD" not in execargs and "-MMD" not in execargs:
        return ""

    # If -MF is given this is the path of the dependency file. Otherwise the
    # dependency file is the value of -o but with a .d extension
    if "-MF" in execargs:
        mf_index = execargs.index("-MF")
        if mf_index + 1 >= len(execargs):
            # -MF is the last argument and has no value.
            return ""
        return os.path.abspath(execargs[mf_index + 1])

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    # Replace whatever extension the object has (not just a two-character
    # ".o") so outputs such as "foo" or "foo.obj" map to "foo.d".
    return os.path.splitext(full_obj_path)[0] + ".d"


def get_dwo_path(execargs):
    """Get the dwo file path for the dwo file in the list of arguments.

    Args:
      execargs: compiler execution arguments.

    Returns:
      Absolute dwo file path: the object path with its extension replaced by
      .dwo. If no dwo file is being outputted (-gsplit-dwarf is absent or there
      is no usable object path) then return empty string.
    """
    if "-gsplit-dwarf" not in execargs:
        return ""

    full_obj_path = get_obj_path(execargs)
    if not full_obj_path:
        return ""

    # Replace whatever extension the object has rather than assuming a
    # two-character ".o" suffix.
    return os.path.splitext(full_obj_path)[0] + ".dwo"


def in_object_list(obj_name, list_filename):
    """Tell whether an object file name is listed in an object list file.

    The list file holds one object name per line; surrounding whitespace on
    each line is ignored when comparing.

    Args:
      obj_name: object file name to look for. A falsy name is never found and
        the list file is not opened.
      list_filename: path of the list file (read under a lock).

    Returns:
      True if some line of the file equals obj_name, False otherwise.
    """
    if not obj_name:
        return False

    with lock_file(list_filename, "r") as list_file:
        return any(entry.strip() == obj_name for entry in list_file)


def get_side_effects(execargs):
    """Collect the extra files the compiler writes besides the object file.

    Args:
      execargs: compiler execution arguments.

    Returns:
      List of paths of the side-effect files: the dependency file first (if
      any), then the dwo file (if any).
    """
    candidate_paths = (
        get_dep_path(execargs),  # dependency file
        get_dwo_path(execargs),  # dwo file
    )
    # Each getter returns an empty string when its file is not produced.
    return [found for found in candidate_paths if found]


def cache_file(execargs, bisect_dir, cache, abs_file_path):
    """Cache compiler output file (.o/.d/.dwo).

    The invocation is always recorded in the cache's _POPULATE_LOG. If the
    file is already present in the cache, its path is appended to the cache's
    _DUPS file and the existing cached copy is left untouched.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be cached to (GOOD/BAD).
      abs_file_path: Absolute path to file being cached.

    Returns:
      True if the file was cached (or was already cached and
      BISECT_CONTINUE_ON_REDUNDANCY is enabled), False if the file to cache
      does not exist.

    Raises:
      Error: if the file is already cached and BISECT_CONTINUE_ON_REDUNDANCY
        is not enabled.
    """
    population_dir = os.path.join(bisect_dir, cache)
    # os.path.join would discard population_dir when handed an absolute path,
    # so the absolute file path is appended with + instead.
    bisect_path = population_dir + abs_file_path
    makedirs(os.path.dirname(bisect_path))
    pop_log = os.path.join(population_dir, "_POPULATE_LOG")
    log_to_file(pop_log, execargs, abs_file_path, bisect_path)

    try:
        if not os.path.exists(abs_file_path):
            # File not found (happens when compilation fails but error code is
            # still 0)
            return False

        if os.path.exists(bisect_path):
            # Already cached: record the duplicate, then either tolerate it or
            # fail, depending on BISECT_CONTINUE_ON_REDUNDANCY.
            dups_path = os.path.join(population_dir, "_DUPS")
            with lock_file(dups_path, "a") as dup_object_list:
                dup_object_list.write("%s\n" % abs_file_path)
            if CONTINUE_ON_REDUNDANCY:
                return True
            raise Error(
                "Trying to cache file %s multiple times. To avoid the error, set "
                "BISECT_CONTINUE_ON_REDUNDANCY to 1. For reference, the list of "
                "such files will be written to %s" % (abs_file_path, dups_path)
            )

        shutil.copy2(abs_file_path, bisect_path)
        # Set cache object to be read-only so later compilations can't
        # accidentally overwrite it.
        os.chmod(bisect_path, 0o444)
        return True
    except Exception:
        # Name the offending file on stderr, then let the error propagate.
        print("Could not cache file %s" % abs_file_path, file=sys.stderr)
        raise


def restore_file(bisect_dir, cache, abs_file_path):
    """Restore file from cache (.o/.d/.dwo).

    Any existing file at abs_file_path is replaced by the cached copy, and the
    restored file is made writable by its owner.

    Args:
      bisect_dir: The directory where bisection caches live.
      cache: Which cache the file will be restored from (GOOD/BAD).
      abs_file_path: Absolute path to file being restored.

    Raises:
      Error: if the file is not present in the requested cache. The existing
        file at abs_file_path, if any, is left untouched in that case.
    """
    # os.path.join would discard the cache directory when handed an absolute
    # path, so the absolute file path is appended with + instead.
    cached_path = os.path.join(bisect_dir, cache) + abs_file_path
    if not os.path.exists(cached_path):
        raise Error(
            "%s is missing from %s cache! Unsure how to proceed. Make "
            "will now crash." % (cached_path, cache)
        )

    if os.path.exists(abs_file_path):
        os.remove(abs_file_path)
    shutil.copy2(cached_path, abs_file_path)
    # Add write permission to the restored object files as some packages
    # (such as kernels) may need write permission to delete files.
    os.chmod(abs_file_path, os.stat(abs_file_path).st_mode | stat.S_IWUSR)


def bisect_populate(execargs, bisect_dir, population_name):
    """Run the compiler and record its outputs in one bisection cache.

    The compiler is executed first. On success, the object file it produced is
    copied into the cache, its path is appended to the cache's _LIST file, and
    any compiler side effects are cached as well. The cache's log of
    executions is updated along the way.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: bisection directory.
      population_name: name of the cache being populated (good/bad).

    Returns:
      The compiler's exit status.
    """
    exit_status = exec_and_return(execargs)
    if exit_status != 0:
        return exit_status

    # An empty object path means this is not a normal compiler call: there is
    # no -o argument, or the -o argument names an unusable output file. The
    # call was most likely made to invoke the linker, or as part of a
    # configuration test, so running the compiler is all there is to do.
    # Likewise there is nothing more to record when the object file could not
    # be cached.
    obj_path = get_obj_path(execargs)
    was_cached = bool(obj_path) and cache_file(
        execargs, bisect_dir, population_name, obj_path
    )
    if not was_cached:
        return exit_status

    list_path = os.path.join(bisect_dir, population_name, "_LIST")
    with lock_file(list_path, "a") as object_list:
        object_list.write("%s\n" % obj_path)

    # Side-effect files are cached on a best-effort basis: whether each one
    # was actually cached is deliberately not checked.
    for extra_output in get_side_effects(execargs):
        cache_file(execargs, bisect_dir, population_name, extra_output)

    return exit_status


def bisect_triage(execargs, bisect_dir):
    """Use object file from appropriate cache (good/bad).

    Given a populated bisection directory, use the object file saved
    into one of the caches (good/bad) according to what is specified
    in the good/bad sets. The good/bad sets are generated by the
    high level binary search tool. Additionally restore any possible
    side effects of compiler.

    Args:
      execargs: compiler execution arguments.
      bisect_dir: populated bisection directory.

    Returns:
      The compiler's exit status when the compiler is actually run (output is
      not a bisected object, or safe mode is enabled); 0 when the object was
      served from the cache without running the compiler.

    Raises:
      Error: if the object is not in the cache list and
        BISECT_CONTINUE_ON_MISSING is not enabled, or if a file to restore is
        missing from the selected cache.
    """
    full_obj_path = get_obj_path(execargs)

    # If the output isn't an object file just call compiler
    if not full_obj_path:
        return exec_and_return(execargs)

    # If this isn't a bisected object just call compiler
    # This shouldn't happen!
    obj_list = os.path.join(bisect_dir, LIST_FILE)
    if not in_object_list(full_obj_path, obj_list):
        if not CONTINUE_ON_MISSING:
            raise Error(
                "%s is missing from cache! To ignore export "
                "BISECT_CONTINUE_ON_MISSING=1. See documentation for more "
                "details on this option." % full_obj_path
            )
        log_file = os.path.join(bisect_dir, "_MISSING_CACHED_OBJ_LOG")
        log_to_file(log_file, execargs, "? compiler", full_obj_path)
        return exec_and_return(execargs)

    cache = which_cache(full_obj_path)

    # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
    # result from the good/bad cache. This option is safe and covers all compiler
    # side effects, but is very slow!
    if WRAPPER_SAFE_MODE:
        retval = exec_and_return(execargs)
        if retval:
            return retval
        # restore_file replaces an existing object itself, and only after it
        # has confirmed the cached copy exists. Removing the object here first
        # would crash when the compiler exited 0 without producing it, and
        # would lose the fresh object when the cache copy is missing.
        restore_file(bisect_dir, cache, full_obj_path)
        return retval

    # Generate compiler side effects. Trick Make into thinking compiler was
    # actually executed.
    for side_effect in get_side_effects(execargs):
        restore_file(bisect_dir, cache, side_effect)

    # If generated object file happened to be pruned/cleaned by Make then link it
    # over from cache again.
    if not os.path.exists(full_obj_path):
        restore_file(bisect_dir, cache, full_obj_path)

    return 0


def bisect_driver(bisect_stage, bisect_dir, execargs):
    """Dispatch to the handler for the requested bisection stage.

    Args:
      bisect_stage: one of "POPULATE_GOOD", "POPULATE_BAD" or "TRIAGE".
      bisect_dir: bisection directory.
      execargs: compiler execution arguments.

    Returns:
      Whatever the selected stage handler returns.

    Raises:
      ValueError: if bisect_stage is not one of the known stages.
    """
    # Reject unknown stages up front so the rest is a plain dispatch.
    if bisect_stage not in ("POPULATE_GOOD", "POPULATE_BAD", "TRIAGE"):
        raise ValueError("wrong value for BISECT_STAGE: %s" % bisect_stage)

    if bisect_stage == "TRIAGE":
        return bisect_triage(execargs, bisect_dir)

    # Both populate stages share one handler and differ only in target cache.
    target_cache = GOOD_CACHE if bisect_stage == "POPULATE_GOOD" else BAD_CACHE
    return bisect_populate(execargs, bisect_dir, target_cache)