diff options
Diffstat (limited to 'infra/bisector.py')
-rw-r--r-- | infra/bisector.py | 182 |
1 files changed, 92 insertions, 90 deletions
diff --git a/infra/bisector.py b/infra/bisector.py index 877110577..d1fd669fa 100644 --- a/infra/bisector.py +++ b/infra/bisector.py @@ -18,7 +18,7 @@ where the bug was introduced. It also looks for where the bug was fixed. This is done with the following steps: - NOTE: NEEDS TO BE RUN FROM THE OSS-Fuzz HOME directory + NOTE: Needs to be run from root of the OSS-Fuzz source checkout. Typical usage example: python3 infra/bisector.py @@ -31,122 +31,124 @@ This is done with the following steps: """ import argparse -from dataclasses import dataclass -import os +import logging import tempfile import build_specified_commit import helper import repo_manager - - -@dataclass -class BuildData(): - """List of data requried for bisection of errors in OSS-Fuzz projects. - - Attributes: - project_name: The name of the OSS-Fuzz project that is being checked - engine: The fuzzing engine to be used - sanitizer: The fuzzing sanitizer to be used - architecture: The system architecture being fuzzed - """ - project_name: str - engine: str - sanitizer: str - architecture: str +import utils def main(): """Finds the commit SHA where an error was initally introduced.""" + utils.chdir_to_root() parser = argparse.ArgumentParser( description='git bisection for finding introduction of bugs') - parser.add_argument( - '--project_name', - help='The name of the project where the bug occured', - required=True) - parser.add_argument( - '--commit_new', - help='The newest commit SHA to be bisected', - required=True) - parser.add_argument( - '--commit_old', - help='The oldest commit SHA to be bisected', - required=True) - parser.add_argument( - '--fuzz_target', help='the name of the fuzzer to be built', required=True) - parser.add_argument( - '--testcase', help='the testcase to be reproduced', required=True) - parser.add_argument('--engine', default='libfuzzer') - parser.add_argument( - '--sanitizer', - default='address', - help='the default is "address"; "dataflow" for "dataflow" engine') + parser.add_argument('--project_name', + help='The name of the project where the bug occurred.', + required=True) + parser.add_argument('--new_commit', + help='The newest commit SHA to be bisected.', + required=True) + parser.add_argument('--old_commit', + help='The oldest commit SHA to be bisected.', + required=True) + parser.add_argument('--fuzz_target', + help='The name of the fuzzer to be built.', + required=True) + parser.add_argument('--test_case_path', + help='The path to test case.', + required=True) + parser.add_argument('--engine', + help='The default is "libfuzzer".', + default='libfuzzer') + parser.add_argument('--sanitizer', + default='address', + help='The default is "address".') parser.add_argument('--architecture', default='x86_64') args = parser.parse_args() - build_data = BuildData(args.project_name, args.engine, args.sanitizer, - args.architecture) - if os.getcwd() != os.path.dirname( - os.path.dirname(os.path.realpath(__file__))): - print("Error: bisector.py needs to be run from the OSS-Fuzz home directory") - return 1 - error_sha = bisect(args.commit_old, args.commit_new, args.testcase, + + build_data = build_specified_commit.BuildData(project_name=args.project_name, + engine=args.engine, + sanitizer=args.sanitizer, + architecture=args.architecture) + + error_sha = bisect(args.old_commit, args.new_commit, args.test_case_path, args.fuzz_target, build_data) if not error_sha: - print('No error was found in commit range %s:%s' % - (args.commit_old, args.commit_new)) + logging.error('No error was found in commit range %s:%s', args.commit_old, + args.commit_new) + return 1 + if error_sha == args.commit_old: + logging.error( + 'Bisection Error: Both the first and the last commits in' + 'the given range have the same behavior, bisection is not possible. ') return 1 print('Error was introduced at commit %s' % error_sha) return 0 -def bisect(commit_old, commit_new, testcase, fuzz_target, build_data): +def bisect(old_commit, new_commit, test_case_path, fuzz_target, build_data): """From a commit range, this function caluclates which introduced a - specific error from a fuzz testcase. + specific error from a fuzz test_case_path. Args: - commit_old: The oldest commit in the error regression range - commit_new: The newest commit in the error regression range - testcase: The file path of the test case that triggers the error - fuzz_target: The name of the fuzzer to be tested - build_data: a class holding all of the input parameters for bisection + old_commit: The oldest commit in the error regression range. + new_commit: The newest commit in the error regression range. + test_case_path: The file path of the test case that triggers the error + fuzz_target: The name of the fuzzer to be tested. + build_data: a class holding all of the input parameters for bisection. Returns: - The commit SHA that introduced the error or None + The commit SHA that introduced the error or None. + + Raises: + ValueError: when a repo url can't be determine from the project. """ - local_store_path = tempfile.mkdtemp() - repo_url = build_specified_commit.infer_main_repo(build_data.project_name, - local_store_path, - commit_old) - bisect_repo_manager = repo_manager.RepoManager(repo_url, local_store_path) - commit_list = bisect_repo_manager.get_commit_list(commit_old, commit_new) - build_specified_commit.build_fuzzer_from_commit( - build_data.project_name, commit_list[0], bisect_repo_manager.repo_dir, - build_data.engine, build_data.sanitizer, build_data.architecture, - bisect_repo_manager) - error_code = helper.reproduce_impl(build_data.project_name, fuzz_target, - False, [], [], testcase) - old_idx = len(commit_list) - 1 - new_idx = 0 - if len(commit_list) == 1: - if not error_code: - return None - return commit_list[0] - - while old_idx - new_idx != 1: - curr_idx = (old_idx + new_idx) // 2 - build_specified_commit.build_fuzzer_from_commit( - build_data.project_name, commit_list[curr_idx], - bisect_repo_manager.repo_dir, build_data.engine, build_data.sanitizer, - build_data.architecture, bisect_repo_manager) - error_exists = ( - helper.reproduce_impl(build_data.project_name, fuzz_target, False, [], - [], testcase) == error_code) - if error_exists == error_code: - new_idx = curr_idx - else: - old_idx = curr_idx - return commit_list[new_idx] + with tempfile.TemporaryDirectory() as tmp_dir: + repo_url, repo_name = build_specified_commit.detect_main_repo( + build_data.project_name, commit=old_commit) + if not repo_url or not repo_name: + raise ValueError('Main git repo can not be determined.') + bisect_repo_manager = repo_manager.RepoManager(repo_url, + tmp_dir, + repo_name=repo_name) + commit_list = bisect_repo_manager.get_commit_list(old_commit, new_commit) + old_idx = len(commit_list) - 1 + new_idx = 0 + build_specified_commit.build_fuzzers_from_commit(commit_list[new_idx], + bisect_repo_manager, + build_data) + expected_error_code = helper.reproduce_impl(build_data.project_name, + fuzz_target, False, [], [], + test_case_path) + + # Check if the error is persistent through the commit range + build_specified_commit.build_fuzzers_from_commit( + commit_list[old_idx], + bisect_repo_manager, + build_data, + ) + + if expected_error_code == helper.reproduce_impl(build_data.project_name, + fuzz_target, False, [], [], + test_case_path): + return commit_list[old_idx] + + while old_idx - new_idx > 1: + curr_idx = (old_idx + new_idx) // 2 + build_specified_commit.build_fuzzers_from_commit(commit_list[curr_idx], + bisect_repo_manager, + build_data) + error_code = helper.reproduce_impl(build_data.project_name, fuzz_target, + False, [], [], test_case_path) + if expected_error_code == error_code: + new_idx = curr_idx + else: + old_idx = curr_idx + return commit_list[new_idx] if __name__ == '__main__': |