From 4a51ecb61f80d955685615001846c545b2229368 Mon Sep 17 00:00:00 2001 From: Oliver Chang Date: Wed, 10 Jun 2020 17:12:28 +1000 Subject: build_specified_commit: Handle build.sh which are part of upstream repo. (#3932) This is a best effort attempt to parse the relevant copy command from the Dockerfile. Also add a main function to build_specified_commit to make it easier to test. --- infra/build_specified_commit.py | 133 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 3 deletions(-) (limited to 'infra/build_specified_commit.py') diff --git a/infra/build_specified_commit.py b/infra/build_specified_commit.py index 72c3bad8e..eef671b70 100644 --- a/infra/build_specified_commit.py +++ b/infra/build_specified_commit.py @@ -17,12 +17,17 @@ This module is allows each of the OSS Fuzz projects fuzzers to be built from a specific point in time. This feature can be used for implementations like continuious integration fuzzing and bisection to find errors """ +import argparse import bisect +import datetime +from distutils import spawn import os import collections +import json import logging import re import shutil +import tempfile import time import helper @@ -152,6 +157,34 @@ def _build_image_with_retries(project_name): return result +def get_required_post_checkout_steps(dockerfile_path): + """Get required post checkout steps (best effort).""" + + checkout_pattern = re.compile(r'\s*RUN\s*(git|svn|hg)') + + # If the build.sh is copied from upstream, we need to copy it again after + # changing the revision to ensure correct building. + post_run_pattern = re.compile(r'\s*RUN\s*(.*build\.sh.*(\$SRC|/src).*)') + + with open(dockerfile_path) as handle: + lines = handle.readlines() + + subsequent_run_cmds = [] + for i, line in enumerate(lines): + if checkout_pattern.match(line): + subsequent_run_cmds = [] + continue + + match = post_run_pattern.match(line) + if match: + workdir = helper.workdir_from_lines(lines[:i]) + command = match.group(1) + subsequent_run_cmds.append((workdir, command)) + + return subsequent_run_cmds + + +# pylint: disable=too-many-locals def build_fuzzers_from_commit(commit, build_repo_manager, host_src_path, @@ -175,8 +208,27 @@ def build_fuzzers_from_commit(commit, copy_src_from_docker(build_data.project_name, os.path.dirname(host_src_path)) + projects_dir = os.path.join('projects', build_data.project_name) + dockerfile_path = os.path.join(projects_dir, 'Dockerfile') + for i in range(num_retry + 1): build_repo_manager.checkout_commit(commit, clean=False) + + post_checkout_steps = get_required_post_checkout_steps(dockerfile_path) + for workdir, post_checkout_step in post_checkout_steps: + logging.info('Running post-checkout step `%s` in %s.', post_checkout_step, + workdir) + helper.docker_run([ + '-w', + workdir, + '-v', + host_src_path + ':' + '/src', + 'gcr.io/oss-fuzz/' + build_data.project_name, + '/bin/bash', + '-c', + post_checkout_step, + ]) + result = helper.build_fuzzers_impl(project_name=build_data.project_name, clean=True, engine=build_data.engine, @@ -191,7 +243,6 @@ def build_fuzzers_from_commit(commit, # Retry with an OSS-Fuzz builder container that's closer to the project # commit date. commit_date = build_repo_manager.commit_date(commit) - projects_dir = os.path.join('projects', build_data.project_name) # Find first change in the projects/ directory before the project # commit date. @@ -216,8 +267,7 @@ def build_fuzzers_from_commit(commit, if base_builder_repo: base_builder_digest = base_builder_repo.find_digest(commit_date) logging.info('Using base-builder with digest %s.', base_builder_digest) - _replace_base_builder_digest(os.path.join(projects_dir, 'Dockerfile'), - base_builder_digest) + _replace_base_builder_digest(dockerfile_path, base_builder_digest) # Rebuild image and re-copy src dir since things in /src could have changed. if not _build_image_with_retries(build_data.project_name): @@ -273,3 +323,80 @@ def detect_main_repo(project_name, repo_name=None, commit=None): logging.error('Failed to detect repo:\n%s', out) return None, None + + +def load_base_builder_repo(): + """Get base-image digests.""" + gcloud_path = spawn.find_executable('gcloud') + if not gcloud_path: + logging.warning('gcloud not found in PATH.') + return None + + result, _, _ = utils.execute([ + gcloud_path, + 'container', + 'images', + 'list-tags', + 'gcr.io/oss-fuzz-base/base-builder', + '--format=json', + '--sort-by=timestamp', + ], + check_result=True) + result = json.loads(result) + + repo = BaseBuilderRepo() + for image in result: + timestamp = datetime.datetime.fromisoformat( + image['timestamp']['datetime']).astimezone(datetime.timezone.utc) + repo.add_digest(timestamp, image['digest']) + + return repo + + +def main(): + """Main function.""" + logging.getLogger().setLevel(logging.INFO) + + parser = argparse.ArgumentParser( + description='Build fuzzers at a specific commit') + parser.add_argument('--project_name', + help='The name of the project where the bug occurred.', + required=True) + parser.add_argument('--commit', + help='The newest commit SHA to be bisected.', + required=True) + parser.add_argument('--engine', + help='The default is "libfuzzer".', + default='libfuzzer') + parser.add_argument('--sanitizer', + default='address', + help='The default is "address".') + parser.add_argument('--architecture', default='x86_64') + + args = parser.parse_args() + + repo_url, repo_path = detect_main_repo(args.project_name, commit=args.commit) + + if not repo_url or not repo_path: + raise ValueError('Main git repo can not be determined.') + + with tempfile.TemporaryDirectory() as tmp_dir: + host_src_dir = copy_src_from_docker(args.project_name, tmp_dir) + build_repo_manager = repo_manager.BaseRepoManager( + os.path.join(host_src_dir, os.path.basename(repo_path))) + base_builder_repo = load_base_builder_repo() + + build_data = BuildData(project_name=args.project_name, + engine=args.engine, + sanitizer=args.sanitizer, + architecture=args.architecture) + if not build_fuzzers_from_commit(args.commit, + build_repo_manager, + host_src_dir, + build_data, + base_builder_repo=base_builder_repo): + raise RuntimeError('Failed to build.') + + +if __name__ == '__main__': + main() -- cgit v1.2.3