-rw-r--r--  infra/bisector.py                |  33
-rw-r--r--  infra/build_specified_commit.py  | 133
-rwxr-xr-x  infra/helper.py                  |  21
3 files changed, 144 insertions(+), 43 deletions(-)
diff --git a/infra/bisector.py b/infra/bisector.py
index ff8366db7..f8d2e7d4f 100644
--- a/infra/bisector.py
+++ b/infra/bisector.py
@@ -32,9 +32,6 @@ This is done with the following steps:
import argparse
import collections
-import datetime
-from distutils import spawn
-import json
import logging
import os
import sys
@@ -121,34 +118,6 @@ def main():
return 0
-def _load_base_builder_repo():
- """Get base-image digests."""
- gcloud_path = spawn.find_executable('gcloud')
- if not gcloud_path:
- logging.warning('gcloud not found in PATH.')
- return None
-
- result, _, _ = utils.execute([
- gcloud_path,
- 'container',
- 'images',
- 'list-tags',
- 'gcr.io/oss-fuzz-base/base-builder',
- '--format=json',
- '--sort-by=timestamp',
- ],
- check_result=True)
- result = json.loads(result)
-
- repo = build_specified_commit.BaseBuilderRepo()
- for image in result:
- timestamp = datetime.datetime.fromisoformat(
- image['timestamp']['datetime']).astimezone(datetime.timezone.utc)
- repo.add_digest(timestamp, image['digest'])
-
- return repo
-
-
def _get_dedup_token(output):
"""Get dedup token."""
for line in output.splitlines():
@@ -200,7 +169,7 @@ def _bisect(bisect_type, old_commit, new_commit, test_case_path, fuzz_target,
build_data):
"""Perform the bisect."""
# pylint: disable=too-many-branches
- base_builder_repo = _load_base_builder_repo()
+ base_builder_repo = build_specified_commit.load_base_builder_repo()
with tempfile.TemporaryDirectory() as tmp_dir:
repo_url, repo_path = build_specified_commit.detect_main_repo(
diff --git a/infra/build_specified_commit.py b/infra/build_specified_commit.py
index 72c3bad8e..eef671b70 100644
--- a/infra/build_specified_commit.py
+++ b/infra/build_specified_commit.py
@@ -17,12 +17,17 @@ This module is allows each of the OSS Fuzz projects fuzzers to be built
from a specific point in time. This feature can be used for implementations
like continuious integration fuzzing and bisection to find errors
"""
+import argparse
import bisect
+import datetime
+from distutils import spawn
import os
import collections
+import json
import logging
import re
import shutil
+import tempfile
import time
import helper
@@ -152,6 +157,34 @@ def _build_image_with_retries(project_name):
return result
+def get_required_post_checkout_steps(dockerfile_path):
+ """Get required post checkout steps (best effort)."""
+
+ checkout_pattern = re.compile(r'\s*RUN\s*(git|svn|hg)')
+
+ # If the build.sh is copied from upstream, we need to copy it again after
+ # changing the revision to ensure correct building.
+ post_run_pattern = re.compile(r'\s*RUN\s*(.*build\.sh.*(\$SRC|/src).*)')
+
+ with open(dockerfile_path) as handle:
+ lines = handle.readlines()
+
+ subsequent_run_cmds = []
+ for i, line in enumerate(lines):
+ if checkout_pattern.match(line):
+ subsequent_run_cmds = []
+ continue
+
+ match = post_run_pattern.match(line)
+ if match:
+ workdir = helper.workdir_from_lines(lines[:i])
+ command = match.group(1)
+ subsequent_run_cmds.append((workdir, command))
+
+ return subsequent_run_cmds
+
+
+# pylint: disable=too-many-locals
def build_fuzzers_from_commit(commit,
build_repo_manager,
host_src_path,
@@ -175,8 +208,27 @@ def build_fuzzers_from_commit(commit,
copy_src_from_docker(build_data.project_name,
os.path.dirname(host_src_path))
+ projects_dir = os.path.join('projects', build_data.project_name)
+ dockerfile_path = os.path.join(projects_dir, 'Dockerfile')
+
for i in range(num_retry + 1):
build_repo_manager.checkout_commit(commit, clean=False)
+
+ post_checkout_steps = get_required_post_checkout_steps(dockerfile_path)
+ for workdir, post_checkout_step in post_checkout_steps:
+ logging.info('Running post-checkout step `%s` in %s.', post_checkout_step,
+ workdir)
+ helper.docker_run([
+ '-w',
+ workdir,
+ '-v',
+ host_src_path + ':' + '/src',
+ 'gcr.io/oss-fuzz/' + build_data.project_name,
+ '/bin/bash',
+ '-c',
+ post_checkout_step,
+ ])
+
result = helper.build_fuzzers_impl(project_name=build_data.project_name,
clean=True,
engine=build_data.engine,
@@ -191,7 +243,6 @@ def build_fuzzers_from_commit(commit,
# Retry with an OSS-Fuzz builder container that's closer to the project
# commit date.
commit_date = build_repo_manager.commit_date(commit)
- projects_dir = os.path.join('projects', build_data.project_name)
# Find first change in the projects/<PROJECT> directory before the project
# commit date.
@@ -216,8 +267,7 @@ def build_fuzzers_from_commit(commit,
if base_builder_repo:
base_builder_digest = base_builder_repo.find_digest(commit_date)
logging.info('Using base-builder with digest %s.', base_builder_digest)
- _replace_base_builder_digest(os.path.join(projects_dir, 'Dockerfile'),
- base_builder_digest)
+ _replace_base_builder_digest(dockerfile_path, base_builder_digest)
# Rebuild image and re-copy src dir since things in /src could have changed.
if not _build_image_with_retries(build_data.project_name):
@@ -273,3 +323,80 @@ def detect_main_repo(project_name, repo_name=None, commit=None):
logging.error('Failed to detect repo:\n%s', out)
return None, None
+
+
+def load_base_builder_repo():
+ """Get base-image digests."""
+ gcloud_path = spawn.find_executable('gcloud')
+ if not gcloud_path:
+ logging.warning('gcloud not found in PATH.')
+ return None
+
+ result, _, _ = utils.execute([
+ gcloud_path,
+ 'container',
+ 'images',
+ 'list-tags',
+ 'gcr.io/oss-fuzz-base/base-builder',
+ '--format=json',
+ '--sort-by=timestamp',
+ ],
+ check_result=True)
+ result = json.loads(result)
+
+ repo = BaseBuilderRepo()
+ for image in result:
+ timestamp = datetime.datetime.fromisoformat(
+ image['timestamp']['datetime']).astimezone(datetime.timezone.utc)
+ repo.add_digest(timestamp, image['digest'])
+
+ return repo
+
+
+def main():
+ """Main function."""
+ logging.getLogger().setLevel(logging.INFO)
+
+ parser = argparse.ArgumentParser(
+ description='Build fuzzers at a specific commit')
+ parser.add_argument('--project_name',
+ help='The name of the project where the bug occurred.',
+ required=True)
+ parser.add_argument('--commit',
+ help='The newest commit SHA to be bisected.',
+ required=True)
+ parser.add_argument('--engine',
+ help='The default is "libfuzzer".',
+ default='libfuzzer')
+ parser.add_argument('--sanitizer',
+ default='address',
+ help='The default is "address".')
+ parser.add_argument('--architecture', default='x86_64')
+
+ args = parser.parse_args()
+
+ repo_url, repo_path = detect_main_repo(args.project_name, commit=args.commit)
+
+ if not repo_url or not repo_path:
+ raise ValueError('Main git repo can not be determined.')
+
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ host_src_dir = copy_src_from_docker(args.project_name, tmp_dir)
+ build_repo_manager = repo_manager.BaseRepoManager(
+ os.path.join(host_src_dir, os.path.basename(repo_path)))
+ base_builder_repo = load_base_builder_repo()
+
+ build_data = BuildData(project_name=args.project_name,
+ engine=args.engine,
+ sanitizer=args.sanitizer,
+ architecture=args.architecture)
+ if not build_fuzzers_from_commit(args.commit,
+ build_repo_manager,
+ host_src_dir,
+ build_data,
+ base_builder_repo=base_builder_repo):
+ raise RuntimeError('Failed to build.')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/infra/helper.py b/infra/helper.py
index 668f31d0b..22c81984d 100755
--- a/infra/helper.py
+++ b/infra/helper.py
@@ -363,13 +363,8 @@ def _env_to_docker_args(env_list):
WORKDIR_REGEX = re.compile(r'\s*WORKDIR\s*([^\s]+)')
-def _workdir_from_dockerfile(project_name):
- """Parse WORKDIR from the Dockerfile for the given project."""
- dockerfile_path = get_dockerfile_path(project_name)
-
- with open(dockerfile_path) as file_handle:
- lines = file_handle.readlines()
-
+def workdir_from_lines(lines, default='/src'):
+ """Get the WORKDIR from the given lines."""
for line in reversed(lines): # reversed to get last WORKDIR.
match = re.match(WORKDIR_REGEX, line)
if match:
@@ -381,7 +376,17 @@ def _workdir_from_dockerfile(project_name):
return os.path.normpath(workdir)
- return os.path.join('/src', project_name)
+ return default
+
+
+def _workdir_from_dockerfile(project_name):
+ """Parse WORKDIR from the Dockerfile for the given project."""
+ dockerfile_path = get_dockerfile_path(project_name)
+
+ with open(dockerfile_path) as file_handle:
+ lines = file_handle.readlines()
+
+ return workdir_from_lines(lines, default=os.path.join('/src', project_name))
def docker_run(run_args, print_output=True):