Diffstat (limited to 'infra/base-images/base-runner')
-rwxr-xr-x  infra/base-images/base-runner/Dockerfile                   18
-rwxr-xr-x  infra/base-images/base-runner/bad_build_check              12
-rwxr-xr-x  infra/base-images/base-runner/coverage                    120
-rwxr-xr-x  infra/base-images/base-runner/coverage_helper               2
-rwxr-xr-x  infra/base-images/base-runner/jacoco_report_converter.py  158
-rw-r--r--  infra/base-images/base-runner/profraw_update.py           123
-rwxr-xr-x  infra/base-images/base-runner/run_fuzzer                   31
-rwxr-xr-x  infra/base-images/base-runner/targets_list                  3
-rwxr-xr-x  infra/base-images/base-runner/test_all.py                 102
-rw-r--r--  infra/base-images/base-runner/test_all_test.py              8
10 files changed, 492 insertions(+), 85 deletions(-)
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile
index f847de026..fadd00acc 100755
--- a/infra/base-images/base-runner/Dockerfile
+++ b/infra/base-images/base-runner/Dockerfile
@@ -45,12 +45,18 @@ RUN apt-get update && apt-get install -y \
libcap2 \
python3 \
python3-pip \
+ python3-setuptools \
unzip \
wget \
zip --no-install-recommends
-RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage /opt/code_coverage && \
- pip3 install -r /opt/code_coverage/requirements.txt
+ENV CODE_COVERAGE_SRC=/opt/code_coverage
+RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage $CODE_COVERAGE_SRC && \
+ cd /opt/code_coverage && \
+ git checkout edba4873b5e8a390e977a64c522db2df18a8b27d && \
+ pip3 install wheel && \
+ pip3 install -r requirements.txt && \
+ pip3 install MarkupSafe==0.23
# Default environment options for various sanitizers.
# Note that these match the settings used in ClusterFuzz and
@@ -91,6 +97,12 @@ RUN wget https://download.java.net/java/GA/jdk15.0.2/0d1cfde4252546c6931946de8db
rm -f openjdk-15.0.2_linux-x64_bin.tar.gz && \
rm -rf $JAVA_HOME/jmods $JAVA_HOME/lib/src.zip
+# Install JaCoCo for JVM coverage.
+RUN wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.cli/0.8.7/org.jacoco.cli-0.8.7-nodeps.jar -O /opt/jacoco-cli.jar && \
+ wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.agent/0.8.7/org.jacoco.agent-0.8.7-runtime.jar -O /opt/jacoco-agent.jar && \
+ echo "37df187b76888101ecd745282e9cd1ad4ea508d6 /opt/jacoco-agent.jar" | shasum --check && \
+ echo "c1814e7bba5fd8786224b09b43c84fd6156db690 /opt/jacoco-cli.jar" | shasum --check
+
# Do this last to make developing these files easier/faster due to caching.
COPY bad_build_check \
collect_dft \
@@ -98,10 +110,12 @@ COPY bad_build_check \
coverage_helper \
dataflow_tracer.py \
download_corpus \
+ jacoco_report_converter.py \
rcfilt \
reproduce \
run_fuzzer \
parse_options.py \
+ profraw_update.py \
targets_list \
test_all.py \
test_one.py \
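
Note: the two JaCoCo jars above are pinned by SHA-1. A minimal Python sketch of the same integrity check, useful when bumping the JaCoCo version outside the image build (paths and digests copied from the RUN step above):

    import hashlib

    # Digests pinned in the Dockerfile above.
    EXPECTED_SHA1 = {
        '/opt/jacoco-agent.jar': '37df187b76888101ecd745282e9cd1ad4ea508d6',
        '/opt/jacoco-cli.jar': 'c1814e7bba5fd8786224b09b43c84fd6156db690',
    }

    for path, expected in EXPECTED_SHA1.items():
        with open(path, 'rb') as jar:
            actual = hashlib.sha1(jar.read()).hexdigest()
        assert actual == expected, '%s: got %s, want %s' % (path, actual, expected)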
diff --git a/infra/base-images/base-runner/bad_build_check b/infra/base-images/base-runner/bad_build_check
index 01f8fbbab..bb328c793 100755
--- a/infra/base-images/base-runner/bad_build_check
+++ b/infra/base-images/base-runner/bad_build_check
@@ -39,7 +39,7 @@ DFSAN_CALLS_THRESHOLD_FOR_NON_DFSAN_BUILD=0
MSAN_CALLS_THRESHOLD_FOR_MSAN_BUILD=1000
# Some engines (e.g. honggfuzz) may make a very small number of calls to msan
# for memory poisoning.
-MSAN_CALLS_THRESHOLD_FOR_NON_MSAN_BUILD=2
+MSAN_CALLS_THRESHOLD_FOR_NON_MSAN_BUILD=3
# Usually, a non UBSan build (e.g. ASan) has 165 calls to UBSan runtime. The
# majority of targets built with UBSan have 200+ UBSan calls, but there are
@@ -90,10 +90,7 @@ function check_engine {
echo "BAD BUILD: $FUZZER seems to have only partial coverage instrumentation."
fi
elif [[ "$FUZZING_ENGINE" == afl ]]; then
- # TODO(https://github.com/google/oss-fuzz/issues/2470): Dont use
- # AFL_DRIVER_DONT_DEFER by default, support .options files in
- # bad_build_check instead.
- AFL_DRIVER_DONT_DEFER=1 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 20s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
+ AFL_FORKSRV_INIT_TMOUT=30000 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 35s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
CHECK_PASSED=$(egrep "All set and ready to roll" -c $FUZZER_OUTPUT)
if (( $CHECK_PASSED == 0 )); then
echo "BAD BUILD: fuzzing $FUZZER with afl-fuzz failed."
@@ -136,10 +133,7 @@ function check_startup_crash {
SKIP_SEED_CORPUS=1 run_fuzzer $FUZZER_NAME -seed=1337 -runs=$MIN_NUMBER_OF_RUNS &>$FUZZER_OUTPUT
CHECK_PASSED=$(egrep "Done $MIN_NUMBER_OF_RUNS runs" -c $FUZZER_OUTPUT)
elif [[ "$FUZZING_ENGINE" = afl ]]; then
- # TODO(https://github.com/google/oss-fuzz/issues/2470): Dont use
- # AFL_DRIVER_DONT_DEFER by default, support .options files in
- # bad_build_check instead.
- AFL_DRIVER_DONT_DEFER=1 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 20s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
+ AFL_FORKSRV_INIT_TMOUT=30000 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 35s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
if [ $(egrep "target binary (crashed|terminated)" -c $FUZZER_OUTPUT) -eq 0 ]; then
CHECK_PASSED=1
fi
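
Note: the new AFL_FORKSRV_INIT_TMOUT=30000 gives the fork server up to 30s to come up, and the outer timeout grows from 20s to 35s so that budget fits inside the wall clock. A rough Python equivalent of the smoke run, for illustration only (run_fuzzer is assumed to be on PATH; 'example_fuzzer' is a hypothetical target name):

    import os
    import subprocess

    env = dict(os.environ,
               AFL_FORKSRV_INIT_TMOUT='30000',  # fork server gets up to 30s
               AFL_NO_UI='1',
               SKIP_SEED_CORPUS='1')
    proc = subprocess.run(
        ['timeout', '--preserve-status', '-s', 'INT', '35s',
         'run_fuzzer', 'example_fuzzer'],
        env=env, capture_output=True, check=False)
    # ~5s of headroom remains over the 30s fork-server init budget.
    check_passed = b'All set and ready to roll' in (proc.stdout + proc.stderr)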
diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage
index a86b00dec..3c7b274e4 100755
--- a/infra/base-images/base-runner/coverage
+++ b/infra/base-images/base-runner/coverage
@@ -19,14 +19,21 @@ cd $OUT
if (( $# > 0 )); then
FUZZ_TARGETS="$@"
else
- FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n')"
+ FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n' | \
+ grep -v -x -F \
+ -e 'llvm-symbolizer' \
+ -e 'jazzer_agent_deploy.jar' \
+ -e 'jazzer_driver' \
+ -e 'jazzer_driver_with_sanitizer')"
fi
-DUMPS_DIR="$OUT/dumps"
-FUZZER_STATS_DIR="$OUT/fuzzer_stats"
-LOGS_DIR="$OUT/logs"
-REPORT_ROOT_DIR="$OUT/report"
-REPORT_PLATFORM_DIR="$OUT/report/linux"
+COVERAGE_OUTPUT_DIR=${COVERAGE_OUTPUT_DIR:-$OUT}
+
+DUMPS_DIR="$COVERAGE_OUTPUT_DIR/dumps"
+FUZZER_STATS_DIR="$COVERAGE_OUTPUT_DIR/fuzzer_stats"
+LOGS_DIR="$COVERAGE_OUTPUT_DIR/logs"
+REPORT_ROOT_DIR="$COVERAGE_OUTPUT_DIR/report"
+REPORT_PLATFORM_DIR="$COVERAGE_OUTPUT_DIR/report/linux"
for directory in $DUMPS_DIR $FUZZER_STATS_DIR $LOGS_DIR $REPORT_ROOT_DIR \
$REPORT_PLATFORM_DIR; do
@@ -54,6 +61,8 @@ objects=""
# Number of CPUs available, this is needed for running tests in parallel.
NPROC=$(nproc)
+CORPUS_DIR=${CORPUS_DIR:-"/corpus"}
+
function run_fuzz_target {
local target=$1
@@ -62,7 +71,7 @@ function run_fuzz_target {
local profraw_file="$DUMPS_DIR/$target.%1m.profraw"
local profraw_file_mask="$DUMPS_DIR/$target.*.profraw"
local profdata_file="$DUMPS_DIR/$target.profdata"
- local corpus_real="/corpus/${target}"
+ local corpus_real="$CORPUS_DIR/${target}"
# -merge=1 requires an output directory, create a new, empty dir for that.
local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}"
@@ -74,7 +83,7 @@ function run_fuzz_target {
# because (A) corpuses are already minimized; (B) we do not use sancov, and so
# libFuzzer always finishes merge with an empty output dir.
# Use 100s timeout instead of 25s as code coverage builds can be very slow.
- local args="-merge=1 -timeout=100 -close_fd_mask=3 $corpus_dummy $corpus_real"
+ local args="-merge=1 -timeout=100 $corpus_dummy $corpus_real"
export LLVM_PROFILE_FILE=$profraw_file
timeout $TIMEOUT $OUT/$target $args &> $LOGS_DIR/$target.log
@@ -90,6 +99,9 @@ function run_fuzz_target {
return 0
fi
+ # If necessary, translate to the latest profraw version.
+ profraw_update.py $OUT/$target $profraw_file_mask tmp.profraw
+ mv tmp.profraw $profraw_file_mask
llvm-profdata merge -j=1 -sparse $profraw_file_mask -o $profdata_file
# Delete unnecessary and (potentially) large .profraw files.
@@ -115,7 +127,7 @@ function run_go_fuzz_target {
local target=$1
echo "Running go target $target"
- export FUZZ_CORPUS_DIR="/corpus/${target}/"
+ export FUZZ_CORPUS_DIR="$CORPUS_DIR/${target}/"
export FUZZ_PROFILE_NAME="$DUMPS_DIR/$target.perf"
$OUT/$target -test.coverprofile $DUMPS_DIR/$target.profdata &> $LOGS_DIR/$target.log
# translate from golangish paths to current absolute paths
@@ -125,6 +137,47 @@ function run_go_fuzz_target {
$SYSGOPATH/bin/gocovsum $DUMPS_DIR/$target.profdata > $FUZZER_STATS_DIR/$target.json
}
+function run_java_fuzz_target {
+ local target=$1
+
+ local exec_file="$DUMPS_DIR/$target.exec"
+ local class_dump_dir="$DUMPS_DIR/${target}_classes/"
+ mkdir "$class_dump_dir"
+ local corpus_real="$CORPUS_DIR/${target}"
+
+ # -merge=1 requires an output directory, create a new, empty dir for that.
+ local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}"
+ rm -rf $corpus_dummy && mkdir -p $corpus_dummy
+
+ # Use 100s timeout instead of 25s as code coverage builds can be very slow.
+ local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir,excludes=com.code_intelligence.jazzer.*"
+ local args="-merge=1 -timeout=100 --nohooks \
+ --additional_jvm_args=-javaagent:/opt/jacoco-agent.jar=$jacoco_args \
+ $corpus_dummy $corpus_real"
+
+ timeout $TIMEOUT $OUT/$target $args &> $LOGS_DIR/$target.log
+ if (( $? != 0 )); then
+ echo "Error occured while running $target:"
+ cat $LOGS_DIR/$target.log
+ fi
+
+ if (( $(du -c $exec_file | tail -n 1 | cut -f 1) == 0 )); then
+ # Skip fuzz targets that failed to produce .exec files.
+ return 0
+ fi
+
+ # Generate XML report only as input to jacoco_report_converter.
+ # Source files are not needed for the summary.
+ local xml_report="$DUMPS_DIR/${target}.xml"
+ local summary_file="$FUZZER_STATS_DIR/$target.json"
+ java -jar /opt/jacoco-cli.jar report $exec_file \
+ --xml $xml_report \
+ --classfiles $class_dump_dir
+
+ # Write llvm-cov summary file.
+ jacoco_report_converter.py $xml_report $summary_file
+}
+
export SYSGOPATH=$GOPATH
export GOPATH=$OUT/$GOPATH
# Run each fuzz target, generate raw coverage dumps.
@@ -136,6 +189,14 @@ for fuzz_target in $FUZZ_TARGETS; do
grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1 || continue
fi
run_go_fuzz_target $fuzz_target &
+ elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
+ # Continue if not a fuzz target.
+ if [[ $FUZZING_ENGINE != "none" ]]; then
+ grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1 || continue
+ fi
+
+ echo "Running $fuzz_target"
+ run_java_fuzz_target $fuzz_target &
else
# Continue if not a fuzz target.
if [[ $FUZZING_ENGINE != "none" ]]; then
@@ -175,6 +236,43 @@ if [[ $FUZZING_LANGUAGE == "go" ]]; then
mv merged.data $REPORT_ROOT_DIR/heap.prof
#TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof
echo "Finished generating code coverage report for Go fuzz targets."
+elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
+
+ # From this point on the script does not tolerate any errors.
+ set -e
+
+ # Merge .exec files from the individual targets.
+ jacoco_merged_exec=$DUMPS_DIR/jacoco.merged.exec
+ java -jar /opt/jacoco-cli.jar merge $DUMPS_DIR/*.exec \
+ --destfile $jacoco_merged_exec
+
+ # Merge .class files from the individual targets.
+ classes_dir=$DUMPS_DIR/classes
+ mkdir $classes_dir
+ for fuzz_target in $FUZZ_TARGETS; do
+ cp -r $DUMPS_DIR/${fuzz_target}_classes/* $classes_dir/
+ done
+
+ # Heuristically determine source directories based on Maven structure.
+ # Always include the $SRC root as it likely contains the fuzzer sources.
+ sourcefiles_args=(--sourcefiles $OUT/$SRC)
+ source_dirs=$(find $OUT/$SRC -type d -name 'java')
+ for source_dir in $source_dirs; do
+ sourcefiles_args+=(--sourcefiles "$source_dir")
+ done
+
+ # Generate HTML and XML reports.
+ xml_report=$REPORT_PLATFORM_DIR/index.xml
+ java -jar /opt/jacoco-cli.jar report $jacoco_merged_exec \
+ --html $REPORT_PLATFORM_DIR \
+ --xml $xml_report \
+ --classfiles $classes_dir \
+ "${sourcefiles_args[@]}"
+
+ # Write llvm-cov summary file.
+ jacoco_report_converter.py $xml_report $SUMMARY_FILE
+
+ set +e
else
# From this point on the script does not tolerate any errors.
@@ -206,6 +304,10 @@ else
fi
+# Make sure report is readable.
+chmod -R +r $REPORT_ROOT_DIR
+find $REPORT_ROOT_DIR -type d -exec chmod +x {} +
+
if [[ -n $HTTP_PORT ]]; then
# Serve the report locally.
echo "Serving the report on http://127.0.0.1:$HTTP_PORT/linux/index.html"
diff --git a/infra/base-images/base-runner/coverage_helper b/infra/base-images/base-runner/coverage_helper
index 22c9cb5d6..4d29ceac8 100755
--- a/infra/base-images/base-runner/coverage_helper
+++ b/infra/base-images/base-runner/coverage_helper
@@ -14,4 +14,4 @@
# limitations under the License.
#
################################################################################
-python3 /opt/code_coverage/coverage_utils.py $@
+python3 $CODE_COVERAGE_SRC/coverage_utils.py $@
diff --git a/infra/base-images/base-runner/jacoco_report_converter.py b/infra/base-images/base-runner/jacoco_report_converter.py
new file mode 100755
index 000000000..3c36065f1
--- /dev/null
+++ b/infra/base-images/base-runner/jacoco_report_converter.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Helper script for creating an llvm-cov style JSON summary from a JaCoCo XML
+report."""
+import json
+import os
+import sys
+import xml.etree.ElementTree as ET
+
+
+def convert(xml):
+ """Turns a JaCoCo XML report into an llvm-cov JSON summary."""
+ summary = {
+ "type": "oss-fuzz.java.coverage.json.export",
+ "version": "1.0.0",
+ "data": [{
+ "totals": {},
+ "files": [],
+ }],
+ }
+
+ report = ET.fromstring(xml)
+ totals = make_element_summary(report)
+ summary["data"][0]["totals"] = totals
+
+ # Since Java compilation does not track source file location, we match
+ # coverage info to source files via the full class name, e.g. we search for
+ # a path in /out/src ending in foo/bar/Baz.java for the class foo.bar.Baz.
+ # Under the assumptions that a given project only ever contains a single
+ # version of a class and that no class name appears as a suffix of another
+ # class name, we can assign coverage info to every source file matched in that
+ # way.
+ src_files = list_src_files()
+
+ for class_element in report.findall("./package/class"):
+ class_name = class_element.attrib["name"]
+ package_name = os.path.dirname(class_name)
+ if "sourcefilename" not in class_element.attrib:
+ continue
+ basename = class_element.attrib["sourcefilename"]
+ # This path is "foo/Bar.java" for the class element
+ # <class name="foo/Bar" sourcefilename="Bar.java">.
+ canonical_path = os.path.join(package_name, basename)
+
+ class_summary = make_element_summary(class_element)
+ summary["data"][0]["files"].append({
+ "filename": relative_to_src_path(src_files, canonical_path),
+ "summary": class_summary,
+ })
+
+ return json.dumps(summary)
+
+
+def list_src_files():
+ """Returns a map from basename to full path for all files in $OUT/$SRC."""
+ filename_to_paths = {}
+ out_path = os.environ["OUT"] + "/"
+ src_path = os.environ["SRC"]
+ src_in_out = out_path + src_path
+ for dirpath, _, filenames in os.walk(src_in_out):
+ for filename in filenames:
+ full_path = dirpath + "/" + filename
+ # Map /out//src/... to /src/...
+ src_path = full_path[len(out_path):]
+ filename_to_paths.setdefault(filename, []).append(src_path)
+ return filename_to_paths
+
+
+def relative_to_src_path(src_files, canonical_path):
+ """Returns all paths in src_files ending in canonical_path."""
+ basename = os.path.basename(canonical_path)
+ if basename not in src_files:
+ return []
+ candidate_paths = src_files[basename]
+ return [
+ path for path in candidate_paths if path.endswith("/" + canonical_path)
+ ]
+
+
+def make_element_summary(element):
+ """Returns a coverage summary for an element in the XML report."""
+ summary = {}
+
+ function_counter = element.find("./counter[@type='METHOD']")
+ summary["functions"] = make_counter_summary(function_counter)
+
+ line_counter = element.find("./counter[@type='LINE']")
+ summary["lines"] = make_counter_summary(line_counter)
+
+ # JaCoCo tracks branch coverage, which counts the covered control-flow edges
+ # between llvm-cov's regions instead of the covered regions themselves. For
+ # non-trivial code parts, the difference is usually negligible. However, if
+ # all methods of a class consist of a single region only (no branches),
+ # JaCoCo does not report any branch coverage even if there is instruction
+ # coverage. Since this would give incorrect results for CI Fuzz purposes, we
+ # increase the regions counter by 1 if there is any amount of instruction
+ # coverage.
+ instruction_counter = element.find("./counter[@type='INSTRUCTION']")
+ has_some_coverage = instruction_counter is not None and int(
+ instruction_counter.attrib["covered"]) > 0
+ branch_covered_adjustment = 1 if has_some_coverage else 0
+ region_counter = element.find("./counter[@type='BRANCH']")
+ summary["regions"] = make_counter_summary(
+ region_counter, covered_adjustment=branch_covered_adjustment)
+
+ return summary
+
+
+def make_counter_summary(counter_element, covered_adjustment=0):
+ """Turns a JaCoCo <counter> element into an llvm-cov totals entry."""
+ summary = {}
+ covered = covered_adjustment
+ missed = 0
+ if counter_element is not None:
+ covered += int(counter_element.attrib["covered"])
+ missed += int(counter_element.attrib["missed"])
+ summary["covered"] = covered
+ summary["notcovered"] = missed
+ summary["count"] = summary["covered"] + summary["notcovered"]
+ if summary["count"] != 0:
+ summary["percent"] = (100.0 * summary["covered"]) / summary["count"]
+ else:
+ summary["percent"] = 0
+ return summary
+
+
+def main():
+ """Produces an llvm-cov style JSON summary from a JaCoCo XML report."""
+ if len(sys.argv) != 3:
+ sys.stderr.write('Usage: %s <path_to_jacoco_xml> <out_path_json>\n' %
+ sys.argv[0])
+ return 1
+
+ with open(sys.argv[1], 'r') as xml_file:
+ xml_report = xml_file.read()
+ json_summary = convert(xml_report)
+ with open(sys.argv[2], 'w') as json_file:
+ json_file.write(json_summary)
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/infra/base-images/base-runner/profraw_update.py b/infra/base-images/base-runner/profraw_update.py
new file mode 100644
index 000000000..408b5fb93
--- /dev/null
+++ b/infra/base-images/base-runner/profraw_update.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Helper script for upgrading a profraw file to latest version."""
+
+from collections import namedtuple
+import struct
+import subprocess
+import sys
+
+HeaderGeneric = namedtuple('HeaderGeneric', 'magic version')
+HeaderVersion7 = namedtuple(
+ 'HeaderVersion7',
+ 'BinaryIdsSize DataSize PaddingBytesBeforeCounters CountersSize \
+ PaddingBytesAfterCounters NamesSize CountersDelta NamesDelta ValueKindLast')
+
+PROFRAW_MAGIC = 0xff6c70726f667281
+
+
+def relativize_address(data, offset, databegin, sect_prf_cnts, sect_prf_data):
+ """Turns an absolute offset into a relative one."""
+ value = struct.unpack('Q', data[offset:offset + 8])[0]
+ if sect_prf_cnts <= value < sect_prf_data:
+ # If the value is an address in the right section, make it relative.
+ value = (value - databegin) & 0xffffffffffffffff
+ value = struct.pack('Q', value)
+ for i in range(8):
+ data[offset + i] = value[i]
+
+
+def upgrade(data, sect_prf_cnts, sect_prf_data):
+ """Upgrades profraw data, knowing the sections addresses."""
+ generic_header = HeaderGeneric._make(struct.unpack('QQ', data[:16]))
+ if generic_header.magic != PROFRAW_MAGIC:
+ raise Exception('Bad magic.')
+ if generic_header.version == 5:
+ generic_header = generic_header._replace(version=7)
+ # Upgrade from version 5 to 7 by adding binaryids field.
+ data = struct.pack('QQ', *generic_header) + struct.pack('Q', 0) + data[16:]
+ if generic_header.version < 7:
+ raise Exception('Unhandled version.')
+ v7_header = HeaderVersion7._make(struct.unpack('QQQQQQQQQ', data[16:88]))
+
+ if v7_header.BinaryIdsSize % 8 != 0:
+ # Adds padding for binary ids.
+ # cf commit b9f547e8e51182d32f1912f97a3e53f4899ea6be
+ # cf https://reviews.llvm.org/D110365
+ padlen = 8 - (v7_header.BinaryIdsSize % 8)
+ v7_header = v7_header._replace(BinaryIdsSize=v7_header.BinaryIdsSize +
+ padlen)
+ data = data[:16] + struct.pack('Q', v7_header.BinaryIdsSize) + data[24:]
+ data = data[:88 + v7_header.BinaryIdsSize] + bytes(
+ padlen) + data[88 + v7_header.BinaryIdsSize:]
+
+ if v7_header.CountersDelta != sect_prf_cnts - sect_prf_data:
+ # Rust linking seems to add an offset...
+ sect_prf_data = v7_header.CountersDelta - sect_prf_cnts + sect_prf_data
+ sect_prf_cnts = v7_header.CountersDelta
+
+ dataref = sect_prf_data
+ relativize_address(data, 64, dataref, sect_prf_cnts, sect_prf_data)
+
+ offset = 88 + v7_header.BinaryIdsSize
+ # This also works for C+Rust binaries compiled with
+ # clang-14/rust-nightly-clang-13.
+ for _ in range(v7_header.DataSize):
+ # 16 is the offset of CounterPtr in ProfrawData structure.
+ relativize_address(data, offset + 16, dataref, sect_prf_cnts, sect_prf_data)
+ # We need this because of CountersDelta -= sizeof(*SrcData);
+ # seen in __llvm_profile_merge_from_buffer.
+ dataref += 44 + 2 * (v7_header.ValueKindLast + 1)
+ # This is the size of one ProfrawData structure.
+ offset += 44 + 2 * (v7_header.ValueKindLast + 1)
+
+ return data
+
+
+def main():
+ """Helper script for upgrading a profraw file to latest version."""
+ if len(sys.argv) != 4:
+ sys.stderr.write('Usage: %s <binary> <profraw> <output>\n' % sys.argv[0])
+ return 1
+
+ # First find llvm profile sections addresses in the elf, quick and dirty.
+ process = subprocess.Popen(['readelf', '-S', sys.argv[1]],
+ stdout=subprocess.PIPE)
+ output, _ = process.communicate()
+ if process.returncode != 0:
+ print('readelf failed')
+ return 2
+ for line in iter(output.split(b'\n')):
+ if b'__llvm_prf_cnts' in line:
+ sect_prf_cnts = int(line.split()[4], 16)
+ elif b'__llvm_prf_data' in line:
+ sect_prf_data = int(line.split()[4], 16)
+
+ # Then open and read the input profraw file.
+ with open(sys.argv[2], 'rb') as input_file:
+ profraw_base = bytearray(input_file.read())
+ # Do the upgrade, returning a bytes object.
+ profraw_latest = upgrade(profraw_base, sect_prf_cnts, sect_prf_data)
+ # Write the output to the file given to the command line.
+ with open(sys.argv[3], 'wb') as output_file:
+ output_file.write(profraw_latest)
+
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())
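
Note: before handing a dump to this script it is cheap to confirm it is a profraw file at all. A minimal pre-flight sketch (the magic constant is copied from the script above; the file path is hypothetical):

    import struct

    PROFRAW_MAGIC = 0xff6c70726f667281  # little-endian magic, as above


    def profraw_version(path):
        """Returns the header version of a profraw file."""
        with open(path, 'rb') as dump:
            magic, version = struct.unpack('QQ', dump.read(16))
        if magic != PROFRAW_MAGIC:
            raise ValueError('%s is not a little-endian profraw file' % path)
        return version


    # e.g. profraw_version('dumps/example_fuzzer.1.profraw') -> 5 or 7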
diff --git a/infra/base-images/base-runner/run_fuzzer b/infra/base-images/base-runner/run_fuzzer
index b9bc8d9d6..426688ea3 100755
--- a/infra/base-images/base-runner/run_fuzzer
+++ b/infra/base-images/base-runner/run_fuzzer
@@ -26,7 +26,14 @@ DEBUGGER=${DEBUGGER:-}
FUZZER=$1
shift
-CORPUS_DIR=${CORPUS_DIR:-"/tmp/${FUZZER}_corpus"}
+# This env var is set by CIFuzz. CIFuzz fills this directory with the corpus
+# from ClusterFuzz.
+CORPUS_DIR=${CORPUS_DIR:-}
+if [ -z "$CORPUS_DIR" ]
+then
+ CORPUS_DIR="/tmp/${FUZZER}_corpus"
+ rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR
+fi
SANITIZER=${SANITIZER:-}
if [ -z $SANITIZER ]; then
@@ -63,14 +70,13 @@ function get_dictionary() {
fi
}
-rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR
rm -rf $FUZZER_OUT && mkdir -p $FUZZER_OUT
SEED_CORPUS="${FUZZER}_seed_corpus.zip"
if [ -f $SEED_CORPUS ] && [ -z ${SKIP_SEED_CORPUS:-} ]; then
echo "Using seed corpus: $SEED_CORPUS"
- unzip -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null
+ unzip -o -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null
fi
OPTIONS_FILE="${FUZZER}.options"
@@ -103,19 +109,18 @@ if [[ "$FUZZING_ENGINE" = afl ]]; then
export UBSAN_OPTIONS="$UBSAN_OPTIONS:symbolize=0"
export AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
export AFL_SKIP_CPUFREQ=1
- export AFL_NO_AFFINITY=1
+ export AFL_TRY_AFFINITY=1
export AFL_FAST_CAL=1
+ export AFL_CMPLOG_ONLY_NEW=1
+ export AFL_FORKSRV_INIT_TMOUT=30000
# If $OUT/afl_cmplog.txt is present this means the target was compiled for
- # CMPLOG. So we have to add the proper parameters to afl-fuzz. `-l 2` is
- # CMPLOG level 2, which will colorize larger files but not huge files and
- # not enable transform analysis unless there have been several cycles without
- # any finds.
- test -e "$OUT/afl_cmplog.txt" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -l 2 -c $OUT/$FUZZER"
+ # CMPLOG. So we have to add the proper parameters to afl-fuzz.
+ test -e "$OUT/afl_cmplog.txt" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -c $OUT/$FUZZER"
# If $OUT/afl++.dict we load it as a dictionary for afl-fuzz.
test -e "$OUT/afl++.dict" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -x $OUT/afl++.dict"
- # Ensure timeout is a bit large than 1sec as some of the OSS-Fuzz fuzzers
- # are slower than this.
- AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -t 5000+"
+ # Ensure timeout is a bit larger than 1sec as some of the OSS-Fuzz fuzzers
+ # are slower than this.
+ AFL_FUZZER_ARGS="$FUZZER_ARGS $AFL_FUZZER_ARGS -t 5000+"
# AFL expects at least 1 file in the input dir.
echo input > ${CORPUS_DIR}/input
echo afl++ setup:
@@ -135,7 +140,7 @@ elif [[ "$FUZZING_ENGINE" = honggfuzz ]]; then
# -P: use persistent mode of fuzzing (i.e. LLVMFuzzerTestOneInput)
# -f: location of the initial (and destination) file corpus
# -n: number of fuzzing threads (and processes)
- CMD_LINE="$OUT/honggfuzz -n 1 --exit_upon_crash -R /tmp/${FUZZER}_honggfuzz.report -W $FUZZER_OUT -v -z -P -f \"$CORPUS_DIR\" $(get_dictionary) $* -- \"$OUT/$FUZZER\""
+ CMD_LINE="$OUT/honggfuzz -n 1 --exit_upon_crash -R /tmp/${FUZZER}_honggfuzz.report -W $FUZZER_OUT -v -z -P -f \"$CORPUS_DIR\" $(get_dictionary) $FUZZER_ARGS $* -- \"$OUT/$FUZZER\""
else
diff --git a/infra/base-images/base-runner/targets_list b/infra/base-images/base-runner/targets_list
index d35534258..95615c811 100755
--- a/infra/base-images/base-runner/targets_list
+++ b/infra/base-images/base-runner/targets_list
@@ -2,7 +2,8 @@
for binary in $(find $OUT/ -executable -type f); do
[[ "$binary" != *.so ]] || continue
- file "$binary" | grep ELF > /dev/null 2>&1 || continue
+ [[ $(basename "$binary") != jazzer_driver* ]] || continue
+ file "$binary" | grep -e ELF -e "shell script" > /dev/null 2>&1 || continue
grep "LLVMFuzzerTestOneInput" "$binary" > /dev/null 2>&1 || continue
basename "$binary"
diff --git a/infra/base-images/base-runner/test_all.py b/infra/base-images/base-runner/test_all.py
index 925ebde69..16dfcbfa9 100755
--- a/infra/base-images/base-runner/test_all.py
+++ b/infra/base-images/base-runner/test_all.py
@@ -20,12 +20,12 @@ import contextlib
import multiprocessing
import os
import re
-import shutil
import subprocess
import stat
import sys
+import tempfile
-TMP_FUZZER_DIR = '/tmp/not-out'
+BASE_TMP_FUZZER_DIR = '/tmp/not-out'
EXECUTABLE = stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
@@ -37,14 +37,6 @@ IGNORED_TARGETS = [
IGNORED_TARGETS_RE = re.compile('^' + r'$|^'.join(IGNORED_TARGETS) + '$')
-def recreate_directory(directory):
- """Creates |directory|. If it already exists than deletes it first before
- creating."""
- if os.path.exists(directory):
- shutil.rmtree(directory)
- os.mkdir(directory)
-
-
def move_directory_contents(src_directory, dst_directory):
"""Moves contents of |src_directory| to |dst_directory|."""
# Use mv because mv preserves file permissions. If we don't preserve file
@@ -67,7 +59,15 @@ def is_elf(filepath):
return b'ELF' in result.stdout
-def find_fuzz_targets(directory, fuzzing_language):
+def is_shell_script(filepath):
+ """Returns True if |filepath| is a shell script."""
+ result = subprocess.run(['file', filepath],
+ stdout=subprocess.PIPE,
+ check=False)
+ return b'shell script' in result.stdout
+
+
+def find_fuzz_targets(directory):
"""Returns paths to fuzz targets in |directory|."""
# TODO(https://github.com/google/oss-fuzz/issues/4585): Use libClusterFuzz for
# this.
@@ -84,10 +84,10 @@ def find_fuzz_targets(directory, fuzzing_language):
continue
if not os.stat(path).st_mode & EXECUTABLE:
continue
- # Fuzz targets are expected to be ELF binaries for languages other than
- # Python and Java.
- if (fuzzing_language != 'python' and fuzzing_language != 'jvm' and
- not is_elf(path)):
+ # Fuzz targets can either be ELF binaries or shell scripts (e.g. wrapper
+ # scripts for Python and JVM targets or rules_fuzzing builds with runfiles
+ # trees).
+ if not is_elf(path) and not is_shell_script(path):
continue
if os.getenv('FUZZING_ENGINE') != 'none':
with open(path, 'rb') as file_handle:
@@ -132,51 +132,66 @@ def has_ignored_targets(out_dir):
@contextlib.contextmanager
def use_different_out_dir():
- """Context manager that moves OUT to TMP_FUZZER_DIR. This is useful for
- catching hardcoding. Note that this sets the environment variable OUT and
- therefore must be run before multiprocessing.Pool is created. Resets OUT at
- the end."""
+ """Context manager that moves OUT to subdirectory of BASE_TMP_FUZZER_DIR. This
+ is useful for catching hardcoding. Note that this sets the environment
+ variable OUT and therefore must be run before multiprocessing.Pool is created.
+ Resets OUT at the end."""
# Use a fake OUT directory to catch path hardcoding that breaks on
# ClusterFuzz.
- out = os.getenv('OUT')
- initial_out = out
- recreate_directory(TMP_FUZZER_DIR)
- out = TMP_FUZZER_DIR
- # Set this so that run_fuzzer which is called by bad_build_check works
- # properly.
- os.environ['OUT'] = out
- # We move the contents of the directory because we can't move the
- # directory itself because it is a mount.
- move_directory_contents(initial_out, out)
- try:
- yield out
- finally:
- move_directory_contents(out, initial_out)
- shutil.rmtree(out)
- os.environ['OUT'] = initial_out
-
-
-def test_all_outside_out(fuzzing_language, allowed_broken_targets_percentage):
+ initial_out = os.getenv('OUT')
+ os.makedirs(BASE_TMP_FUZZER_DIR, exist_ok=True)
+ # Use a random subdirectory of BASE_TMP_FUZZER_DIR to allow running multiple
+ # instances of test_all in parallel (useful for integration testing).
+ with tempfile.TemporaryDirectory(dir=BASE_TMP_FUZZER_DIR) as out:
+ # Set this so that run_fuzzer which is called by bad_build_check works
+ # properly.
+ os.environ['OUT'] = out
+ # We move the contents of the directory because we can't move the
+ # directory itself because it is a mount.
+ move_directory_contents(initial_out, out)
+ try:
+ yield out
+ finally:
+ move_directory_contents(out, initial_out)
+ os.environ['OUT'] = initial_out
+
+
+def test_all_outside_out(allowed_broken_targets_percentage):
"""Wrapper around test_all that changes OUT and returns the result."""
with use_different_out_dir() as out:
- return test_all(out, fuzzing_language, allowed_broken_targets_percentage)
+ return test_all(out, allowed_broken_targets_percentage)
-def test_all(out, fuzzing_language, allowed_broken_targets_percentage):
+def test_all(out, allowed_broken_targets_percentage):
"""Do bad_build_check on all fuzz targets."""
# TODO(metzman): Refactor so that we can convert test_one to python.
- fuzz_targets = find_fuzz_targets(out, fuzzing_language)
+ fuzz_targets = find_fuzz_targets(out)
if not fuzz_targets:
print('ERROR: No fuzz targets found.')
return False
pool = multiprocessing.Pool()
bad_build_results = pool.map(do_bad_build_check, fuzz_targets)
+ pool.close()
+ pool.join()
broken_targets = get_broken_fuzz_targets(bad_build_results, fuzz_targets)
broken_targets_count = len(broken_targets)
if not broken_targets_count:
return True
+ print('Retrying failed fuzz targets sequentially', broken_targets_count)
+ pool = multiprocessing.Pool(1)
+ retry_targets = []
+ for broken_target, _ in broken_targets:
+ retry_targets.append(broken_target)
+ bad_build_results = pool.map(do_bad_build_check, retry_targets)
+ pool.close()
+ pool.join()
+ broken_targets = get_broken_fuzz_targets(bad_build_results, broken_targets)
+ broken_targets_count = len(broken_targets)
+ if not broken_targets_count:
+ return True
+
print('Broken fuzz targets', broken_targets_count)
total_targets_count = len(fuzz_targets)
broken_targets_percentage = 100 * broken_targets_count / total_targets_count
@@ -211,11 +226,8 @@ def get_allowed_broken_targets_percentage():
def main():
"""Does bad_build_check on all fuzz targets in parallel. Returns 0 on success.
Returns 1 on failure."""
- # Set these environment variables here so that stdout
- fuzzing_language = os.getenv('FUZZING_LANGUAGE')
allowed_broken_targets_percentage = get_allowed_broken_targets_percentage()
- if not test_all_outside_out(fuzzing_language,
- allowed_broken_targets_percentage):
+ if not test_all_outside_out(allowed_broken_targets_percentage):
return 1
return 0
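
Note: the retry pass added above reruns only the failures, through a single-worker pool, so that resource contention from the parallel first pass is ruled out before a target is declared broken. The pattern in isolation, as a sketch (check() is a hypothetical stand-in for do_bad_build_check and returns a plain boolean):

    import multiprocessing
    import subprocess


    def check(target):
        """Hypothetical stand-in for do_bad_build_check; True means healthy."""
        return subprocess.run(['bad_build_check', target],
                              check=False).returncode == 0


    def check_with_retry(targets):
        """Returns the targets that fail both the parallel and serial pass."""
        with multiprocessing.Pool() as pool:
            results = pool.map(check, targets)
        failed = [t for t, ok in zip(targets, results) if not ok]
        if not failed:
            return []
        # Second pass: one worker serializes the checks, removing CPU and
        # memory contention as a cause of spurious failures.
        with multiprocessing.Pool(1) as pool:
            retry_results = pool.map(check, failed)
        return [t for t, ok in zip(failed, retry_results) if not ok]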
diff --git a/infra/base-images/base-runner/test_all_test.py b/infra/base-images/base-runner/test_all_test.py
index 3771ec231..b3077ec1e 100644
--- a/infra/base-images/base-runner/test_all_test.py
+++ b/infra/base-images/base-runner/test_all_test.py
@@ -25,15 +25,13 @@ class TestTestAll(unittest.TestCase):
@mock.patch('test_all.find_fuzz_targets', return_value=[])
@mock.patch('builtins.print')
- def test_test_all_no_fuzz_targets(self, mocked_print, _):
+ def test_test_all_no_fuzz_targets(self, mock_print, _):
"""Tests that test_all returns False when there are no fuzz targets."""
outdir = '/out'
- fuzzing_language = 'c++'
allowed_broken_targets_percentage = 0
self.assertFalse(
- test_all.test_all(outdir, fuzzing_language,
- allowed_broken_targets_percentage))
- mocked_print.assert_called_with('ERROR: No fuzz targets found.')
+ test_all.test_all(outdir, allowed_broken_targets_percentage))
+ mock_print.assert_called_with('ERROR: No fuzz targets found.')
if __name__ == '__main__':