diff options
Diffstat (limited to 'infra/base-images/base-runner')
-rwxr-xr-x | infra/base-images/base-runner/Dockerfile | 18 | ||||
-rwxr-xr-x | infra/base-images/base-runner/bad_build_check | 12 | ||||
-rwxr-xr-x | infra/base-images/base-runner/coverage | 120 | ||||
-rwxr-xr-x | infra/base-images/base-runner/coverage_helper | 2 | ||||
-rwxr-xr-x | infra/base-images/base-runner/jacoco_report_converter.py | 158 | ||||
-rw-r--r-- | infra/base-images/base-runner/profraw_update.py | 123 | ||||
-rwxr-xr-x | infra/base-images/base-runner/run_fuzzer | 31 | ||||
-rwxr-xr-x | infra/base-images/base-runner/targets_list | 3 | ||||
-rwxr-xr-x | infra/base-images/base-runner/test_all.py | 102 | ||||
-rw-r--r-- | infra/base-images/base-runner/test_all_test.py | 8 |
10 files changed, 492 insertions, 85 deletions
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile index f847de026..fadd00acc 100755 --- a/infra/base-images/base-runner/Dockerfile +++ b/infra/base-images/base-runner/Dockerfile @@ -45,12 +45,18 @@ RUN apt-get update && apt-get install -y \ libcap2 \ python3 \ python3-pip \ + python3-setuptools \ unzip \ wget \ zip --no-install-recommends -RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage /opt/code_coverage && \ - pip3 install -r /opt/code_coverage/requirements.txt +ENV CODE_COVERAGE_SRC=/opt/code_coverage +RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage $CODE_COVERAGE_SRC && \ + cd /opt/code_coverage && \ + git checkout edba4873b5e8a390e977a64c522db2df18a8b27d && \ + pip3 install wheel && \ + pip3 install -r requirements.txt && \ + pip3 install MarkupSafe==0.23 # Default environment options for various sanitizers. # Note that these match the settings used in ClusterFuzz and @@ -91,6 +97,12 @@ RUN wget https://download.java.net/java/GA/jdk15.0.2/0d1cfde4252546c6931946de8db rm -f openjdk-15.0.2_linux-x64_bin.tar.gz && \ rm -rf $JAVA_HOME/jmods $JAVA_HOME/lib/src.zip +# Install JaCoCo for JVM coverage. +RUN wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.cli/0.8.7/org.jacoco.cli-0.8.7-nodeps.jar -O /opt/jacoco-cli.jar && \ + wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.agent/0.8.7/org.jacoco.agent-0.8.7-runtime.jar -O /opt/jacoco-agent.jar && \ + echo "37df187b76888101ecd745282e9cd1ad4ea508d6 /opt/jacoco-agent.jar" | shasum --check && \ + echo "c1814e7bba5fd8786224b09b43c84fd6156db690 /opt/jacoco-cli.jar" | shasum --check + # Do this last to make developing these files easier/faster due to caching. COPY bad_build_check \ collect_dft \ @@ -98,10 +110,12 @@ COPY bad_build_check \ coverage_helper \ dataflow_tracer.py \ download_corpus \ + jacoco_report_converter.py \ rcfilt \ reproduce \ run_fuzzer \ parse_options.py \ + profraw_update.py \ targets_list \ test_all.py \ test_one.py \ diff --git a/infra/base-images/base-runner/bad_build_check b/infra/base-images/base-runner/bad_build_check index 01f8fbbab..bb328c793 100755 --- a/infra/base-images/base-runner/bad_build_check +++ b/infra/base-images/base-runner/bad_build_check @@ -39,7 +39,7 @@ DFSAN_CALLS_THRESHOLD_FOR_NON_DFSAN_BUILD=0 MSAN_CALLS_THRESHOLD_FOR_MSAN_BUILD=1000 # Some engines (e.g. honggfuzz) may make a very small number of calls to msan # for memory poisoning. -MSAN_CALLS_THRESHOLD_FOR_NON_MSAN_BUILD=2 +MSAN_CALLS_THRESHOLD_FOR_NON_MSAN_BUILD=3 # Usually, a non UBSan build (e.g. ASan) has 165 calls to UBSan runtime. The # majority of targets built with UBSan have 200+ UBSan calls, but there are @@ -90,10 +90,7 @@ function check_engine { echo "BAD BUILD: $FUZZER seems to have only partial coverage instrumentation." fi elif [[ "$FUZZING_ENGINE" == afl ]]; then - # TODO(https://github.com/google/oss-fuzz/issues/2470): Dont use - # AFL_DRIVER_DONT_DEFER by default, support .options files in - # bad_build_check instead. - AFL_DRIVER_DONT_DEFER=1 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 20s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT + AFL_FORKSRV_INIT_TMOUT=30000 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 35s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT CHECK_PASSED=$(egrep "All set and ready to roll" -c $FUZZER_OUTPUT) if (( $CHECK_PASSED == 0 )); then echo "BAD BUILD: fuzzing $FUZZER with afl-fuzz failed." @@ -136,10 +133,7 @@ function check_startup_crash { SKIP_SEED_CORPUS=1 run_fuzzer $FUZZER_NAME -seed=1337 -runs=$MIN_NUMBER_OF_RUNS &>$FUZZER_OUTPUT CHECK_PASSED=$(egrep "Done $MIN_NUMBER_OF_RUNS runs" -c $FUZZER_OUTPUT) elif [[ "$FUZZING_ENGINE" = afl ]]; then - # TODO(https://github.com/google/oss-fuzz/issues/2470): Dont use - # AFL_DRIVER_DONT_DEFER by default, support .options files in - # bad_build_check instead. - AFL_DRIVER_DONT_DEFER=1 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 20s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT + AFL_FORKSRV_INIT_TMOUT=30000 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 35s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT if [ $(egrep "target binary (crashed|terminated)" -c $FUZZER_OUTPUT) -eq 0 ]; then CHECK_PASSED=1 fi diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage index a86b00dec..3c7b274e4 100755 --- a/infra/base-images/base-runner/coverage +++ b/infra/base-images/base-runner/coverage @@ -19,14 +19,21 @@ cd $OUT if (( $# > 0 )); then FUZZ_TARGETS="$@" else - FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n')" + FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n' | \ + grep -v -x -F \ + -e 'llvm-symbolizer' \ + -e 'jazzer_agent_deploy.jar' \ + -e 'jazzer_driver' \ + -e 'jazzer_driver_with_sanitizer')" fi -DUMPS_DIR="$OUT/dumps" -FUZZER_STATS_DIR="$OUT/fuzzer_stats" -LOGS_DIR="$OUT/logs" -REPORT_ROOT_DIR="$OUT/report" -REPORT_PLATFORM_DIR="$OUT/report/linux" +COVERAGE_OUTPUT_DIR=${COVERAGE_OUTPUT_DIR:-$OUT} + +DUMPS_DIR="$COVERAGE_OUTPUT_DIR/dumps" +FUZZER_STATS_DIR="$COVERAGE_OUTPUT_DIR/fuzzer_stats" +LOGS_DIR="$COVERAGE_OUTPUT_DIR/logs" +REPORT_ROOT_DIR="$COVERAGE_OUTPUT_DIR/report" +REPORT_PLATFORM_DIR="$COVERAGE_OUTPUT_DIR/report/linux" for directory in $DUMPS_DIR $FUZZER_STATS_DIR $LOGS_DIR $REPORT_ROOT_DIR \ $REPORT_PLATFORM_DIR; do @@ -54,6 +61,8 @@ objects="" # Number of CPUs available, this is needed for running tests in parallel. NPROC=$(nproc) +CORPUS_DIR=${CORPUS_DIR:-"/corpus"} + function run_fuzz_target { local target=$1 @@ -62,7 +71,7 @@ function run_fuzz_target { local profraw_file="$DUMPS_DIR/$target.%1m.profraw" local profraw_file_mask="$DUMPS_DIR/$target.*.profraw" local profdata_file="$DUMPS_DIR/$target.profdata" - local corpus_real="/corpus/${target}" + local corpus_real="$CORPUS_DIR/${target}" # -merge=1 requires an output directory, create a new, empty dir for that. local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}" @@ -74,7 +83,7 @@ function run_fuzz_target { # because (A) corpuses are already minimized; (B) we do not use sancov, and so # libFuzzer always finishes merge with an empty output dir. # Use 100s timeout instead of 25s as code coverage builds can be very slow. - local args="-merge=1 -timeout=100 -close_fd_mask=3 $corpus_dummy $corpus_real" + local args="-merge=1 -timeout=100 $corpus_dummy $corpus_real" export LLVM_PROFILE_FILE=$profraw_file timeout $TIMEOUT $OUT/$target $args &> $LOGS_DIR/$target.log @@ -90,6 +99,9 @@ function run_fuzz_target { return 0 fi + # If necessary translate to latest profraw version. + profraw_update.py $OUT/$target $profraw_file_mask tmp.profraw + mv tmp.profraw $profraw_file_mask llvm-profdata merge -j=1 -sparse $profraw_file_mask -o $profdata_file # Delete unnecessary and (potentially) large .profraw files. @@ -115,7 +127,7 @@ function run_go_fuzz_target { local target=$1 echo "Running go target $target" - export FUZZ_CORPUS_DIR="/corpus/${target}/" + export FUZZ_CORPUS_DIR="$CORPUS_DIR/${target}/" export FUZZ_PROFILE_NAME="$DUMPS_DIR/$target.perf" $OUT/$target -test.coverprofile $DUMPS_DIR/$target.profdata &> $LOGS_DIR/$target.log # translate from golangish paths to current absolute paths @@ -125,6 +137,47 @@ function run_go_fuzz_target { $SYSGOPATH/bin/gocovsum $DUMPS_DIR/$target.profdata > $FUZZER_STATS_DIR/$target.json } +function run_java_fuzz_target { + local target=$1 + + local exec_file="$DUMPS_DIR/$target.exec" + local class_dump_dir="$DUMPS_DIR/${target}_classes/" + mkdir "$class_dump_dir" + local corpus_real="$CORPUS_DIR/${target}" + + # -merge=1 requires an output directory, create a new, empty dir for that. + local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}" + rm -rf $corpus_dummy && mkdir -p $corpus_dummy + + # Use 100s timeout instead of 25s as code coverage builds can be very slow. + local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir,excludes=com.code_intelligence.jazzer.*" + local args="-merge=1 -timeout=100 --nohooks \ + --additional_jvm_args=-javaagent:/opt/jacoco-agent.jar=$jacoco_args \ + $corpus_dummy $corpus_real" + + timeout $TIMEOUT $OUT/$target $args &> $LOGS_DIR/$target.log + if (( $? != 0 )); then + echo "Error occured while running $target:" + cat $LOGS_DIR/$target.log + fi + + if (( $(du -c $exec_file | tail -n 1 | cut -f 1) == 0 )); then + # Skip fuzz targets that failed to produce .exec files. + return 0 + fi + + # Generate XML report only as input to jacoco_report_converter. + # Source files are not needed for the summary. + local xml_report="$DUMPS_DIR/${target}.xml" + local summary_file="$FUZZER_STATS_DIR/$target.json" + java -jar /opt/jacoco-cli.jar report $exec_file \ + --xml $xml_report \ + --classfiles $class_dump_dir + + # Write llvm-cov summary file. + jacoco_report_converter.py $xml_report $summary_file +} + export SYSGOPATH=$GOPATH export GOPATH=$OUT/$GOPATH # Run each fuzz target, generate raw coverage dumps. @@ -136,6 +189,14 @@ for fuzz_target in $FUZZ_TARGETS; do grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1 || continue fi run_go_fuzz_target $fuzz_target & + elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then + # Continue if not a fuzz target. + if [[ $FUZZING_ENGINE != "none" ]]; then + grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1 || continue + fi + + echo "Running $fuzz_target" + run_java_fuzz_target $fuzz_target & else # Continue if not a fuzz target. if [[ $FUZZING_ENGINE != "none" ]]; then @@ -175,6 +236,43 @@ if [[ $FUZZING_LANGUAGE == "go" ]]; then mv merged.data $REPORT_ROOT_DIR/heap.prof #TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof echo "Finished generating code coverage report for Go fuzz targets." +elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then + + # From this point on the script does not tolerate any errors. + set -e + + # Merge .exec files from the individual targets. + jacoco_merged_exec=$DUMPS_DIR/jacoco.merged.exec + java -jar /opt/jacoco-cli.jar merge $DUMPS_DIR/*.exec \ + --destfile $jacoco_merged_exec + + # Merge .class files from the individual targets. + classes_dir=$DUMPS_DIR/classes + mkdir $classes_dir + for fuzz_target in $FUZZ_TARGETS; do + cp -r $DUMPS_DIR/${fuzz_target}_classes/* $classes_dir/ + done + + # Heuristically determine source directories based on Maven structure. + # Always include the $SRC root as it likely contains the fuzzer sources. + sourcefiles_args=(--sourcefiles $OUT/$SRC) + source_dirs=$(find $OUT/$SRC -type d -name 'java') + for source_dir in $source_dirs; do + sourcefiles_args+=(--sourcefiles "$source_dir") + done + + # Generate HTML and XML reports. + xml_report=$REPORT_PLATFORM_DIR/index.xml + java -jar /opt/jacoco-cli.jar report $jacoco_merged_exec \ + --html $REPORT_PLATFORM_DIR \ + --xml $xml_report \ + --classfiles $classes_dir \ + "${sourcefiles_args[@]}" + + # Write llvm-cov summary file. + jacoco_report_converter.py $xml_report $SUMMARY_FILE + + set +e else # From this point on the script does not tolerate any errors. @@ -206,6 +304,10 @@ else fi +# Make sure report is readable. +chmod -R +r $REPORT_ROOT_DIR +find $REPORT_ROOT_DIR -type d -exec chmod +x {} + + if [[ -n $HTTP_PORT ]]; then # Serve the report locally. echo "Serving the report on http://127.0.0.1:$HTTP_PORT/linux/index.html" diff --git a/infra/base-images/base-runner/coverage_helper b/infra/base-images/base-runner/coverage_helper index 22c9cb5d6..4d29ceac8 100755 --- a/infra/base-images/base-runner/coverage_helper +++ b/infra/base-images/base-runner/coverage_helper @@ -14,4 +14,4 @@ # limitations under the License. # ################################################################################ -python3 /opt/code_coverage/coverage_utils.py $@ +python3 $CODE_COVERAGE_SRC/coverage_utils.py $@ diff --git a/infra/base-images/base-runner/jacoco_report_converter.py b/infra/base-images/base-runner/jacoco_report_converter.py new file mode 100755 index 000000000..3c36065f1 --- /dev/null +++ b/infra/base-images/base-runner/jacoco_report_converter.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python3 +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +"""Helper script for creating an llvm-cov style JSON summary from a JaCoCo XML +report.""" +import json +import os +import sys +import xml.etree.ElementTree as ET + + +def convert(xml): + """Turns a JaCoCo XML report into an llvm-cov JSON summary.""" + summary = { + "type": "oss-fuzz.java.coverage.json.export", + "version": "1.0.0", + "data": [{ + "totals": {}, + "files": [], + }], + } + + report = ET.fromstring(xml) + totals = make_element_summary(report) + summary["data"][0]["totals"] = totals + + # Since Java compilation does not track source file location, we match + # coverage info to source files via the full class name, e.g. we search for + # a path in /out/src ending in foo/bar/Baz.java for the class foo.bar.Baz. + # Under the assumptions that a given project only ever contains a single + # version of a class and that no class name appears as a suffix of another + # class name, we can assign coverage info to every source file matched in that + # way. + src_files = list_src_files() + + for class_element in report.findall("./package/class"): + class_name = class_element.attrib["name"] + package_name = os.path.dirname(class_name) + if "sourcefilename" not in class_element.attrib: + continue + basename = class_element.attrib["sourcefilename"] + # This path is "foo/Bar.java" for the class element + # <class name="foo/Bar" sourcefilename="Bar.java">. + canonical_path = os.path.join(package_name, basename) + + class_summary = make_element_summary(class_element) + summary["data"][0]["files"].append({ + "filename": relative_to_src_path(src_files, canonical_path), + "summary": class_summary, + }) + + return json.dumps(summary) + + +def list_src_files(): + """Returns a map from basename to full path for all files in $OUT/$SRC.""" + filename_to_paths = {} + out_path = os.environ["OUT"] + "/" + src_path = os.environ["SRC"] + src_in_out = out_path + src_path + for dirpath, _, filenames in os.walk(src_in_out): + for filename in filenames: + full_path = dirpath + "/" + filename + # Map /out//src/... to /src/... + src_path = full_path[len(out_path):] + filename_to_paths.setdefault(filename, []).append(src_path) + return filename_to_paths + + +def relative_to_src_path(src_files, canonical_path): + """Returns all paths in src_files ending in canonical_path.""" + basename = os.path.basename(canonical_path) + if basename not in src_files: + return [] + candidate_paths = src_files[basename] + return [ + path for path in candidate_paths if path.endswith("/" + canonical_path) + ] + + +def make_element_summary(element): + """Returns a coverage summary for an element in the XML report.""" + summary = {} + + function_counter = element.find("./counter[@type='METHOD']") + summary["functions"] = make_counter_summary(function_counter) + + line_counter = element.find("./counter[@type='LINE']") + summary["lines"] = make_counter_summary(line_counter) + + # JaCoCo tracks branch coverage, which counts the covered control-flow edges + # between llvm-cov's regions instead of the covered regions themselves. For + # non-trivial code parts, the difference is usually negligible. However, if + # all methods of a class consist of a single region only (no branches), + # JaCoCo does not report any branch coverage even if there is instruction + # coverage. Since this would give incorrect results for CI Fuzz purposes, we + # increase the regions counter by 1 if there is any amount of instruction + # coverage. + instruction_counter = element.find("./counter[@type='INSTRUCTION']") + has_some_coverage = instruction_counter is not None and int( + instruction_counter.attrib["covered"]) > 0 + branch_covered_adjustment = 1 if has_some_coverage else 0 + region_counter = element.find("./counter[@type='BRANCH']") + summary["regions"] = make_counter_summary( + region_counter, covered_adjustment=branch_covered_adjustment) + + return summary + + +def make_counter_summary(counter_element, covered_adjustment=0): + """Turns a JaCoCo <counter> element into an llvm-cov totals entry.""" + summary = {} + covered = covered_adjustment + missed = 0 + if counter_element is not None: + covered += int(counter_element.attrib["covered"]) + missed += int(counter_element.attrib["missed"]) + summary["covered"] = covered + summary["notcovered"] = missed + summary["count"] = summary["covered"] + summary["notcovered"] + if summary["count"] != 0: + summary["percent"] = (100.0 * summary["covered"]) / summary["count"] + else: + summary["percent"] = 0 + return summary + + +def main(): + """Produces an llvm-cov style JSON summary from a JaCoCo XML report.""" + if len(sys.argv) != 3: + sys.stderr.write('Usage: %s <path_to_jacoco_xml> <out_path_json>\n' % + sys.argv[0]) + return 1 + + with open(sys.argv[1], 'r') as xml_file: + xml_report = xml_file.read() + json_summary = convert(xml_report) + with open(sys.argv[2], 'w') as json_file: + json_file.write(json_summary) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/infra/base-images/base-runner/profraw_update.py b/infra/base-images/base-runner/profraw_update.py new file mode 100644 index 000000000..408b5fb93 --- /dev/null +++ b/infra/base-images/base-runner/profraw_update.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +"""Helper script for upgrading a profraw file to latest version.""" + +from collections import namedtuple +import struct +import subprocess +import sys + +HeaderGeneric = namedtuple('HeaderGeneric', 'magic version') +HeaderVersion7 = namedtuple( + 'HeaderVersion7', + 'BinaryIdsSize DataSize PaddingBytesBeforeCounters CountersSize \ + PaddingBytesAfterCounters NamesSize CountersDelta NamesDelta ValueKindLast') + +PROFRAW_MAGIC = 0xff6c70726f667281 + + +def relativize_address(data, offset, databegin, sect_prf_cnts, sect_prf_data): + """Turns an absolute offset into a relative one.""" + value = struct.unpack('Q', data[offset:offset + 8])[0] + if sect_prf_cnts <= value < sect_prf_data: + # If the value is an address in the right section, make it relative. + value = (value - databegin) & 0xffffffffffffffff + value = struct.pack('Q', value) + for i in range(8): + data[offset + i] = value[i] + + +def upgrade(data, sect_prf_cnts, sect_prf_data): + """Upgrades profraw data, knowing the sections addresses.""" + generic_header = HeaderGeneric._make(struct.unpack('QQ', data[:16])) + if generic_header.magic != PROFRAW_MAGIC: + raise Exception('Bad magic.') + if generic_header.version == 5: + generic_header = generic_header._replace(version=7) + # Upgrade from version 5 to 7 by adding binaryids field. + data = struct.pack('QQ', generic_header) + struct.pack('Q', 0) + data[16:] + if generic_header.version < 7: + raise Exception('Unhandled version.') + v7_header = HeaderVersion7._make(struct.unpack('QQQQQQQQQ', data[16:88])) + + if v7_header.BinaryIdsSize % 8 != 0: + # Adds padding for binary ids. + # cf commit b9f547e8e51182d32f1912f97a3e53f4899ea6be + # cf https://reviews.llvm.org/D110365 + padlen = 8 - (v7_header.BinaryIdsSize % 8) + v7_header = v7_header._replace(BinaryIdsSize=v7_header.BinaryIdsSize + + padlen) + data = data[:16] + struct.pack('Q', v7_header.BinaryIdsSize) + data[24:] + data = data[:88 + v7_header.BinaryIdsSize] + bytes( + padlen) + data[88 + v7_header.BinaryIdsSize:] + + if v7_header.CountersDelta != sect_prf_cnts - sect_prf_data: + # Rust linking seems to add an offset... + sect_prf_data = v7_header.CountersDelta - sect_prf_cnts + sect_prf_data + sect_prf_cnts = v7_header.CountersDelta + + dataref = sect_prf_data + relativize_address(data, 64, dataref, sect_prf_cnts, sect_prf_data) + + offset = 88 + v7_header.BinaryIdsSize + # This also works for C+Rust binaries compiled with + # clang-14/rust-nightly-clang-13. + for _ in range(v7_header.DataSize): + # 16 is the offset of CounterPtr in ProfrawData structure. + relativize_address(data, offset + 16, dataref, sect_prf_cnts, sect_prf_data) + # We need this because of CountersDelta -= sizeof(*SrcData); + # seen in __llvm_profile_merge_from_buffer. + dataref += 44 + 2 * (v7_header.ValueKindLast + 1) + # This is the size of one ProfrawData structure. + offset += 44 + 2 * (v7_header.ValueKindLast + 1) + + return data + + +def main(): + """Helper script for upgrading a profraw file to latest version.""" + if len(sys.argv) != 4: + sys.stderr.write('Usage: %s <binary> <profraw> <output>\n' % sys.argv[0]) + return 1 + + # First find llvm profile sections addresses in the elf, quick and dirty. + process = subprocess.Popen(['readelf', '-S', sys.argv[1]], + stdout=subprocess.PIPE) + output, err = process.communicate() + if err: + print('readelf failed') + return 2 + for line in iter(output.split(b'\n')): + if b'__llvm_prf_cnts' in line: + sect_prf_cnts = int(line.split()[4], 16) + elif b'__llvm_prf_data' in line: + sect_prf_data = int(line.split()[4], 16) + + # Then open and read the input profraw file. + with open(sys.argv[2], 'rb') as input_file: + profraw_base = bytearray(input_file.read()) + # Do the upgrade, returning a bytes object. + profraw_latest = upgrade(profraw_base, sect_prf_cnts, sect_prf_data) + # Write the output to the file given to the command line. + with open(sys.argv[3], 'wb') as output_file: + output_file.write(profraw_latest) + + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/infra/base-images/base-runner/run_fuzzer b/infra/base-images/base-runner/run_fuzzer index b9bc8d9d6..426688ea3 100755 --- a/infra/base-images/base-runner/run_fuzzer +++ b/infra/base-images/base-runner/run_fuzzer @@ -26,7 +26,14 @@ DEBUGGER=${DEBUGGER:-} FUZZER=$1 shift -CORPUS_DIR=${CORPUS_DIR:-"/tmp/${FUZZER}_corpus"} +# This env var is set by CIFuzz. CIFuzz fills this directory with the corpus +# from ClusterFuzz. +CORPUS_DIR=${CORPUS_DIR:-} +if [ -z "$CORPUS_DIR" ] +then + CORPUS_DIR="/tmp/${FUZZER}_corpus" + rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR +fi SANITIZER=${SANITIZER:-} if [ -z $SANITIZER ]; then @@ -63,14 +70,13 @@ function get_dictionary() { fi } -rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR rm -rf $FUZZER_OUT && mkdir -p $FUZZER_OUT SEED_CORPUS="${FUZZER}_seed_corpus.zip" if [ -f $SEED_CORPUS ] && [ -z ${SKIP_SEED_CORPUS:-} ]; then echo "Using seed corpus: $SEED_CORPUS" - unzip -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null + unzip -o -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null fi OPTIONS_FILE="${FUZZER}.options" @@ -103,19 +109,18 @@ if [[ "$FUZZING_ENGINE" = afl ]]; then export UBSAN_OPTIONS="$UBSAN_OPTIONS:symbolize=0" export AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 export AFL_SKIP_CPUFREQ=1 - export AFL_NO_AFFINITY=1 + export AFL_TRY_AFFINITY=1 export AFL_FAST_CAL=1 + export AFL_CMPLOG_ONLY_NEW=1 + export AFL_FORKSRV_INIT_TMOUT=30000 # If $OUT/afl_cmplog.txt is present this means the target was compiled for - # CMPLOG. So we have to add the proper parameters to afl-fuzz. `-l 2` is - # CMPLOG level 2, which will colorize larger files but not huge files and - # not enable transform analysis unless there have been several cycles without - # any finds. - test -e "$OUT/afl_cmplog.txt" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -l 2 -c $OUT/$FUZZER" + # CMPLOG. So we have to add the proper parameters to afl-fuzz. + test -e "$OUT/afl_cmplog.txt" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -c $OUT/$FUZZER" # If $OUT/afl++.dict we load it as a dictionary for afl-fuzz. test -e "$OUT/afl++.dict" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -x $OUT/afl++.dict" - # Ensure timeout is a bit large than 1sec as some of the OSS-Fuzz fuzzers - # are slower than this. - AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -t 5000+" + # Ensure timeout is a bit larger than 1sec as some of the OSS-Fuzz fuzzers + # are slower than this. + AFL_FUZZER_ARGS="$FUZZER_ARGS $AFL_FUZZER_ARGS -t 5000+" # AFL expects at least 1 file in the input dir. echo input > ${CORPUS_DIR}/input echo afl++ setup: @@ -135,7 +140,7 @@ elif [[ "$FUZZING_ENGINE" = honggfuzz ]]; then # -P: use persistent mode of fuzzing (i.e. LLVMFuzzerTestOneInput) # -f: location of the initial (and destination) file corpus # -n: number of fuzzing threads (and processes) - CMD_LINE="$OUT/honggfuzz -n 1 --exit_upon_crash -R /tmp/${FUZZER}_honggfuzz.report -W $FUZZER_OUT -v -z -P -f \"$CORPUS_DIR\" $(get_dictionary) $* -- \"$OUT/$FUZZER\"" + CMD_LINE="$OUT/honggfuzz -n 1 --exit_upon_crash -R /tmp/${FUZZER}_honggfuzz.report -W $FUZZER_OUT -v -z -P -f \"$CORPUS_DIR\" $(get_dictionary) $FUZZER_ARGS $* -- \"$OUT/$FUZZER\"" else diff --git a/infra/base-images/base-runner/targets_list b/infra/base-images/base-runner/targets_list index d35534258..95615c811 100755 --- a/infra/base-images/base-runner/targets_list +++ b/infra/base-images/base-runner/targets_list @@ -2,7 +2,8 @@ for binary in $(find $OUT/ -executable -type f); do [[ "$binary" != *.so ]] || continue - file "$binary" | grep ELF > /dev/null 2>&1 || continue + [[ $(basename "$binary") != jazzer_driver* ]] || continue + file "$binary" | grep -e ELF -e "shell script" > /dev/null 2>&1 || continue grep "LLVMFuzzerTestOneInput" "$binary" > /dev/null 2>&1 || continue basename "$binary" diff --git a/infra/base-images/base-runner/test_all.py b/infra/base-images/base-runner/test_all.py index 925ebde69..16dfcbfa9 100755 --- a/infra/base-images/base-runner/test_all.py +++ b/infra/base-images/base-runner/test_all.py @@ -20,12 +20,12 @@ import contextlib import multiprocessing import os import re -import shutil import subprocess import stat import sys +import tempfile -TMP_FUZZER_DIR = '/tmp/not-out' +BASE_TMP_FUZZER_DIR = '/tmp/not-out' EXECUTABLE = stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH @@ -37,14 +37,6 @@ IGNORED_TARGETS = [ IGNORED_TARGETS_RE = re.compile('^' + r'$|^'.join(IGNORED_TARGETS) + '$') -def recreate_directory(directory): - """Creates |directory|. If it already exists than deletes it first before - creating.""" - if os.path.exists(directory): - shutil.rmtree(directory) - os.mkdir(directory) - - def move_directory_contents(src_directory, dst_directory): """Moves contents of |src_directory| to |dst_directory|.""" # Use mv because mv preserves file permissions. If we don't preserve file @@ -67,7 +59,15 @@ def is_elf(filepath): return b'ELF' in result.stdout -def find_fuzz_targets(directory, fuzzing_language): +def is_shell_script(filepath): + """Returns True if |filepath| is a shell script.""" + result = subprocess.run(['file', filepath], + stdout=subprocess.PIPE, + check=False) + return b'shell script' in result.stdout + + +def find_fuzz_targets(directory): """Returns paths to fuzz targets in |directory|.""" # TODO(https://github.com/google/oss-fuzz/issues/4585): Use libClusterFuzz for # this. @@ -84,10 +84,10 @@ def find_fuzz_targets(directory, fuzzing_language): continue if not os.stat(path).st_mode & EXECUTABLE: continue - # Fuzz targets are expected to be ELF binaries for languages other than - # Python and Java. - if (fuzzing_language != 'python' and fuzzing_language != 'jvm' and - not is_elf(path)): + # Fuzz targets can either be ELF binaries or shell scripts (e.g. wrapper + # scripts for Python and JVM targets or rules_fuzzing builds with runfiles + # trees). + if not is_elf(path) and not is_shell_script(path): continue if os.getenv('FUZZING_ENGINE') != 'none': with open(path, 'rb') as file_handle: @@ -132,51 +132,66 @@ def has_ignored_targets(out_dir): @contextlib.contextmanager def use_different_out_dir(): - """Context manager that moves OUT to TMP_FUZZER_DIR. This is useful for - catching hardcoding. Note that this sets the environment variable OUT and - therefore must be run before multiprocessing.Pool is created. Resets OUT at - the end.""" + """Context manager that moves OUT to subdirectory of BASE_TMP_FUZZER_DIR. This + is useful for catching hardcoding. Note that this sets the environment + variable OUT and therefore must be run before multiprocessing.Pool is created. + Resets OUT at the end.""" # Use a fake OUT directory to catch path hardcoding that breaks on # ClusterFuzz. - out = os.getenv('OUT') - initial_out = out - recreate_directory(TMP_FUZZER_DIR) - out = TMP_FUZZER_DIR - # Set this so that run_fuzzer which is called by bad_build_check works - # properly. - os.environ['OUT'] = out - # We move the contents of the directory because we can't move the - # directory itself because it is a mount. - move_directory_contents(initial_out, out) - try: - yield out - finally: - move_directory_contents(out, initial_out) - shutil.rmtree(out) - os.environ['OUT'] = initial_out - - -def test_all_outside_out(fuzzing_language, allowed_broken_targets_percentage): + initial_out = os.getenv('OUT') + os.makedirs(BASE_TMP_FUZZER_DIR, exist_ok=True) + # Use a random subdirectory of BASE_TMP_FUZZER_DIR to allow running multiple + # instances of test_all in parallel (useful for integration testing). + with tempfile.TemporaryDirectory(dir=BASE_TMP_FUZZER_DIR) as out: + # Set this so that run_fuzzer which is called by bad_build_check works + # properly. + os.environ['OUT'] = out + # We move the contents of the directory because we can't move the + # directory itself because it is a mount. + move_directory_contents(initial_out, out) + try: + yield out + finally: + move_directory_contents(out, initial_out) + os.environ['OUT'] = initial_out + + +def test_all_outside_out(allowed_broken_targets_percentage): """Wrapper around test_all that changes OUT and returns the result.""" with use_different_out_dir() as out: - return test_all(out, fuzzing_language, allowed_broken_targets_percentage) + return test_all(out, allowed_broken_targets_percentage) -def test_all(out, fuzzing_language, allowed_broken_targets_percentage): +def test_all(out, allowed_broken_targets_percentage): """Do bad_build_check on all fuzz targets.""" # TODO(metzman): Refactor so that we can convert test_one to python. - fuzz_targets = find_fuzz_targets(out, fuzzing_language) + fuzz_targets = find_fuzz_targets(out) if not fuzz_targets: print('ERROR: No fuzz targets found.') return False pool = multiprocessing.Pool() bad_build_results = pool.map(do_bad_build_check, fuzz_targets) + pool.close() + pool.join() broken_targets = get_broken_fuzz_targets(bad_build_results, fuzz_targets) broken_targets_count = len(broken_targets) if not broken_targets_count: return True + print('Retrying failed fuzz targets sequentially', broken_targets_count) + pool = multiprocessing.Pool(1) + retry_targets = [] + for broken_target, result in broken_targets: + retry_targets.append(broken_target) + bad_build_results = pool.map(do_bad_build_check, retry_targets) + pool.close() + pool.join() + broken_targets = get_broken_fuzz_targets(bad_build_results, broken_targets) + broken_targets_count = len(broken_targets) + if not broken_targets_count: + return True + print('Broken fuzz targets', broken_targets_count) total_targets_count = len(fuzz_targets) broken_targets_percentage = 100 * broken_targets_count / total_targets_count @@ -211,11 +226,8 @@ def get_allowed_broken_targets_percentage(): def main(): """Does bad_build_check on all fuzz targets in parallel. Returns 0 on success. Returns 1 on failure.""" - # Set these environment variables here so that stdout - fuzzing_language = os.getenv('FUZZING_LANGUAGE') allowed_broken_targets_percentage = get_allowed_broken_targets_percentage() - if not test_all_outside_out(fuzzing_language, - allowed_broken_targets_percentage): + if not test_all_outside_out(allowed_broken_targets_percentage): return 1 return 0 diff --git a/infra/base-images/base-runner/test_all_test.py b/infra/base-images/base-runner/test_all_test.py index 3771ec231..b3077ec1e 100644 --- a/infra/base-images/base-runner/test_all_test.py +++ b/infra/base-images/base-runner/test_all_test.py @@ -25,15 +25,13 @@ class TestTestAll(unittest.TestCase): @mock.patch('test_all.find_fuzz_targets', return_value=[]) @mock.patch('builtins.print') - def test_test_all_no_fuzz_targets(self, mocked_print, _): + def test_test_all_no_fuzz_targets(self, mock_print, _): """Tests that test_all returns False when there are no fuzz targets.""" outdir = '/out' - fuzzing_language = 'c++' allowed_broken_targets_percentage = 0 self.assertFalse( - test_all.test_all(outdir, fuzzing_language, - allowed_broken_targets_percentage)) - mocked_print.assert_called_with('ERROR: No fuzz targets found.') + test_all.test_all(outdir, allowed_broken_targets_percentage)) + mock_print.assert_called_with('ERROR: No fuzz targets found.') if __name__ == '__main__': |