10 files changed, 492 insertions, 85 deletions
diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile
index f847de026..fadd00acc 100755
--- a/infra/base-images/base-runner/Dockerfile
+++ b/infra/base-images/base-runner/Dockerfile
@@ -45,12 +45,18 @@ RUN apt-get update && apt-get install -y \
     libcap2 \
     python3 \
     python3-pip \
+    python3-setuptools \
     unzip \
     wget \
     zip --no-install-recommends
 
-RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage /opt/code_coverage && \
-    pip3 install -r /opt/code_coverage/requirements.txt
+ENV CODE_COVERAGE_SRC=/opt/code_coverage
+RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage $CODE_COVERAGE_SRC && \
+    cd $CODE_COVERAGE_SRC && \
+    git checkout edba4873b5e8a390e977a64c522db2df18a8b27d && \
+    pip3 install wheel && \
+    pip3 install -r requirements.txt && \
+    pip3 install MarkupSafe==0.23
 
 # Default environment options for various sanitizers.
 # Note that these match the settings used in ClusterFuzz and
@@ -91,6 +97,12 @@ RUN wget https://download.java.net/java/GA/jdk15.0.2/0d1cfde4252546c6931946de8db
     rm -f openjdk-15.0.2_linux-x64_bin.tar.gz && \
     rm -rf $JAVA_HOME/jmods $JAVA_HOME/lib/src.zip
 
+# Install JaCoCo for JVM coverage.
+RUN wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.cli/0.8.7/org.jacoco.cli-0.8.7-nodeps.jar -O /opt/jacoco-cli.jar && \
+    wget https://repo1.maven.org/maven2/org/jacoco/org.jacoco.agent/0.8.7/org.jacoco.agent-0.8.7-runtime.jar -O /opt/jacoco-agent.jar && \
+    echo "37df187b76888101ecd745282e9cd1ad4ea508d6  /opt/jacoco-agent.jar" | shasum --check && \
+    echo "c1814e7bba5fd8786224b09b43c84fd6156db690  /opt/jacoco-cli.jar" | shasum --check
+
 # Do this last to make developing these files easier/faster due to caching.
 COPY bad_build_check \
     collect_dft \
@@ -98,10 +110,12 @@ COPY bad_build_check \
     coverage_helper \
     dataflow_tracer.py \
     download_corpus \
+    jacoco_report_converter.py \
     rcfilt \
     reproduce \
     run_fuzzer \
     parse_options.py \
+    profraw_update.py \
     targets_list \
     test_all.py \
     test_one.py \
diff --git a/infra/base-images/base-runner/bad_build_check b/infra/base-images/base-runner/bad_build_check
index 01f8fbbab..bb328c793 100755
--- a/infra/base-images/base-runner/bad_build_check
+++ b/infra/base-images/base-runner/bad_build_check
@@ -39,7 +39,7 @@ DFSAN_CALLS_THRESHOLD_FOR_NON_DFSAN_BUILD=0
 MSAN_CALLS_THRESHOLD_FOR_MSAN_BUILD=1000
 # Some engines (e.g. honggfuzz) may make a very small number of calls to msan
 # for memory poisoning.
-MSAN_CALLS_THRESHOLD_FOR_NON_MSAN_BUILD=2
+MSAN_CALLS_THRESHOLD_FOR_NON_MSAN_BUILD=3
 
 # Usually, a non UBSan build (e.g. ASan) has 165 calls to UBSan runtime. The
 # majority of targets built with UBSan have 200+ UBSan calls, but there are
@@ -90,10 +90,7 @@ function check_engine {
       echo "BAD BUILD: $FUZZER seems to have only partial coverage instrumentation."
     fi
   elif [[ "$FUZZING_ENGINE" == afl ]]; then
-    # TODO(https://github.com/google/oss-fuzz/issues/2470): Dont use
-    # AFL_DRIVER_DONT_DEFER by default, support .options files in
-    # bad_build_check instead.
-    AFL_DRIVER_DONT_DEFER=1 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 20s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
+    AFL_FORKSRV_INIT_TMOUT=30000 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 35s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
     CHECK_PASSED=$(egrep "All set and ready to roll" -c $FUZZER_OUTPUT)
     if (( $CHECK_PASSED == 0 )); then
       echo "BAD BUILD: fuzzing $FUZZER with afl-fuzz failed."
@@ -136,10 +133,7 @@ function check_startup_crash {
     SKIP_SEED_CORPUS=1 run_fuzzer $FUZZER_NAME -seed=1337 -runs=$MIN_NUMBER_OF_RUNS &>$FUZZER_OUTPUT
     CHECK_PASSED=$(egrep "Done $MIN_NUMBER_OF_RUNS runs" -c $FUZZER_OUTPUT)
   elif [[ "$FUZZING_ENGINE" = afl ]]; then
-    # TODO(https://github.com/google/oss-fuzz/issues/2470): Dont use
-    # AFL_DRIVER_DONT_DEFER by default, support .options files in
-    # bad_build_check instead.
-    AFL_DRIVER_DONT_DEFER=1 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 20s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
+    AFL_FORKSRV_INIT_TMOUT=30000 AFL_NO_UI=1 SKIP_SEED_CORPUS=1 timeout --preserve-status -s INT 35s run_fuzzer $FUZZER_NAME &>$FUZZER_OUTPUT
     if [ $(egrep "target binary (crashed|terminated)" -c $FUZZER_OUTPUT) -eq 0 ]; then
       CHECK_PASSED=1
     fi
diff --git a/infra/base-images/base-runner/coverage b/infra/base-images/base-runner/coverage
index a86b00dec..3c7b274e4 100755
--- a/infra/base-images/base-runner/coverage
+++ b/infra/base-images/base-runner/coverage
@@ -19,14 +19,21 @@ cd $OUT
 if (( $# > 0 )); then
   FUZZ_TARGETS="$@"
 else
-  FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n')"
+  FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n' | \
+      grep -v -x -F \
+      -e 'llvm-symbolizer' \
+      -e 'jazzer_agent_deploy.jar' \
+      -e 'jazzer_driver' \
+      -e 'jazzer_driver_with_sanitizer')"
 fi
 
-DUMPS_DIR="$OUT/dumps"
-FUZZER_STATS_DIR="$OUT/fuzzer_stats"
-LOGS_DIR="$OUT/logs"
-REPORT_ROOT_DIR="$OUT/report"
-REPORT_PLATFORM_DIR="$OUT/report/linux"
+COVERAGE_OUTPUT_DIR=${COVERAGE_OUTPUT_DIR:-$OUT}
+
+DUMPS_DIR="$COVERAGE_OUTPUT_DIR/dumps"
+FUZZER_STATS_DIR="$COVERAGE_OUTPUT_DIR/fuzzer_stats"
+LOGS_DIR="$COVERAGE_OUTPUT_DIR/logs"
+REPORT_ROOT_DIR="$COVERAGE_OUTPUT_DIR/report"
+REPORT_PLATFORM_DIR="$COVERAGE_OUTPUT_DIR/report/linux"
 
 for directory in $DUMPS_DIR $FUZZER_STATS_DIR $LOGS_DIR $REPORT_ROOT_DIR \
                  $REPORT_PLATFORM_DIR; do
@@ -54,6 +61,8 @@ objects=""
 # Number of CPUs available, this is needed for running tests in parallel.
 NPROC=$(nproc)
 
+CORPUS_DIR=${CORPUS_DIR:-"/corpus"}
+
 function run_fuzz_target {
   local target=$1
 
@@ -62,7 +71,7 @@ function run_fuzz_target {
   local profraw_file="$DUMPS_DIR/$target.%1m.profraw"
   local profraw_file_mask="$DUMPS_DIR/$target.*.profraw"
   local profdata_file="$DUMPS_DIR/$target.profdata"
-  local corpus_real="/corpus/${target}"
+  local corpus_real="$CORPUS_DIR/${target}"
 
   # -merge=1 requires an output directory, create a new, empty dir for that.
   local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}"
@@ -74,7 +83,7 @@ function run_fuzz_target {
   # because (A) corpuses are already minimized; (B) we do not use sancov, and so
   # libFuzzer always finishes merge with an empty output dir.
   # Use 100s timeout instead of 25s as code coverage builds can be very slow.
-  local args="-merge=1 -timeout=100 -close_fd_mask=3 $corpus_dummy $corpus_real"
+  local args="-merge=1 -timeout=100 $corpus_dummy $corpus_real"
 
   export LLVM_PROFILE_FILE=$profraw_file
   timeout $TIMEOUT $OUT/$target $args &> $LOGS_DIR/$target.log
@@ -90,6 +99,9 @@ function run_fuzz_target {
     return 0
   fi
 
+  # Upgrade profraw if needed; per-target temp name as targets run in parallel.
+  profraw_update.py $OUT/$target $profraw_file_mask $DUMPS_DIR/$target.profraw.tmp
+  mv $DUMPS_DIR/$target.profraw.tmp $profraw_file_mask
   llvm-profdata merge -j=1 -sparse $profraw_file_mask -o $profdata_file
 
   # Delete unnecessary and (potentially) large .profraw files.
@@ -115,7 +127,7 @@ function run_go_fuzz_target {
   local target=$1
 
   echo "Running go target $target"
-  export FUZZ_CORPUS_DIR="/corpus/${target}/"
+  export FUZZ_CORPUS_DIR="$CORPUS_DIR/${target}/"
   export FUZZ_PROFILE_NAME="$DUMPS_DIR/$target.perf"
   $OUT/$target -test.coverprofile $DUMPS_DIR/$target.profdata &> $LOGS_DIR/$target.log
   # translate from golangish paths to current absolute paths
@@ -125,6 +137,47 @@ function run_go_fuzz_target {
   $SYSGOPATH/bin/gocovsum $DUMPS_DIR/$target.profdata > $FUZZER_STATS_DIR/$target.json
 }
 
+function run_java_fuzz_target {
+  local target=$1
+
+  local exec_file="$DUMPS_DIR/$target.exec"
+  local class_dump_dir="$DUMPS_DIR/${target}_classes/"
+  mkdir "$class_dump_dir"
+  local corpus_real="$CORPUS_DIR/${target}"
+
+  # -merge=1 requires an output directory, create a new, empty dir for that.
+  local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}"
+  rm -rf $corpus_dummy && mkdir -p $corpus_dummy
+
+  # Use 100s timeout instead of 25s as code coverage builds can be very slow.
+  local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir,excludes=com.code_intelligence.jazzer.*"
+  local args="-merge=1 -timeout=100 --nohooks \
+      --additional_jvm_args=-javaagent:/opt/jacoco-agent.jar=$jacoco_args \
+      $corpus_dummy $corpus_real"
+
+  timeout $TIMEOUT $OUT/$target $args &> $LOGS_DIR/$target.log
+  if (( $? != 0 )); then
+    echo "Error occured while running $target:"
+    cat $LOGS_DIR/$target.log
+  fi
+
+  if (( $(du -c $exec_file | tail -n 1 | cut -f 1) == 0 )); then
+    # Skip fuzz targets that failed to produce .exec files.
+    return 0
+  fi
+
+  # Generate XML report only as input to jacoco_report_converter.
+  # Source files are not needed for the summary.
+  local xml_report="$DUMPS_DIR/${target}.xml"
+  local summary_file="$FUZZER_STATS_DIR/$target.json"
+  java -jar /opt/jacoco-cli.jar report $exec_file \
+      --xml $xml_report \
+      --classfiles $class_dump_dir
+
+  # Write llvm-cov summary file.
+  jacoco_report_converter.py $xml_report $summary_file
+}
+
 export SYSGOPATH=$GOPATH
 export GOPATH=$OUT/$GOPATH
 # Run each fuzz target, generate raw coverage dumps.
@@ -136,6 +189,14 @@ for fuzz_target in $FUZZ_TARGETS; do
       grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1 || continue
     fi
     run_go_fuzz_target $fuzz_target &
+  elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
+    # Continue if not a fuzz target.
+    if [[ $FUZZING_ENGINE != "none" ]]; then
+      grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1 || continue
+    fi
+
+    echo "Running $fuzz_target"
+    run_java_fuzz_target $fuzz_target &
   else
     # Continue if not a fuzz target.
     if [[ $FUZZING_ENGINE != "none" ]]; then
@@ -175,6 +236,43 @@ if [[ $FUZZING_LANGUAGE == "go" ]]; then
   mv merged.data $REPORT_ROOT_DIR/heap.prof
   #TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof
   echo "Finished generating code coverage report for Go fuzz targets."
+elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then
+
+  # From this point on the script does not tolerate any errors.
+  set -e
+
+  # Merge .exec files from the individual targets.
+  jacoco_merged_exec=$DUMPS_DIR/jacoco.merged.exec
+  java -jar /opt/jacoco-cli.jar merge $DUMPS_DIR/*.exec \
+      --destfile $jacoco_merged_exec
+
+  # Merge .class files; entries that were skipped have no class dump to copy.
+  classes_dir=$DUMPS_DIR/classes && mkdir $classes_dir
+  for fuzz_target in $FUZZ_TARGETS; do
+    [[ -d $DUMPS_DIR/${fuzz_target}_classes ]] || continue
+    cp -r $DUMPS_DIR/${fuzz_target}_classes/. $classes_dir/
+  done
+
+  # Heuristically determine source directories based on Maven structure.
+  # Always include the $SRC root as it likely contains the fuzzer sources.
+  sourcefiles_args=(--sourcefiles $OUT/$SRC)
+  source_dirs=$(find $OUT/$SRC -type d -name 'java')
+  for source_dir in $source_dirs; do
+    sourcefiles_args+=(--sourcefiles "$source_dir")
+  done
+
+  # Generate HTML and XML reports.
+  xml_report=$REPORT_PLATFORM_DIR/index.xml
+  java -jar /opt/jacoco-cli.jar report $jacoco_merged_exec \
+      --html $REPORT_PLATFORM_DIR \
+      --xml $xml_report \
+      --classfiles $classes_dir \
+      "${sourcefiles_args[@]}"
+
+  # Write llvm-cov summary file.
+  jacoco_report_converter.py $xml_report $SUMMARY_FILE
+
+  set +e
 else
 
   # From this point on the script does not tolerate any errors.
@@ -206,6 +304,10 @@ else
 
 fi
 
+# Make sure report is readable.
+chmod -R +r $REPORT_ROOT_DIR
+find $REPORT_ROOT_DIR -type d -exec chmod +x {} +
+
 if [[ -n $HTTP_PORT ]]; then
   # Serve the report locally.
   echo "Serving the report on http://127.0.0.1:$HTTP_PORT/linux/index.html"
diff --git a/infra/base-images/base-runner/coverage_helper b/infra/base-images/base-runner/coverage_helper
index 22c9cb5d6..4d29ceac8 100755
--- a/infra/base-images/base-runner/coverage_helper
+++ b/infra/base-images/base-runner/coverage_helper
@@ -14,4 +14,4 @@
 # limitations under the License.
 #
 ################################################################################
-python3 /opt/code_coverage/coverage_utils.py $@
+python3 $CODE_COVERAGE_SRC/coverage_utils.py $@
diff --git a/infra/base-images/base-runner/jacoco_report_converter.py b/infra/base-images/base-runner/jacoco_report_converter.py
new file mode 100755
index 000000000..3c36065f1
--- /dev/null
+++ b/infra/base-images/base-runner/jacoco_report_converter.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Helper script for creating an llvm-cov style JSON summary from a JaCoCo XML
+report."""
+import json
+import os
+import sys
+import xml.etree.ElementTree as ET
+
+
+def convert(xml):
+  """Turns a JaCoCo XML report into an llvm-cov JSON summary."""
+  summary = {
+      "type": "oss-fuzz.java.coverage.json.export",
+      "version": "1.0.0",
+      "data": [{
+          "totals": {},
+          "files": [],
+      }],
+  }
+
+  report = ET.fromstring(xml)
+  totals = make_element_summary(report)
+  summary["data"][0]["totals"] = totals
+
+  # Since Java compilation does not track source file location, we match
+  # coverage info to source files via the full class name, e.g. we search for
+  # a path in /out/src ending in foo/bar/Baz.java for the class foo.bar.Baz.
+  # Under the assumptions that a given project only ever contains a single
+  # version of a class and that no class name appears as a suffix of another
+  # class name, we can assign coverage info to every source file matched in that
+  # way.
+  src_files = list_src_files()
+
+  for class_element in report.findall("./package/class"):
+    class_name = class_element.attrib["name"]
+    package_name = os.path.dirname(class_name)
+    if "sourcefilename" not in class_element.attrib:
+      continue
+    basename = class_element.attrib["sourcefilename"]
+    # This path is "foo/Bar.java" for the class element
+    # <class name="foo/Bar" sourcefilename="Bar.java">.
+    canonical_path = os.path.join(package_name, basename)
+
+    class_summary = make_element_summary(class_element)
+    summary["data"][0]["files"].append({
+        "filename": relative_to_src_path(src_files, canonical_path),
+        "summary": class_summary,
+    })
+
+  return json.dumps(summary)
+
+
+def list_src_files():
+  """Maps each file basename under $OUT/$SRC to the list of paths of the files
+  with that name, each with the leading $OUT directory stripped."""
+  out_prefix = os.environ["OUT"] + "/"
+  # With an absolute $SRC this is e.g. /out//src.
+  walk_root = out_prefix + os.environ["SRC"]
+  paths_by_basename = {}
+  for dirpath, _, filenames in os.walk(walk_root):
+    for filename in filenames:
+      # Map /out//src/... to /src/...
+      stripped = (dirpath + "/" + filename)[len(out_prefix):]
+      paths_by_basename.setdefault(filename, []).append(stripped)
+  return paths_by_basename
+
+
+def relative_to_src_path(src_files, canonical_path):
+  """Lists the paths stored in |src_files| under the basename of
+  |canonical_path| that end in "/" + |canonical_path| (possibly none)."""
+  suffix = "/" + canonical_path
+  matches = []
+  for candidate in src_files.get(os.path.basename(canonical_path), []):
+    if candidate.endswith(suffix):
+      matches.append(candidate)
+  return matches
+
+
+def make_element_summary(element):
+  """Returns a coverage summary for an element in the XML report."""
+  summary = {}
+
+  function_counter = element.find("./counter[@type='METHOD']")
+  summary["functions"] = make_counter_summary(function_counter)
+
+  line_counter = element.find("./counter[@type='LINE']")
+  summary["lines"] = make_counter_summary(line_counter)
+
+  # JaCoCo tracks branch coverage, which counts the covered control-flow edges
+  # between llvm-cov's regions instead of the covered regions themselves. For
+  # non-trivial code parts, the difference is usually negligible. However, if
+  # all methods of a class consist of a single region only (no branches),
+  # JaCoCo does not report any branch coverage even if there is instruction
+  # coverage. Since this would give incorrect results for CI Fuzz purposes, we
+  # increase the regions counter by 1 if there is any amount of instruction
+  # coverage.
+  instruction_counter = element.find("./counter[@type='INSTRUCTION']")
+  has_some_coverage = instruction_counter is not None and int(
+      instruction_counter.attrib["covered"]) > 0
+  branch_covered_adjustment = 1 if has_some_coverage else 0
+  region_counter = element.find("./counter[@type='BRANCH']")
+  summary["regions"] = make_counter_summary(
+      region_counter, covered_adjustment=branch_covered_adjustment)
+
+  return summary
+
+
+def make_counter_summary(counter_element, covered_adjustment=0):
+  """Builds an llvm-cov style totals entry from a JaCoCo <counter> element.
+  A missing element (None) contributes no covered or missed items."""
+  hit, miss = covered_adjustment, 0
+  if counter_element is not None:
+    hit += int(counter_element.attrib["covered"])
+    miss += int(counter_element.attrib["missed"])
+  total = hit + miss
+  # An empty counter gets the integer 0 as its percentage.
+  percent = (100.0 * hit) / total if total != 0 else 0
+  return {
+      "covered": hit,
+      "notcovered": miss,
+      "count": total,
+      "percent": percent,
+  }
+
+
+def main():
+  """Produces an llvm-cov style JSON summary from a JaCoCo XML report."""
+  if len(sys.argv) != 3:
+    sys.stderr.write('Usage: %s <path_to_jacoco_xml> <out_path_json>\n' %
+                     sys.argv[0])
+    return 1
+
+  with open(sys.argv[1], 'r') as xml_file:
+    xml_report = xml_file.read()
+  json_summary = convert(xml_report)
+  with open(sys.argv[2], 'w') as json_file:
+    json_file.write(json_summary)
+
+  return 0
+
+
+if __name__ == "__main__":
+  sys.exit(main())
diff --git a/infra/base-images/base-runner/profraw_update.py b/infra/base-images/base-runner/profraw_update.py
new file mode 100644
index 000000000..408b5fb93
--- /dev/null
+++ b/infra/base-images/base-runner/profraw_update.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+"""Helper script for upgrading a profraw file to latest version."""
+
+from collections import namedtuple
+import struct
+import subprocess
+import sys
+
+HeaderGeneric = namedtuple('HeaderGeneric', 'magic version')
+HeaderVersion7 = namedtuple(
+    'HeaderVersion7',
+    'BinaryIdsSize DataSize PaddingBytesBeforeCounters CountersSize \
+    PaddingBytesAfterCounters NamesSize CountersDelta NamesDelta ValueKindLast')
+
+PROFRAW_MAGIC = 0xff6c70726f667281
+
+
+def relativize_address(data, offset, databegin, sect_prf_cnts, sect_prf_data):
+  """Rewrites the 8-byte absolute address at |offset| in |data| as relative."""
+  (address,) = struct.unpack('Q', data[offset:offset + 8])
+  if not sect_prf_cnts <= address < sect_prf_data:
+    # Not a value inside the expected range: leave it untouched.
+    return
+  # Subtract |databegin|, wrapping to an unsigned 64-bit value.
+  relative = (address - databegin) & 0xffffffffffffffff
+  data[offset:offset + 8] = struct.pack('Q', relative)
+
+
+def upgrade(data, sect_prf_cnts, sect_prf_data):
+  """Upgrades profraw |data| (a bytearray) knowing the sections addresses.
+  Returns the upgraded bytearray; raises Exception on bad magic or version."""
+  generic_header = HeaderGeneric._make(struct.unpack('QQ', data[:16]))
+  if generic_header.magic != PROFRAW_MAGIC:
+    raise Exception('Bad magic.')
+  if generic_header.version == 5:
+    generic_header = generic_header._replace(version=7)
+    # Upgrade from version 5 to 7 by adding binaryids field (stay a bytearray).
+    data = bytearray(struct.pack('QQQ', *generic_header, 0)) + data[16:]
+  if generic_header.version < 7:
+    raise Exception('Unhandled version.')
+  v7_header = HeaderVersion7._make(struct.unpack('QQQQQQQQQ', data[16:88]))
+
+  padlen = -v7_header.BinaryIdsSize % 8
+  if padlen:
+    # Adds padding for binary ids.
+    # cf commit b9f547e8e51182d32f1912f97a3e53f4899ea6be
+    # cf https://reviews.llvm.org/D110365
+    ids_end = 88 + v7_header.BinaryIdsSize  # Where the unpadded ids stop.
+    v7_header = v7_header._replace(BinaryIdsSize=v7_header.BinaryIdsSize +
+                                   padlen)
+    data = data[:16] + struct.pack('Q', v7_header.BinaryIdsSize) + data[24:]
+    data = data[:ids_end] + bytes(padlen) + data[ids_end:]
+
+  if v7_header.CountersDelta != sect_prf_cnts - sect_prf_data:
+    # Rust linking seems to add an offset...
+    sect_prf_data = v7_header.CountersDelta - sect_prf_cnts + sect_prf_data
+    sect_prf_cnts = v7_header.CountersDelta
+
+  dataref = sect_prf_data
+  relativize_address(data, 64, dataref, sect_prf_cnts, sect_prf_data)
+  offset = 88 + v7_header.BinaryIdsSize
+  # This also works for C+Rust binaries compiled with
+  # clang-14/rust-nightly-clang-13.
+  for _ in range(v7_header.DataSize):
+    # 16 is the offset of CounterPtr in ProfrawData structure.
+    relativize_address(data, offset + 16, dataref, sect_prf_cnts, sect_prf_data)
+    # We need this because of CountersDelta -= sizeof(*SrcData);
+    # seen in __llvm_profile_merge_from_buffer.
+    dataref += 44 + 2 * (v7_header.ValueKindLast + 1)
+    # This is the size of one ProfrawData structure.
+    offset += 44 + 2 * (v7_header.ValueKindLast + 1)
+
+  return data
+
+
+def main():
+  """Upgrades the profraw file argv[2] produced by binary argv[1] and writes
+  the result to argv[3]. Returns 0 on success, 1 on bad usage, 2 otherwise."""
+  if len(sys.argv) != 4:
+    sys.stderr.write('Usage: %s <binary> <profraw> <output>\n' % sys.argv[0])
+    return 1
+
+  # First find llvm profile sections addresses in the elf, quick and dirty.
+  # stderr is not captured, so failure shows in the exit code, not in output.
+  process = subprocess.run(['readelf', '-S', sys.argv[1]],
+                           stdout=subprocess.PIPE, check=False)
+  sections = {}
+  for line in process.stdout.split(b'\n'):
+    for name in (b'__llvm_prf_cnts', b'__llvm_prf_data'):
+      if name in line:
+        sections[name] = int(line.split()[4], 16)
+  if process.returncode != 0 or len(sections) != 2:
+    print('readelf failed')
+    return 2
+
+  # Then open and read the input profraw file.
+  with open(sys.argv[2], 'rb') as input_file:
+    profraw_base = bytearray(input_file.read())
+  # Do the upgrade, then write the result to the requested output file.
+  profraw_latest = upgrade(profraw_base, sections[b'__llvm_prf_cnts'],
+                           sections[b'__llvm_prf_data'])
+  with open(sys.argv[3], 'wb') as output_file:
+    output_file.write(profraw_latest)
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/infra/base-images/base-runner/run_fuzzer b/infra/base-images/base-runner/run_fuzzer
index b9bc8d9d6..426688ea3 100755
--- a/infra/base-images/base-runner/run_fuzzer
+++ b/infra/base-images/base-runner/run_fuzzer
@@ -26,7 +26,14 @@ DEBUGGER=${DEBUGGER:-}
 FUZZER=$1
 shift
 
-CORPUS_DIR=${CORPUS_DIR:-"/tmp/${FUZZER}_corpus"}
+# This env var is set by CIFuzz. CIFuzz fills this directory with the corpus
+# from ClusterFuzz.
+CORPUS_DIR=${CORPUS_DIR:-}
+if [ -z "$CORPUS_DIR" ]
+then
+  CORPUS_DIR="/tmp/${FUZZER}_corpus"
+  rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR
+fi
 
 SANITIZER=${SANITIZER:-}
 if [ -z $SANITIZER ]; then
@@ -63,14 +70,13 @@ function get_dictionary() {
   fi
 }
 
-rm -rf $CORPUS_DIR && mkdir -p $CORPUS_DIR
 rm -rf $FUZZER_OUT && mkdir -p $FUZZER_OUT
 
 SEED_CORPUS="${FUZZER}_seed_corpus.zip"
 
 if [ -f $SEED_CORPUS ] && [ -z ${SKIP_SEED_CORPUS:-} ]; then
   echo "Using seed corpus: $SEED_CORPUS"
-  unzip -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null
+  unzip -o -d ${CORPUS_DIR}/ $SEED_CORPUS > /dev/null
 fi
 
 OPTIONS_FILE="${FUZZER}.options"
@@ -103,19 +109,18 @@ if [[ "$FUZZING_ENGINE" = afl ]]; then
   export UBSAN_OPTIONS="$UBSAN_OPTIONS:symbolize=0"
   export AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
   export AFL_SKIP_CPUFREQ=1
-  export AFL_NO_AFFINITY=1
+  export AFL_TRY_AFFINITY=1
   export AFL_FAST_CAL=1
+  export AFL_CMPLOG_ONLY_NEW=1
+  export AFL_FORKSRV_INIT_TMOUT=30000
   # If $OUT/afl_cmplog.txt is present this means the target was compiled for
-  # CMPLOG. So we have to add the proper parameters to afl-fuzz. `-l 2` is
-  # CMPLOG level 2, which will colorize larger files but not huge files and
-  # not enable transform analysis unless there have been several cycles without
-  # any finds.
-  test -e "$OUT/afl_cmplog.txt" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -l 2 -c $OUT/$FUZZER"
+  # CMPLOG. So we have to add the proper parameters to afl-fuzz.
+  test -e "$OUT/afl_cmplog.txt" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -c $OUT/$FUZZER"
   # If $OUT/afl++.dict we load it as a dictionary for afl-fuzz.
   test -e "$OUT/afl++.dict" && AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -x $OUT/afl++.dict"
-  # Ensure timeout is a bit large than 1sec as some of the OSS-Fuzz fuzzers
-  # are slower than this. 
-  AFL_FUZZER_ARGS="$AFL_FUZZER_ARGS -t 5000+"
+  # Ensure timeout is a bit larger than 1sec as some of the OSS-Fuzz fuzzers
+  # are slower than this.
+  AFL_FUZZER_ARGS="$FUZZER_ARGS $AFL_FUZZER_ARGS -t 5000+"
   # AFL expects at least 1 file in the input dir.
   echo input > ${CORPUS_DIR}/input
   echo afl++ setup:
@@ -135,7 +140,7 @@ elif [[ "$FUZZING_ENGINE" = honggfuzz ]]; then
   # -P: use persistent mode of fuzzing (i.e. LLVMFuzzerTestOneInput)
   # -f: location of the initial (and destination) file corpus
   # -n: number of fuzzing threads (and processes)
-  CMD_LINE="$OUT/honggfuzz -n 1 --exit_upon_crash -R /tmp/${FUZZER}_honggfuzz.report -W $FUZZER_OUT -v -z -P -f \"$CORPUS_DIR\" $(get_dictionary) $* -- \"$OUT/$FUZZER\""
+  CMD_LINE="$OUT/honggfuzz -n 1 --exit_upon_crash -R /tmp/${FUZZER}_honggfuzz.report -W $FUZZER_OUT -v -z -P -f \"$CORPUS_DIR\" $(get_dictionary) $FUZZER_ARGS $* -- \"$OUT/$FUZZER\""
 
 else
 
diff --git a/infra/base-images/base-runner/targets_list b/infra/base-images/base-runner/targets_list
index d35534258..95615c811 100755
--- a/infra/base-images/base-runner/targets_list
+++ b/infra/base-images/base-runner/targets_list
@@ -2,7 +2,8 @@
 
 for binary in $(find $OUT/ -executable -type f); do
   [[ "$binary" != *.so ]] || continue
-  file "$binary" | grep ELF > /dev/null 2>&1 || continue
+  [[ $(basename "$binary") != jazzer_driver* ]] || continue
+  file "$binary" | grep -e ELF -e "shell script" > /dev/null 2>&1 || continue
   grep "LLVMFuzzerTestOneInput" "$binary" > /dev/null 2>&1 || continue
 
   basename "$binary"
diff --git a/infra/base-images/base-runner/test_all.py b/infra/base-images/base-runner/test_all.py
index 925ebde69..16dfcbfa9 100755
--- a/infra/base-images/base-runner/test_all.py
+++ b/infra/base-images/base-runner/test_all.py
@@ -20,12 +20,12 @@ import contextlib
 import multiprocessing
 import os
 import re
-import shutil
 import subprocess
 import stat
 import sys
+import tempfile
 
-TMP_FUZZER_DIR = '/tmp/not-out'
+BASE_TMP_FUZZER_DIR = '/tmp/not-out'
 
 EXECUTABLE = stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH
 
@@ -37,14 +37,6 @@ IGNORED_TARGETS = [
 IGNORED_TARGETS_RE = re.compile('^' + r'$|^'.join(IGNORED_TARGETS) + '$')
 
 
-def recreate_directory(directory):
-  """Creates |directory|. If it already exists than deletes it first before
-  creating."""
-  if os.path.exists(directory):
-    shutil.rmtree(directory)
-  os.mkdir(directory)
-
-
 def move_directory_contents(src_directory, dst_directory):
   """Moves contents of |src_directory| to |dst_directory|."""
   # Use mv because mv preserves file permissions. If we don't preserve file
@@ -67,7 +59,15 @@ def is_elf(filepath):
   return b'ELF' in result.stdout
 
 
-def find_fuzz_targets(directory, fuzzing_language):
+def is_shell_script(filepath):
+  """Returns True if |filepath| is a shell script."""
+  result = subprocess.run(['file', filepath],
+                          stdout=subprocess.PIPE,
+                          check=False)
+  return b'shell script' in result.stdout
+
+
+def find_fuzz_targets(directory):
   """Returns paths to fuzz targets in |directory|."""
   # TODO(https://github.com/google/oss-fuzz/issues/4585): Use libClusterFuzz for
   # this.
@@ -84,10 +84,10 @@ def find_fuzz_targets(directory, fuzzing_language):
       continue
     if not os.stat(path).st_mode & EXECUTABLE:
       continue
-    # Fuzz targets are expected to be ELF binaries for languages other than
-    # Python and Java.
-    if (fuzzing_language != 'python' and fuzzing_language != 'jvm' and
-        not is_elf(path)):
+    # Fuzz targets can either be ELF binaries or shell scripts (e.g. wrapper
+    # scripts for Python and JVM targets or rules_fuzzing builds with runfiles
+    # trees).
+    if not is_elf(path) and not is_shell_script(path):
       continue
     if os.getenv('FUZZING_ENGINE') != 'none':
       with open(path, 'rb') as file_handle:
@@ -132,51 +132,66 @@ def has_ignored_targets(out_dir):
 
 @contextlib.contextmanager
 def use_different_out_dir():
-  """Context manager that moves OUT to TMP_FUZZER_DIR. This is useful for
-  catching hardcoding. Note that this sets the environment variable OUT and
-  therefore must be run before multiprocessing.Pool is created. Resets OUT at
-  the end."""
+  """Context manager that moves OUT to subdirectory of BASE_TMP_FUZZER_DIR. This
+  is useful for catching hardcoding. Note that this sets the environment
+  variable OUT and therefore must be run before multiprocessing.Pool is created.
+  Resets OUT at the end."""
   # Use a fake OUT directory to catch path hardcoding that breaks on
   # ClusterFuzz.
-  out = os.getenv('OUT')
-  initial_out = out
-  recreate_directory(TMP_FUZZER_DIR)
-  out = TMP_FUZZER_DIR
-  # Set this so that run_fuzzer which is called by bad_build_check works
-  # properly.
-  os.environ['OUT'] = out
-  # We move the contents of the directory because we can't move the
-  # directory itself because it is a mount.
-  move_directory_contents(initial_out, out)
-  try:
-    yield out
-  finally:
-    move_directory_contents(out, initial_out)
-    shutil.rmtree(out)
-    os.environ['OUT'] = initial_out
-
-
-def test_all_outside_out(fuzzing_language, allowed_broken_targets_percentage):
+  initial_out = os.getenv('OUT')
+  os.makedirs(BASE_TMP_FUZZER_DIR, exist_ok=True)
+  # Use a random subdirectory of BASE_TMP_FUZZER_DIR to allow running multiple
+  # instances of test_all in parallel (useful for integration testing).
+  with tempfile.TemporaryDirectory(dir=BASE_TMP_FUZZER_DIR) as out:
+    # Set this so that run_fuzzer which is called by bad_build_check works
+    # properly.
+    os.environ['OUT'] = out
+    # We move the contents of the directory because we can't move the
+    # directory itself because it is a mount.
+    move_directory_contents(initial_out, out)
+    try:
+      yield out
+    finally:
+      move_directory_contents(out, initial_out)
+      os.environ['OUT'] = initial_out
+
+
+def test_all_outside_out(allowed_broken_targets_percentage):
   """Wrapper around test_all that changes OUT and returns the result."""
   with use_different_out_dir() as out:
-    return test_all(out, fuzzing_language, allowed_broken_targets_percentage)
+    return test_all(out, allowed_broken_targets_percentage)
 
 
-def test_all(out, fuzzing_language, allowed_broken_targets_percentage):
+def test_all(out, allowed_broken_targets_percentage):
   """Do bad_build_check on all fuzz targets."""
   # TODO(metzman): Refactor so that we can convert test_one to python.
-  fuzz_targets = find_fuzz_targets(out, fuzzing_language)
+  fuzz_targets = find_fuzz_targets(out)
   if not fuzz_targets:
     print('ERROR: No fuzz targets found.')
     return False
 
   pool = multiprocessing.Pool()
   bad_build_results = pool.map(do_bad_build_check, fuzz_targets)
+  pool.close()
+  pool.join()
   broken_targets = get_broken_fuzz_targets(bad_build_results, fuzz_targets)
   broken_targets_count = len(broken_targets)
   if not broken_targets_count:
     return True
 
+  print('Retrying failed fuzz targets sequentially', broken_targets_count)
+  # Retry one at a time; presumably this rules out failures caused by load.
+  retry_targets = [broken_target for broken_target, _ in broken_targets]
+  pool = multiprocessing.Pool(1)
+  bad_build_results = pool.map(do_bad_build_check, retry_targets)
+  pool.close()
+  pool.join()
+  # Pair each retry result with the target path it was produced for.
+  broken_targets = get_broken_fuzz_targets(bad_build_results, retry_targets)
+  broken_targets_count = len(broken_targets)
+  if not broken_targets_count:
+    return True
+
   print('Broken fuzz targets', broken_targets_count)
   total_targets_count = len(fuzz_targets)
   broken_targets_percentage = 100 * broken_targets_count / total_targets_count
@@ -211,11 +226,8 @@ def get_allowed_broken_targets_percentage():
 def main():
   """Does bad_build_check on all fuzz targets in parallel. Returns 0 on success.
   Returns 1 on failure."""
-  # Set these environment variables here so that stdout
-  fuzzing_language = os.getenv('FUZZING_LANGUAGE')
   allowed_broken_targets_percentage = get_allowed_broken_targets_percentage()
-  if not test_all_outside_out(fuzzing_language,
-                              allowed_broken_targets_percentage):
+  if not test_all_outside_out(allowed_broken_targets_percentage):
     return 1
   return 0
 
diff --git a/infra/base-images/base-runner/test_all_test.py b/infra/base-images/base-runner/test_all_test.py
index 3771ec231..b3077ec1e 100644
--- a/infra/base-images/base-runner/test_all_test.py
+++ b/infra/base-images/base-runner/test_all_test.py
@@ -25,15 +25,13 @@ class TestTestAll(unittest.TestCase):
 
   @mock.patch('test_all.find_fuzz_targets', return_value=[])
   @mock.patch('builtins.print')
-  def test_test_all_no_fuzz_targets(self, mocked_print, _):
+  def test_test_all_no_fuzz_targets(self, mock_print, _):
     """Tests that test_all returns False when there are no fuzz targets."""
     outdir = '/out'
-    fuzzing_language = 'c++'
     allowed_broken_targets_percentage = 0
     self.assertFalse(
-        test_all.test_all(outdir, fuzzing_language,
-                          allowed_broken_targets_percentage))
-    mocked_print.assert_called_with('ERROR: No fuzz targets found.')
+        test_all.test_all(outdir, allowed_broken_targets_percentage))
+    mock_print.assert_called_with('ERROR: No fuzz targets found.')
 
 
 if __name__ == '__main__':