aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt9
-rw-r--r--agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt137
-rw-r--r--agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt8
-rw-r--r--driver/jvm_tooling.cpp6
-rw-r--r--driver/libfuzzer_driver.cpp70
-rw-r--r--examples/BUILD.bazel1
6 files changed, 228 insertions, 3 deletions
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
index 1a0cccc4..333f47c6 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
@@ -20,6 +20,7 @@ import com.code_intelligence.jazzer.instrumentor.InstrumentationType
import com.code_intelligence.jazzer.instrumentor.loadHooks
import com.code_intelligence.jazzer.runtime.ManifestUtils
import java.lang.instrument.Instrumentation
+import java.nio.file.Path
val KNOWN_ARGUMENTS = listOf(
"instrumentation_includes",
@@ -28,6 +29,7 @@ val KNOWN_ARGUMENTS = listOf(
"custom_hook_excludes",
"trace",
"custom_hooks",
+ "id_sync_file",
)
fun premain(agentArgs: String?, instrumentation: Instrumentation) {
@@ -73,8 +75,13 @@ fun premain(agentArgs: String?, instrumentation: Instrumentation) {
}
}
}.toSet()
+ val idSyncFile = argumentMap["id_sync_file"]?.let {
+ Path.of(it.single()).also { path ->
+ println("INFO: Synchronizing coverage IDs in ${path.toAbsolutePath()}")
+ }
+ }
- val runtimeInstrumentor = RuntimeInstrumentor(classNameGlobber, dependencyClassNameGlobber, instrumentationTypes)
+ val runtimeInstrumentor = RuntimeInstrumentor(classNameGlobber, dependencyClassNameGlobber, instrumentationTypes, idSyncFile)
instrumentation.apply {
addTransformer(runtimeInstrumentor)
}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
index e06b7a25..0ce6523b 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
@@ -55,3 +55,140 @@ internal class TrivialCoverageIdStrategy : CoverageIdStrategy {
}
}
+/**
+ * Reads the [FileChannel] from its current position to the end and decodes the bytes as UTF-8.
+ *
+ * Only the bytes that were actually read are decoded. If the end of the file is reached before
+ * the buffer (sized after [FileChannel.size]) is full, e.g. because the channel was not positioned
+ * at the start of the file, the unread remainder of the buffer is not turned into NUL characters.
+ *
+ * @return the decoded content, or an empty string if nothing could be read.
+ * @throws IllegalStateException if the file is larger than [Int.MAX_VALUE] bytes or a read makes
+ * no progress.
+ */
+private fun FileChannel.readFully(): String {
+    check(size() <= Int.MAX_VALUE) { "File is too large to be read into memory: ${size()} bytes" }
+    val buffer = ByteBuffer.allocate(size().toInt())
+    while (buffer.hasRemaining()) {
+        when (read(buffer)) {
+            // A blocking read into a non-full buffer should always make progress.
+            0 -> throw IllegalStateException("No bytes read")
+            // End of file reached before the buffer was filled.
+            -1 -> break
+        }
+    }
+    // buffer.position() is the number of bytes read so far; everything beyond it is padding.
+    return String(buffer.array(), 0, buffer.position(), Charsets.UTF_8)
+}
+
+/**
+ * Moves the [FileChannel] to the current end of the file and writes the UTF-8 encoding of
+ * [string] there.
+ */
+private fun FileChannel.append(string: String) {
+    val encoded = ByteBuffer.wrap(string.toByteArray())
+    position(size())
+    write(encoded)
+}
+
+/**
+ * A strategy for coverage ID generation that synchronizes the IDs assigned to a class with other processes via the
+ * specified [idSyncFile].
+ * This class takes care of synchronizing the access to the file between multiple processes as long as the general
+ * contract of [CoverageIdStrategy] is followed.
+ *
+ * Rationale: Coverage (i.e., edge) IDs differ from other kinds of IDs, such as those generated for call sites or cmp
+ * instructions, in that they should be consecutive, collision-free, and lie in a known, small range. This precludes us
+ * from generating them simply as hashes of class names and explains why we go through the arduous process of
+ * synchronizing them across multiple agents.
+ *
+ * Resource handling: every call to [obtainFirstId] opens a fresh channel on [idSyncFile]. The channel is always closed
+ * again, which also releases the lock held on it: either right away in [obtainFirstId] (class already known, or an
+ * error occurred) or in the matching [commitIdCount] call.
+ */
+internal class SynchronizedCoverageIdStrategy(private val idSyncFile: Path) : CoverageIdStrategy {
+    // Non-null exactly while this process is the first to instrument the current class and thus still has to append
+    // its record to the sync file.
+    var idFileLock: FileLock? = null
+
+    // State carried over from obtainFirstId to the matching commitIdCount call.
+    var cachedFirstId: Int? = null
+    var cachedClassName: String? = null
+    var cachedIdCount: Int? = null
+
+    /**
+     * Obtains a coverage ID for [className] such that all cooperating agent processes will obtain the same ID.
+     * There are two cases to consider:
+     * - This agent process is the first to encounter [className], i.e., it does not find a record for that class in
+     *   [idSyncFile]. In this case, a lock on the file is held until the class has been instrumented and a record with
+     *   the required number of coverage IDs has been added.
+     * - Another agent process has already encountered [className], i.e., there is a record for that class in
+     *   [idSyncFile]. In this case, the lock on the file is returned immediately and the extracted first coverage ID
+     *   is returned to the caller. The caller is still expected to call [commitIdCount] so that desynchronization can
+     *   be detected.
+     *
+     * @throws CoverageIdException if a lock is already held, the sync file cannot be opened, locked or read, or its
+     * content is malformed. The lock acquired by this call is released before the exception is thrown.
+     */
+    override fun obtainFirstId(className: String): Int {
+        try {
+            check(idFileLock == null) { "Already holding a lock on the ID file" }
+            val localIdFile = FileChannel.open(
+                idSyncFile,
+                StandardOpenOption.WRITE,
+                StandardOpenOption.READ
+            )
+            try {
+                // Wait until we have obtained the lock on the sync file. We hold the lock from this point until we
+                // have finished reading and writing (if necessary) to the file.
+                val localIdFileLock = localIdFile.lock()
+                // Parse the sync file, which consists of lines of the form
+                // <class name>:<first ID>:<num IDs>
+                val idInfo = localIdFile.readFully()
+                    .lineSequence()
+                    .filterNot { it.isBlank() }
+                    .map { line ->
+                        val parts = line.split(':')
+                        check(parts.size == 3) { "Expected ID file line to be of the form '<class name>:<first ID>:<num IDs>', got '$line'" }
+                        val lineClassName = parts[0]
+                        val lineFirstId = parts[1].toInt()
+                        check(lineFirstId >= 0) { "Negative first ID in line: $line" }
+                        val lineIdCount = parts[2].toInt()
+                        check(lineIdCount >= 0) { "Negative ID count in line: $line" }
+                        Triple(lineClassName, lineFirstId, lineIdCount)
+                    }.toList()
+                cachedClassName = className
+                val idInfoForClass = idInfo.filter { it.first == className }
+                return when (idInfoForClass.size) {
+                    0 -> {
+                        // We are the first to encounter this class and thus need to hold the lock until the class has
+                        // been instrumented and we know the required number of coverage IDs.
+                        idFileLock = localIdFileLock
+                        // Compute the next free ID as the maximum over the sums of first ID and ID count, starting at
+                        // 0 if this is the first ID to be assigned. In fact, since this is the only way new lines are
+                        // added to the file, the maximum is always attained by the last line.
+                        val nextFreeId = idInfo.asSequence().map { it.second + it.third }.lastOrNull() ?: 0
+                        cachedFirstId = nextFreeId
+                        nextFreeId
+                    }
+                    1 -> {
+                        // This class has already been instrumented elsewhere, so we just return the first ID and ID
+                        // count reported from there and give up the lock right away by closing the channel. The
+                        // caller is still expected to call commitIdCount.
+                        val (_, firstId, idCount) = idInfoForClass.single()
+                        localIdFile.close()
+                        cachedIdCount = idCount
+                        firstId
+                    }
+                    else -> throw IllegalStateException("Multiple entries for $className in ID file")
+                }
+            } catch (e: Exception) {
+                // Never keep the file locked after a failure: other agent processes would block on it indefinitely.
+                // Closing the channel releases the lock acquired above.
+                idFileLock = null
+                try {
+                    localIdFile.close()
+                } catch (closeFailure: Exception) {
+                    e.addSuppressed(closeFailure)
+                }
+                throw e
+            }
+        } catch (e: Exception) {
+            throw CoverageIdException(e)
+        }
+    }
+
+    /**
+     * Completes the transaction started by [obtainFirstId] for the current class.
+     * If this process was the first to instrument the class, a record with [idCount] IDs is appended to
+     * [idSyncFile]; otherwise [idCount] is compared against the count recorded by the process that was.
+     * In every case the cached state is reset and the sync file channel (and with it the lock) is closed, so that a
+     * failure here does not prevent subsequent calls to [obtainFirstId].
+     *
+     * @throws CoverageIdException if [obtainFirstId] was not called before, the ID count deviates from the recorded
+     * one, or the sync file cannot be written or closed.
+     */
+    override fun commitIdCount(idCount: Int) {
+        val localIdFileLock = idFileLock
+        try {
+            try {
+                check(cachedClassName != null)
+                if (localIdFileLock == null) {
+                    // We released the lock already in obtainFirstId since the class had already been instrumented
+                    // elsewhere. As we know the expected number of IDs for the current class in this case, check for
+                    // deviations.
+                    check(cachedIdCount != null)
+                    check(idCount == cachedIdCount) {
+                        "$cachedClassName has $idCount edges, but $cachedIdCount edges reserved in ID file"
+                    }
+                } else {
+                    // We are the first to instrument this class and should record the number of IDs in the sync file.
+                    check(cachedFirstId != null)
+                    localIdFileLock.channel().append("$cachedClassName:$cachedFirstId:$idCount\n")
+                }
+            } finally {
+                idFileLock = null
+                cachedFirstId = null
+                cachedIdCount = null
+                cachedClassName = null
+                // Closing the channel releases the lock and frees the underlying file descriptor.
+                localIdFileLock?.channel()?.close()
+            }
+        } catch (e: Exception) {
+            throw CoverageIdException(e)
+        }
+    }
+}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
index 83076331..9aef639c 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
@@ -65,10 +65,14 @@ internal class ClassNameGlobber(includes: List<String>, excludes: List<String>)
internal class RuntimeInstrumentor(
private val classesToInstrument: ClassNameGlobber,
private val dependencyClassesToInstrument: ClassNameGlobber,
- private val instrumentationTypes: Set<InstrumentationType>
+ private val instrumentationTypes: Set<InstrumentationType>,
+ idSyncFile: Path?,
) : ClassFileTransformer {
- private val coverageIdSynchronizer = TrivialCoverageIdStrategy()
+ private val coverageIdSynchronizer = if (idSyncFile != null)
+ SynchronizedCoverageIdStrategy(idSyncFile)
+ else
+ TrivialCoverageIdStrategy()
private val includedHooks = instrumentationTypes
.mapNotNull { type ->
diff --git a/driver/jvm_tooling.cpp b/driver/jvm_tooling.cpp
index f79469a0..68dff9a6 100644
--- a/driver/jvm_tooling.cpp
+++ b/driver/jvm_tooling.cpp
@@ -61,6 +61,11 @@ DEFINE_string(
"list of instrumentation to perform separated by colon \":\". "
"Available options are cov, cmp, div, gep, all. These options "
"correspond to the \"-fsanitize-coverage=trace-*\" flags in clang.");
+DEFINE_string(
+ id_sync_file, "",
+ "path to a file that should be used to synchronize coverage IDs "
+ "between parallel fuzzing processes. Defaults to a temporary file "
+ "created for this purpose if running in parallel.");
DECLARE_bool(hooks);
@@ -124,6 +129,7 @@ std::string agentArgsFromFlags() {
{"custom_hook_includes", FLAGS_custom_hook_includes},
{"custom_hook_excludes", FLAGS_custom_hook_excludes},
{"trace", FLAGS_trace},
+ {"id_sync_file", FLAGS_id_sync_file},
}) {
if (!flag_pair.second.empty()) {
args.push_back(flag_pair.first + "=" + flag_pair.second);
diff --git a/driver/libfuzzer_driver.cpp b/driver/libfuzzer_driver.cpp
index fdc9d123..2accc40d 100644
--- a/driver/libfuzzer_driver.cpp
+++ b/driver/libfuzzer_driver.cpp
@@ -14,10 +14,15 @@
#include "libfuzzer_driver.h"
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <random>
#include <string>
#include <vector>
#include "absl/strings/match.h"
+#include "absl/strings/str_format.h"
#include "coverage_tracker.h"
#include "driver/libfuzzer_callbacks.h"
#include "fuzz_target_runner.h"
@@ -40,6 +45,9 @@ DECLARE_bool(log_prefix);
// Defined in libfuzzer_callbacks.cpp
DECLARE_bool(fake_pcs);
+// Defined in jvm_tooling.cpp
+DECLARE_string(id_sync_file);
+
extern "C" void __real___sanitizer_set_death_callback(void (*callback)());
// We use the linker opt -Wl,--wrap=__sanitizer_set_death_callback to wrap the
@@ -49,6 +57,27 @@ extern "C" void __wrap___sanitizer_set_death_callback(void (*callback)()) {
__real___sanitizer_set_death_callback(callback);
}
+namespace {
+// Storage for the extra "--id_sync_file=..." argument appended to the argv
+// handed to libFuzzer. It has to outlive that argv, hence the global.
+char *additional_arg;
+// Storage for the extended, nullptr-terminated copy of argv.
+std::vector<char *> modified_argv;
+
+// Returns a path of the form <temp dir>/jazzer-<32 random lowercase letters>
+// that did not exist at the time of the call. The file itself is not created.
+// Throws std::runtime_error if the randomly chosen path already exists.
+std::string GetNewTempFilePath() {
+  auto temp_dir = std::filesystem::temp_directory_path();
+
+  std::string temp_filename_suffix(32, '\0');
+  std::random_device rng;
+  // std::uniform_int_distribution is only specified for short, int, long, long
+  // long and their unsigned counterparts; instantiating it with char is
+  // undefined behavior. Draw ints and narrow them explicitly instead.
+  std::uniform_int_distribution<int> dist('a', 'z');
+  std::generate_n(temp_filename_suffix.begin(), temp_filename_suffix.length(),
+                  [&rng, &dist] { return static_cast<char>(dist(rng)); });
+
+  auto temp_path = temp_dir / ("jazzer-" + temp_filename_suffix);
+  if (std::filesystem::exists(temp_path))
+    throw std::runtime_error("Random temp file path exists: " +
+                             temp_path.string());
+  // Convert explicitly: the implicit conversion yields the native string type,
+  // which is not std::string on every platform.
+  return temp_path.string();
+}
+}  // namespace
+
namespace jazzer {
// A libFuzzer-registered callback that outputs the crashing input, but does
// not include a stack trace.
@@ -81,6 +110,47 @@ AbstractLibfuzzerDriver::AbstractLibfuzzerDriver(
// libFuzzer forwards the command line (e.g. with -jobs or -minimize_crash).
gflags::ParseCommandLineFlags(&our_argc, &our_argv, false);
+ if (std::any_of(argv_start, argv_end, [](const std::string_view &arg) {
+ return absl::StartsWith(arg, "-fork=") ||
+ absl::StartsWith(arg, "-jobs=") ||
+ absl::StartsWith(arg, "-merge=");
+ })) {
+ if (FLAGS_id_sync_file.empty()) {
+ // Create an empty temporary file used for coverage ID synchronization and
+ // pass its path to the agent in every child process. This requires adding
+ // the argument to argv for it to be picked up by libFuzzer, which then
+ // forwards it to child processes.
+ FLAGS_id_sync_file = GetNewTempFilePath();
+ std::string new_arg =
+ absl::StrFormat("--id_sync_file=%s", FLAGS_id_sync_file);
+ // This argument can be accessed by libFuzzer at any (later) time and thus
+ // cannot be safely freed by us.
+ additional_arg = strdup(new_arg.c_str());
+ modified_argv = std::vector<char *>(argv_start, argv_end);
+ modified_argv.push_back(additional_arg);
+ // Terminate modified_argv.
+ modified_argv.push_back(nullptr);
+ // Modify argv and argc for libFuzzer. modified_argv must not be changed
+ // after this point.
+ *argc += 1;
+ *argv = modified_argv.data();
+ argv_start = *argv;
+ argv_end = *argv + *argc;
+ }
+ // Creates the file, truncating it if it exists.
+ std::ofstream touch_file(FLAGS_id_sync_file, std::ios_base::trunc);
+
+ auto cleanup_fn = [] {
+ try {
+ std::filesystem::remove(std::filesystem::path(FLAGS_id_sync_file));
+ } catch (...) {
+ // We should not throw exceptions during shutdown.
+ }
+ };
+ std::atexit(cleanup_fn);
+ std::at_quick_exit(cleanup_fn);
+ }
+
initJvm(*argv_start);
}
diff --git a/examples/BUILD.bazel b/examples/BUILD.bazel
index 54fbbc46..6304bc7f 100644
--- a/examples/BUILD.bazel
+++ b/examples/BUILD.bazel
@@ -39,6 +39,7 @@ java_fuzz_target_test(
srcs = [
"src/main/java/com/example/JpegImageParserFuzzer.java",
],
+ fuzzer_args = ["-fork=5"],
target_class = "com.example.JpegImageParserFuzzer",
deps = [
"@maven//:org_apache_commons_commons_imaging",