aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFabian Meumertzheim <meumertzheim@code-intelligence.com>2021-02-24 09:03:49 +0100
committerFabian Meumertzheim <fabian@meumertzhe.im>2021-02-24 17:48:03 +0100
commit4b37786dfbf9851ba7c46707061ba7d561b7d10d (patch)
treeeee0bc6681e18e933146b30697990a6de868771b
parent4ad5978b37c1e62f0b506091449c73336e6c6fa6 (diff)
downloadjazzer-api-4b37786dfbf9851ba7c46707061ba7d561b7d10d.tar.gz
Synchronize coverage IDs between separate processes
When run with e.g. the -fork flag, libFuzzer delegates the fuzzing to multiple child processes running concurrently. As each of these processes runs its own JVM with its own instance of the Jazzer agent, different ranges of coverage IDs may be assigned to the same class depending on when it is discovered by that fuzzer process. Since libFuzzer collates the coverage counter buffers, this leads to misreported coverage and unnecessarily large corpora. This commit adds a coverage ID generation strategy that uses a lockable temporary file as a means to synchronize the IDs between multiple processes. This requires injecting new command-line arguments into libFuzzer, building on the previous commit. One of the example fuzz targets is set to be fuzzed in fork mode, which provides test coverage for the new feature.
-rw-r--r--agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt9
-rw-r--r--agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt137
-rw-r--r--agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt8
-rw-r--r--driver/jvm_tooling.cpp6
-rw-r--r--driver/libfuzzer_driver.cpp70
-rw-r--r--examples/BUILD.bazel1
6 files changed, 228 insertions, 3 deletions
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
index 1a0cccc4..333f47c6 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
@@ -20,6 +20,7 @@ import com.code_intelligence.jazzer.instrumentor.InstrumentationType
import com.code_intelligence.jazzer.instrumentor.loadHooks
import com.code_intelligence.jazzer.runtime.ManifestUtils
import java.lang.instrument.Instrumentation
+import java.nio.file.Path
val KNOWN_ARGUMENTS = listOf(
"instrumentation_includes",
@@ -28,6 +29,7 @@ val KNOWN_ARGUMENTS = listOf(
"custom_hook_excludes",
"trace",
"custom_hooks",
+ "id_sync_file",
)
fun premain(agentArgs: String?, instrumentation: Instrumentation) {
@@ -73,8 +75,13 @@ fun premain(agentArgs: String?, instrumentation: Instrumentation) {
}
}
}.toSet()
+ val idSyncFile = argumentMap["id_sync_file"]?.let {
+ Path.of(it.single()).also { path ->
+ println("INFO: Synchronizing coverage IDs in ${path.toAbsolutePath()}")
+ }
+ }
- val runtimeInstrumentor = RuntimeInstrumentor(classNameGlobber, dependencyClassNameGlobber, instrumentationTypes)
+ val runtimeInstrumentor = RuntimeInstrumentor(classNameGlobber, dependencyClassNameGlobber, instrumentationTypes, idSyncFile)
instrumentation.apply {
addTransformer(runtimeInstrumentor)
}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
index e06b7a25..0ce6523b 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
@@ -55,3 +55,140 @@ internal class TrivialCoverageIdStrategy : CoverageIdStrategy {
}
}
+/**
+ * Reads the [FileChannel] from its current position to the end and decodes the bytes as a UTF-8 string.
+ *
+ * A buffer of `size()` bytes is allocated up front, so the file has to fit into a single [ByteBuffer]. If the channel
+ * reports end-of-file before the buffer has been filled, only the bytes that were actually read are decoded instead of
+ * padding the result with NUL characters.
+ *
+ * @return the bytes read, decoded as UTF-8
+ * @throws IllegalStateException if the file is larger than [Int.MAX_VALUE] bytes or a read makes no progress
+ */
+private fun FileChannel.readFully(): String {
+    val fileSize = size()
+    check(fileSize <= Int.MAX_VALUE) { "File of $fileSize bytes is too large to be read into a single buffer" }
+    val buffer = ByteBuffer.allocate(fileSize.toInt())
+    while (buffer.hasRemaining()) {
+        when (read(buffer)) {
+            0 -> throw IllegalStateException("No bytes read")
+            -1 -> break
+        }
+    }
+    // Decode only the filled prefix of the backing array; the remainder is zero-initialized padding.
+    return String(buffer.array(), 0, buffer.position(), Charsets.UTF_8)
+}
+
+/**
+ * Appends [string], encoded as UTF-8, to the end of the [FileChannel].
+ *
+ * A single `write` call is allowed to write fewer bytes than requested, so it is repeated until the buffer has been
+ * drained completely.
+ */
+private fun FileChannel.append(string: String) {
+    position(size())
+    val buffer = ByteBuffer.wrap(string.toByteArray(Charsets.UTF_8))
+    while (buffer.hasRemaining()) {
+        write(buffer)
+    }
+}
+
+/**
+ * A strategy for coverage ID generation that synchronizes the IDs assigned to a class with other processes via the
+ * specified [idSyncFile].
+ * This class takes care of synchronizing the access to the file between multiple processes as long as the general
+ * contract of [CoverageIdStrategy] is followed.
+ *
+ * The sync file consists of lines of the form `<class name>:<first ID>:<num IDs>`. Every call to [obtainFirstId]
+ * opens its own channel to the file; the channel is closed (which also releases the file lock) as soon as it is no
+ * longer needed, at the latest at the end of the matching [commitIdCount] call.
+ *
+ * Rationale: Coverage (i.e., edge) IDs differ from other kinds of IDs, such as those generated for call sites or cmp
+ * instructions, in that they should be consecutive, collision-free, and lie in a known, small range. This precludes us
+ * from generating them simply as hashes of class names and explains why we go through the arduous process of
+ * synchronizing them across multiple agents.
+ */
+internal class SynchronizedCoverageIdStrategy(private val idSyncFile: Path) : CoverageIdStrategy {
+    // Non-null only between an obtainFirstId call that found no record for the class and the matching commitIdCount.
+    var idFileLock: FileLock? = null
+
+    var cachedFirstId: Int? = null
+    var cachedClassName: String? = null
+    var cachedIdCount: Int? = null
+
+    /**
+     * Obtains a coverage ID for [className] such that all cooperating agent processes will obtain the same ID.
+     * There are two cases to consider:
+     * - This agent process is the first to encounter [className], i.e., it does not find a record for that class in
+     *   [idSyncFile]. In this case, a lock on the file is held until the class has been instrumented and a record with
+     *   the required number of coverage IDs has been added.
+     * - Another agent process has already encountered [className], i.e., there is a record for that class in
+     *   [idSyncFile]. In this case, the lock on the file is returned immediately and the extracted first coverage ID
+     *   is returned to the caller. The caller is still expected to call [commitIdCount] so that desynchronization can
+     *   be detected.
+     *
+     * @return the first coverage ID to use for [className]
+     * @throws CoverageIdException if the sync file cannot be opened, locked, read or parsed, if it contains multiple
+     *   records for [className], or if a previous call has not been completed with [commitIdCount]
+     */
+    override fun obtainFirstId(className: String): Int {
+        try {
+            check(idFileLock == null) { "Already holding a lock on the ID file" }
+            val localIdFile = FileChannel.open(
+                idSyncFile,
+                StandardOpenOption.WRITE,
+                StandardOpenOption.READ
+            )
+            // Set only once the lock has been handed over to commitIdCount. In every other case, including
+            // failures, the channel is closed below, which also releases the lock.
+            var lockHandedOver = false
+            try {
+                // Wait until we have obtained the lock on the sync file. We hold the lock from this point until we
+                // have finished reading and writing (if necessary) to the file.
+                val localIdFileLock = localIdFile.lock()
+                // Parse the sync file, which consists of lines of the form
+                // <class name>:<first ID>:<num IDs>
+                val idInfo = localIdFile.readFully()
+                    .lineSequence()
+                    .filterNot { it.isBlank() }
+                    .map { line ->
+                        val parts = line.split(':')
+                        check(parts.size == 3) { "Expected ID file line to be of the form '<class name>:<first ID>:<num IDs>', got '$line'" }
+                        val lineClassName = parts[0]
+                        val lineFirstId = parts[1].toInt()
+                        check(lineFirstId >= 0) { "Negative first ID in line: $line" }
+                        val lineIdCount = parts[2].toInt()
+                        check(lineIdCount >= 0) { "Negative ID count in line: $line" }
+                        Triple(lineClassName, lineFirstId, lineIdCount)
+                    }.toList()
+                cachedClassName = className
+                val idInfoForClass = idInfo.filter { it.first == className }
+                return when (idInfoForClass.size) {
+                    0 -> {
+                        // We are the first to encounter this class and thus need to hold the lock until the class
+                        // has been instrumented and we know the required number of coverage IDs.
+                        // Compute the next free ID as the maximum over the sums of first ID and ID count, starting
+                        // at 0 if this is the first ID to be assigned. Since this is the only way new lines are
+                        // added to the file, the maximum is always attained by the last line.
+                        val nextFreeId = idInfo.lastOrNull()?.let { it.second + it.third } ?: 0
+                        cachedFirstId = nextFreeId
+                        idFileLock = localIdFileLock
+                        lockHandedOver = true
+                        nextFreeId
+                    }
+                    1 -> {
+                        // This class has already been instrumented elsewhere, so we just return the first ID and
+                        // remember the ID count reported from there. The lock is released right away when the
+                        // channel is closed below. The caller is still expected to call commitIdCount.
+                        val (_, firstId, idCount) = idInfoForClass.single()
+                        cachedIdCount = idCount
+                        firstId
+                    }
+                    else -> throw IllegalStateException("Multiple entries for $className in ID file")
+                }
+            } finally {
+                if (!lockHandedOver) {
+                    localIdFile.close()
+                }
+            }
+        } catch (e: Exception) {
+            throw CoverageIdException(e)
+        }
+    }
+
+    /**
+     * Completes the call to [obtainFirstId] that preceded it.
+     * If this process was the first to encounter the class, a record with [idCount] IDs is appended to the sync file.
+     * Otherwise, [idCount] is compared with the count found in the existing record.
+     * In both cases, all cached state is reset and the sync file channel (and with it the lock) is released, even if
+     * the commit fails, so that the strategy can be used for the next class.
+     *
+     * @throws CoverageIdException if no [obtainFirstId] call is pending, if [idCount] differs from the recorded count,
+     *   or if the sync file cannot be written or closed
+     */
+    override fun commitIdCount(idCount: Int) {
+        val localIdFileLock = idFileLock
+        try {
+            try {
+                check(cachedClassName != null)
+                if (localIdFileLock == null) {
+                    // We released the lock already in obtainFirstId since the class had already been instrumented
+                    // elsewhere. As we know the expected number of IDs for the current class in this case, check
+                    // for deviations.
+                    check(cachedIdCount != null)
+                    check(idCount == cachedIdCount) {
+                        "$cachedClassName has $idCount edges, but $cachedIdCount edges reserved in ID file"
+                    }
+                } else {
+                    // We are the first to instrument this class and should record the number of IDs in the sync
+                    // file.
+                    check(cachedFirstId != null)
+                    localIdFileLock.channel().append("$cachedClassName:$cachedFirstId:$idCount\n")
+                }
+            } finally {
+                idFileLock = null
+                cachedFirstId = null
+                cachedIdCount = null
+                cachedClassName = null
+                // Closing the channel also releases the lock that was acquired through it.
+                localIdFileLock?.channel()?.close()
+            }
+        } catch (e: Exception) {
+            throw CoverageIdException(e)
+        }
+    }
+}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
index 83076331..9aef639c 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
@@ -65,10 +65,14 @@ internal class ClassNameGlobber(includes: List<String>, excludes: List<String>)
internal class RuntimeInstrumentor(
private val classesToInstrument: ClassNameGlobber,
private val dependencyClassesToInstrument: ClassNameGlobber,
- private val instrumentationTypes: Set<InstrumentationType>
+ private val instrumentationTypes: Set<InstrumentationType>,
+ idSyncFile: Path?,
) : ClassFileTransformer {
- private val coverageIdSynchronizer = TrivialCoverageIdStrategy()
+ private val coverageIdSynchronizer = if (idSyncFile != null)
+ SynchronizedCoverageIdStrategy(idSyncFile)
+ else
+ TrivialCoverageIdStrategy()
private val includedHooks = instrumentationTypes
.mapNotNull { type ->
diff --git a/driver/jvm_tooling.cpp b/driver/jvm_tooling.cpp
index f79469a0..68dff9a6 100644
--- a/driver/jvm_tooling.cpp
+++ b/driver/jvm_tooling.cpp
@@ -61,6 +61,11 @@ DEFINE_string(
"list of instrumentation to perform separated by colon \":\". "
"Available options are cov, cmp, div, gep, all. These options "
"correspond to the \"-fsanitize-coverage=trace-*\" flags in clang.");
+// Command-line flag holding the path of the coverage ID sync file. When it is
+// non-empty, agentArgsFromFlags() forwards it to the agent as the
+// "id_sync_file" argument.
+DEFINE_string(
+ id_sync_file, "",
+ "path to a file that should be used to synchronize coverage IDs "
+ "between parallel fuzzing processes. Defaults to a temporary file "
+ "created for this purpose if running in parallel.");
DECLARE_bool(hooks);
@@ -124,6 +129,7 @@ std::string agentArgsFromFlags() {
{"custom_hook_includes", FLAGS_custom_hook_includes},
{"custom_hook_excludes", FLAGS_custom_hook_excludes},
{"trace", FLAGS_trace},
+ {"id_sync_file", FLAGS_id_sync_file},
}) {
if (!flag_pair.second.empty()) {
args.push_back(flag_pair.first + "=" + flag_pair.second);
diff --git a/driver/libfuzzer_driver.cpp b/driver/libfuzzer_driver.cpp
index fdc9d123..2accc40d 100644
--- a/driver/libfuzzer_driver.cpp
+++ b/driver/libfuzzer_driver.cpp
@@ -14,10 +14,15 @@
#include "libfuzzer_driver.h"
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <random>
#include <string>
#include <vector>
#include "absl/strings/match.h"
+#include "absl/strings/str_format.h"
#include "coverage_tracker.h"
#include "driver/libfuzzer_callbacks.h"
#include "fuzz_target_runner.h"
@@ -40,6 +45,9 @@ DECLARE_bool(log_prefix);
// Defined in libfuzzer_callbacks.cpp
DECLARE_bool(fake_pcs);
+// Defined in jvm_tooling.cpp
+DECLARE_string(id_sync_file);
+
extern "C" void __real___sanitizer_set_death_callback(void (*callback)());
// We use the linker opt -Wl,--wrap=__sanitizer_set_death_callback to wrap the
@@ -49,6 +57,27 @@ extern "C" void __wrap___sanitizer_set_death_callback(void (*callback)()) {
__real___sanitizer_set_death_callback(callback);
}
+namespace {
+// Heap-allocated copy of the "--id_sync_file=..." argument injected into argv.
+// It is intentionally never freed since libFuzzer may read it at any later
+// time.
+char *additional_arg;
+// Backing storage for the argv array handed to libFuzzer once the additional
+// argument has been appended.
+std::vector<char *> modified_argv;
+
+// Returns a path of the form <temp dir>/jazzer-<32 random lowercase letters>.
+// The file itself is not created. Throws std::runtime_error if a file with the
+// generated name already exists.
+std::string GetNewTempFilePath() {
+ auto temp_dir = std::filesystem::temp_directory_path();
+
+ std::string temp_filename_suffix(32, '\0');
+ std::random_device rng;
+ // char is not a permitted result type for std::uniform_int_distribution, so
+ // draw an int offset into the alphabet and convert it explicitly.
+ std::uniform_int_distribution<int> dist(0, 'z' - 'a');
+ std::generate_n(temp_filename_suffix.begin(), temp_filename_suffix.length(),
+ [&rng, &dist] { return static_cast<char>('a' + dist(rng)); });
+
+ auto temp_path = temp_dir / ("jazzer-" + temp_filename_suffix);
+ if (std::filesystem::exists(temp_path))
+ throw std::runtime_error("Random temp file path exists: " +
+ temp_path.string());
+ // Convert explicitly: the implicit conversion only exists where
+ // std::filesystem::path::string_type is std::string.
+ return temp_path.string();
+}
+} // namespace
+
namespace jazzer {
// A libFuzzer-registered callback that outputs the crashing input, but does
// not include a stack trace.
@@ -81,6 +110,47 @@ AbstractLibfuzzerDriver::AbstractLibfuzzerDriver(
// libFuzzer forwards the command line (e.g. with -jobs or -minimize_crash).
gflags::ParseCommandLineFlags(&our_argc, &our_argv, false);
+ if (std::any_of(argv_start, argv_end, [](const std::string_view &arg) {
+ return absl::StartsWith(arg, "-fork=") ||
+ absl::StartsWith(arg, "-jobs=") ||
+ absl::StartsWith(arg, "-merge=");
+ })) {
+ if (FLAGS_id_sync_file.empty()) {
+ // Create an empty temporary file used for coverage ID synchronization and
+ // pass its path to the agent in every child process. This requires adding
+ // the argument to argv for it to be picked up by libFuzzer, which then
+ // forwards it to child processes.
+ FLAGS_id_sync_file = GetNewTempFilePath();
+ std::string new_arg =
+ absl::StrFormat("--id_sync_file=%s", FLAGS_id_sync_file);
+ // This argument can be accessed by libFuzzer at any (later) time and thus
+ // cannot be safely freed by us.
+ additional_arg = strdup(new_arg.c_str());
+ modified_argv = std::vector<char *>(argv_start, argv_end);
+ modified_argv.push_back(additional_arg);
+ // Terminate modified_argv.
+ modified_argv.push_back(nullptr);
+ // Modify argv and argc for libFuzzer. modified_argv must not be changed
+ // after this point.
+ *argc += 1;
+ *argv = modified_argv.data();
+ argv_start = *argv;
+ argv_end = *argv + *argc;
+ }
+ // Creates the file, truncating it if it exists.
+ std::ofstream touch_file(FLAGS_id_sync_file, std::ios_base::trunc);
+
+ auto cleanup_fn = [] {
+ try {
+ std::filesystem::remove(std::filesystem::path(FLAGS_id_sync_file));
+ } catch (...) {
+ // We should not throw exceptions during shutdown.
+ }
+ };
+ std::atexit(cleanup_fn);
+ std::at_quick_exit(cleanup_fn);
+ }
+
initJvm(*argv_start);
}
diff --git a/examples/BUILD.bazel b/examples/BUILD.bazel
index 54fbbc46..6304bc7f 100644
--- a/examples/BUILD.bazel
+++ b/examples/BUILD.bazel
@@ -39,6 +39,7 @@ java_fuzz_target_test(
srcs = [
"src/main/java/com/example/JpegImageParserFuzzer.java",
],
+ fuzzer_args = ["-fork=5"],
target_class = "com.example.JpegImageParserFuzzer",
deps = [
"@maven//:org_apache_commons_commons_imaging",