diff options
-rw-r--r-- | agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt | 9 | ||||
-rw-r--r-- | agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt | 137 | ||||
-rw-r--r-- | agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt | 8 | ||||
-rw-r--r-- | driver/jvm_tooling.cpp | 6 | ||||
-rw-r--r-- | driver/libfuzzer_driver.cpp | 70 | ||||
-rw-r--r-- | examples/BUILD.bazel | 1 |
6 files changed, 228 insertions, 3 deletions
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
index 1a0cccc4..333f47c6 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt
@@ -20,6 +20,7 @@ import com.code_intelligence.jazzer.instrumentor.InstrumentationType
 import com.code_intelligence.jazzer.instrumentor.loadHooks
 import com.code_intelligence.jazzer.runtime.ManifestUtils
 import java.lang.instrument.Instrumentation
+import java.nio.file.Path
 
 val KNOWN_ARGUMENTS = listOf(
     "instrumentation_includes",
@@ -28,6 +29,7 @@ val KNOWN_ARGUMENTS = listOf(
     "custom_hook_excludes",
     "trace",
     "custom_hooks",
+    "id_sync_file",
 )
 
 fun premain(agentArgs: String?, instrumentation: Instrumentation) {
@@ -73,8 +75,13 @@ fun premain(agentArgs: String?, instrumentation: Instrumentation) {
             }
         }
     }.toSet()
+    val idSyncFile = argumentMap["id_sync_file"]?.let {
+        Path.of(it.single()).also { path ->
+            println("INFO: Synchronizing coverage IDs in ${path.toAbsolutePath()}")
+        }
+    }
 
-    val runtimeInstrumentor = RuntimeInstrumentor(classNameGlobber, dependencyClassNameGlobber, instrumentationTypes)
+    val runtimeInstrumentor = RuntimeInstrumentor(classNameGlobber, dependencyClassNameGlobber, instrumentationTypes, idSyncFile)
     instrumentation.apply {
         addTransformer(runtimeInstrumentor)
     }
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
index e06b7a25..0ce6523b 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt
@@ -55,3 +55,140 @@ internal class TrivialCoverageIdStrategy : CoverageIdStrategy {
     }
 }
 
+/**
+ * Reads the [FileChannel] to the end as a UTF-8 string. Only the bytes actually read are decoded.
+ */
+private fun FileChannel.readFully(): String {
+    check(size() <= Int.MAX_VALUE)
+    val buffer = ByteBuffer.allocate(size().toInt())
+    while (buffer.hasRemaining()) {
+        when (read(buffer)) {
+            0 -> throw IllegalStateException("No bytes read")
+            -1 -> break
+        }
+    }
+    return String(buffer.array(), 0, buffer.position())
+}
+
+/**
+ * Appends [string] to the end of the [FileChannel].
+ */
+private fun FileChannel.append(string: String) {
+    position(size())
+    write(ByteBuffer.wrap(string.toByteArray()))
+}
+
+/**
+ * A strategy for coverage ID generation that synchronizes the IDs assigned to a class with other processes via the
+ * specified [idSyncFile].
+ * This class takes care of synchronizing the access to the file between multiple processes as long as the general
+ * contract of [CoverageIdStrategy] is followed.
+ *
+ * Rationale: Coverage (i.e., edge) IDs differ from other kinds of IDs, such as those generated for call sites or cmp
+ * instructions, in that they should be consecutive, collision-free, and lie in a known, small range. This precludes us
+ * from generating them simply as hashes of class names and explains why we go through the arduous process of
+ * synchronizing them across multiple agents.
+ */
+internal class SynchronizedCoverageIdStrategy(private val idSyncFile: Path) : CoverageIdStrategy {
+    var idFileLock: FileLock? = null
+
+    var cachedFirstId: Int? = null
+    var cachedClassName: String? = null
+    var cachedIdCount: Int? = null
+
+    /**
+     * Obtains a coverage ID for [className] such that all cooperating agent processes will obtain the same ID.
+     * There are two cases to consider:
+     * - This agent process is the first to encounter [className], i.e., it does not find a record for that class in
+     *   [idSyncFile]. In this case, a lock on the file is held until the class has been instrumented and a record with
+     *   the required number of coverage IDs has been added.
+     * - Another agent process has already encountered [className], i.e., there is a record for that class in [idSyncFile].
+     *   In this case, the lock on the file is released immediately and the extracted first coverage ID is returned to
+     *   the caller. The caller is still expected to call [commitIdCount] so that desynchronization can be detected.
+     */
+    override fun obtainFirstId(className: String): Int {
+        try {
+            check(idFileLock == null) { "Already holding a lock on the ID file" }
+            val localIdFile = FileChannel.open(
+                idSyncFile,
+                StandardOpenOption.WRITE,
+                StandardOpenOption.READ
+            )
+            // Wait until we have obtained the lock on the sync file. We hold the lock from this point until we have
+            // finished reading and writing (if necessary) to the file.
+            val localIdFileLock = localIdFile.lock()
+            // Parse the sync file, which consists of lines of the form
+            // <class name>:<first ID>:<num IDs>
+            val idInfo = localIdFileLock.channel().readFully()
+                .lineSequence()
+                .filterNot { it.isBlank() }
+                .map { line ->
+                    val parts = line.split(':')
+                    check(parts.size == 3) { "Expected ID file line to be of the form '<class name>:<first ID>:<num IDs>', got '$line'" }
+                    val lineClassName = parts[0]
+                    val lineFirstId = parts[1].toInt()
+                    check(lineFirstId >= 0) { "Negative first ID in line: $line" }
+                    val lineIdCount = parts[2].toInt()
+                    check(lineIdCount >= 0) { "Negative ID count in line: $line" }
+                    Triple(lineClassName, lineFirstId, lineIdCount)
+                }.toList()
+            cachedClassName = className
+            val idInfoForClass = idInfo.filter { it.first == className }
+            return when (idInfoForClass.size) {
+                0 -> {
+                    // We are the first to encounter this class and thus need to hold the lock until the class has been
+                    // instrumented and we know the required number of coverage IDs.
+                    idFileLock = localIdFileLock
+                    // Compute the next free ID as the maximum over the sums of first ID and ID count, starting at 0 if
+                    // this is the first ID to be assigned. In fact, since this is the only way new lines are added to
+                    // the file, the maximum is always attained by the last line.
+                    val nextFreeId = idInfo.asSequence().map { it.second + it.third }.lastOrNull() ?: 0
+                    cachedFirstId = nextFreeId
+                    nextFreeId
+                }
+                1 -> {
+                    // This class has already been instrumented elsewhere, so we just return the first ID and ID count
+                    // reported from there and release the lock right away by closing the channel (which also frees the
+                    // file descriptor). The caller is still expected to call commitIdCount.
+                    localIdFile.close()
+                    cachedIdCount = idInfoForClass.single().third
+                    idInfoForClass.single().second
+                }
+                else -> {
+                    localIdFile.close()
+                    throw IllegalStateException("Multiple entries for $className in ID file")
+                }
+            }
+        } catch (e: Exception) {
+            throw CoverageIdException(e)
+        }
+    }
+
+    override fun commitIdCount(idCount: Int) {
+        val localIdFileLock = idFileLock
+        try {
+            check(cachedClassName != null)
+            if (localIdFileLock == null) {
+                // We released the lock already in obtainFirstId since the class had already been instrumented
+                // elsewhere. As we know the expected number of IDs for the current class in this case, check for
+                // deviations.
+                check(cachedIdCount != null)
+                check(idCount == cachedIdCount) {
+                    "$cachedClassName has $idCount edges, but $cachedIdCount edges reserved in ID file"
+                }
+            } else {
+                // We are the first to instrument this class and should record the number of IDs in the sync file.
+                check(cachedFirstId != null)
+                localIdFileLock.channel().append("$cachedClassName:$cachedFirstId:$idCount\n")
+            }
+            idFileLock = null
+            cachedFirstId = null
+            cachedIdCount = null
+            cachedClassName = null
+        } catch (e: Exception) {
+            throw CoverageIdException(e)
+        } finally {
+            localIdFileLock?.channel()?.close()
+        }
+    }
+}
diff --git a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
index 83076331..9aef639c 100644
--- a/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
+++ b/agent/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt
@@ -65,10 +65,14 @@ internal class ClassNameGlobber(includes: List<String>, excludes: List<String>)
 internal class RuntimeInstrumentor(
     private val classesToInstrument: ClassNameGlobber,
     private val dependencyClassesToInstrument: ClassNameGlobber,
-    private val instrumentationTypes: Set<InstrumentationType>
+    private val instrumentationTypes: Set<InstrumentationType>,
+    idSyncFile: Path?,
 ) : ClassFileTransformer {
 
-    private val coverageIdSynchronizer = TrivialCoverageIdStrategy()
+    private val coverageIdSynchronizer = if (idSyncFile != null)
+        SynchronizedCoverageIdStrategy(idSyncFile)
+    else
+        TrivialCoverageIdStrategy()
 
     private val includedHooks = instrumentationTypes
         .mapNotNull { type ->
diff --git a/driver/jvm_tooling.cpp b/driver/jvm_tooling.cpp
index f79469a0..68dff9a6 100644
--- a/driver/jvm_tooling.cpp
+++ b/driver/jvm_tooling.cpp
@@ -61,6 +61,11 @@ DEFINE_string(
     "list of instrumentation to perform separated by colon \":\". "
     "Available options are cov, cmp, div, gep, all. These options "
     "correspond to the \"-fsanitize-coverage=trace-*\" flags in clang.");
+DEFINE_string(
+    id_sync_file, "",
+    "path to a file that should be used to synchronize coverage IDs "
+    "between parallel fuzzing processes. Defaults to a temporary file "
+    "created for this purpose if running in parallel.");
 
 DECLARE_bool(hooks);
 
@@ -124,6 +129,7 @@ std::string agentArgsFromFlags() {
            {"custom_hook_includes", FLAGS_custom_hook_includes},
            {"custom_hook_excludes", FLAGS_custom_hook_excludes},
            {"trace", FLAGS_trace},
+           {"id_sync_file", FLAGS_id_sync_file},
        }) {
     if (!flag_pair.second.empty()) {
       args.push_back(flag_pair.first + "=" + flag_pair.second);
diff --git a/driver/libfuzzer_driver.cpp b/driver/libfuzzer_driver.cpp
index fdc9d123..2accc40d 100644
--- a/driver/libfuzzer_driver.cpp
+++ b/driver/libfuzzer_driver.cpp
@@ -14,10 +14,15 @@
 
 #include "libfuzzer_driver.h"
 
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <random>
 #include <string>
 #include <vector>
 
 #include "absl/strings/match.h"
+#include "absl/strings/str_format.h"
 #include "coverage_tracker.h"
 #include "driver/libfuzzer_callbacks.h"
 #include "fuzz_target_runner.h"
@@ -40,6 +45,9 @@ DECLARE_bool(log_prefix);
 // Defined in libfuzzer_callbacks.cpp
 DECLARE_bool(fake_pcs);
 
+// Defined in jvm_tooling.cpp
+DECLARE_string(id_sync_file);
+
 extern "C" void __real___sanitizer_set_death_callback(void (*callback)());
 
 // We use the linker opt -Wl,--wrap=__sanitizer_set_death_callback to wrap the
@@ -49,6 +57,27 @@ extern "C" void __wrap___sanitizer_set_death_callback(void (*callback)()) {
   __real___sanitizer_set_death_callback(callback);
 }
 
+namespace {
+char *additional_arg;
+std::vector<char *> modified_argv;
+
+std::string GetNewTempFilePath() {
+  auto temp_dir = std::filesystem::temp_directory_path();
+
+  std::string temp_filename_suffix(32, '\0');
+  std::random_device rng;
+  std::uniform_int_distribution<int> dist('a', 'z');  // char is not a valid IntType
+  std::generate_n(temp_filename_suffix.begin(), temp_filename_suffix.length(),
+                  [&rng, &dist] { return static_cast<char>(dist(rng)); });
+
+  auto temp_path = temp_dir / ("jazzer-" + temp_filename_suffix);
+  if (std::filesystem::exists(temp_path))
+    throw std::runtime_error("Random temp file path exists: " +
+                             temp_path.string());
+  return temp_path.string();
+}
+}  // namespace
+
 namespace jazzer {
 // A libFuzzer-registered callback that outputs the crashing input, but does
 // not include a stack trace.
@@ -81,6 +110,47 @@ AbstractLibfuzzerDriver::AbstractLibfuzzerDriver(
   // libFuzzer forwards the command line (e.g. with -jobs or -minimize_crash).
   gflags::ParseCommandLineFlags(&our_argc, &our_argv, false);
 
+  if (std::any_of(argv_start, argv_end, [](const std::string_view &arg) {
+        return absl::StartsWith(arg, "-fork=") ||
+               absl::StartsWith(arg, "-jobs=") ||
+               absl::StartsWith(arg, "-merge=");
+      })) {
+    if (FLAGS_id_sync_file.empty()) {
+      // Create an empty temporary file used for coverage ID synchronization and
+      // pass its path to the agent in every child process. This requires adding
+      // the argument to argv for it to be picked up by libFuzzer, which then
+      // forwards it to child processes.
+      FLAGS_id_sync_file = GetNewTempFilePath();
+      std::string new_arg =
+          absl::StrFormat("--id_sync_file=%s", FLAGS_id_sync_file);
+      // This argument can be accessed by libFuzzer at any (later) time and thus
+      // cannot be safely freed by us.
+      additional_arg = strdup(new_arg.c_str());
+      modified_argv = std::vector<char *>(argv_start, argv_end);
+      modified_argv.push_back(additional_arg);
+      // Terminate modified_argv.
+      modified_argv.push_back(nullptr);
+      // Modify argv and argc for libFuzzer. modified_argv must not be changed
+      // after this point.
+      *argc += 1;
+      *argv = modified_argv.data();
+      argv_start = *argv;
+      argv_end = *argv + *argc;
+    }
+    // Creates the file, truncating it if it exists.
+    std::ofstream touch_file(FLAGS_id_sync_file, std::ios_base::trunc);
+
+    auto cleanup_fn = [] {
+      try {
+        std::filesystem::remove(std::filesystem::path(FLAGS_id_sync_file));
+      } catch (...) {
+        // We should not throw exceptions during shutdown.
+      }
+    };
+    std::atexit(cleanup_fn);
+    std::at_quick_exit(cleanup_fn);
+  }
+
   initJvm(*argv_start);
 }
 
diff --git a/examples/BUILD.bazel b/examples/BUILD.bazel
index 54fbbc46..6304bc7f 100644
--- a/examples/BUILD.bazel
+++ b/examples/BUILD.bazel
@@ -39,6 +39,7 @@ java_fuzz_target_test(
     srcs = [
         "src/main/java/com/example/JpegImageParserFuzzer.java",
     ],
+    fuzzer_args = ["-fork=5"],
     target_class = "com.example.JpegImageParserFuzzer",
     deps = [
         "@maven//:org_apache_commons_commons_imaging",