diff options
Diffstat (limited to 'driver/src/main')
18 files changed, 2564 insertions, 0 deletions
diff --git a/driver/src/main/java/com/code_intelligence/jazzer/driver/BUILD.bazel b/driver/src/main/java/com/code_intelligence/jazzer/driver/BUILD.bazel new file mode 100644 index 00000000..c8e6ba1e --- /dev/null +++ b/driver/src/main/java/com/code_intelligence/jazzer/driver/BUILD.bazel @@ -0,0 +1,64 @@ +load("@fmeum_rules_jni//jni:defs.bzl", "java_jni_library") + +java_library( + name = "driver", + srcs = [":Driver.java"], + visibility = [ + "//agent:__pkg__", + ], + deps = [ + ":fuzz_target_runner", + ":opt", + ":utils", + "//agent/src/main/java/com/code_intelligence/jazzer/agent:agent_lib", + "@net_bytebuddy_byte_buddy_agent//jar", + ], +) + +java_jni_library( + name = "fuzz_target_runner", + srcs = ["FuzzTargetRunner.java"], + native_libs = [ + "//driver/src/main/native/com/code_intelligence/jazzer/driver:jazzer_driver", + ], + visibility = [ + "//agent:__pkg__", + "//driver/src/main/native/com/code_intelligence/jazzer/driver:__pkg__", + "//driver/src/test:__subpackages__", + ], + deps = [ + ":opt", + ":reproducer_template", + ":utils", + "//agent/src/main/java/com/code_intelligence/jazzer/api", + "//agent/src/main/java/com/code_intelligence/jazzer/autofuzz", + "//agent/src/main/java/com/code_intelligence/jazzer/instrumentor", + "//agent/src/main/java/com/code_intelligence/jazzer/runtime", + "//agent/src/main/java/com/code_intelligence/jazzer/runtime:coverage_map", + "//agent/src/main/java/com/code_intelligence/jazzer/runtime:fuzzed_data_provider", + "//agent/src/main/java/com/code_intelligence/jazzer/runtime:signal_handler", + "//agent/src/main/java/com/code_intelligence/jazzer/runtime:unsafe_provider", + "//agent/src/main/java/com/code_intelligence/jazzer/utils", + ], +) + +java_library( + name = "reproducer_template", + srcs = ["ReproducerTemplate.java"], + resources = ["Reproducer.java.tmpl"], + deps = [":opt"], +) + +java_library( + name = "opt", + srcs = ["Opt.java"], + visibility = [ + "//agent/src/main/java/com/code_intelligence/jazzer:__subpackages__", 
+ "//driver/src/test/java/com/code_intelligence/jazzer/driver:__pkg__", + ], +) + +java_library( + name = "utils", + srcs = ["Utils.java"], +) diff --git a/driver/src/main/java/com/code_intelligence/jazzer/driver/Driver.java b/driver/src/main/java/com/code_intelligence/jazzer/driver/Driver.java new file mode 100644 index 00000000..5b107ad8 --- /dev/null +++ b/driver/src/main/java/com/code_intelligence/jazzer/driver/Driver.java @@ -0,0 +1,111 @@ +/* + * Copyright 2022 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.driver; + +import static java.lang.System.err; + +import com.code_intelligence.jazzer.agent.Agent; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.SecureRandom; +import java.util.List; +import net.bytebuddy.agent.ByteBuddyAgent; + +public class Driver { + // Accessed from jazzer_main.cpp. 
+ @SuppressWarnings("unused") + private static int start(byte[][] nativeArgs) throws IOException { + List<String> args = Utils.fromNativeArgs(nativeArgs); + + final boolean spawnsSubprocesses = args.stream().anyMatch( + arg -> arg.startsWith("-fork=") || arg.startsWith("-jobs=") || arg.startsWith("-merge=")); + if (spawnsSubprocesses) { + if (!System.getProperty("jazzer.coverage_report", "").isEmpty()) { + err.println( + "WARN: --coverage_report does not support parallel fuzzing and has been disabled"); + System.clearProperty("jazzer.coverage_report"); + } + if (!System.getProperty("jazzer.coverage_dump", "").isEmpty()) { + err.println( + "WARN: --coverage_dump does not support parallel fuzzing and has been disabled"); + System.clearProperty("jazzer.coverage_dump"); + } + + String idSyncFileArg = System.getProperty("jazzer.id_sync_file", ""); + Path idSyncFile; + if (idSyncFileArg.isEmpty()) { + // Create an empty temporary file used for coverage ID synchronization and + // pass its path to the agent in every child process. This requires adding + // the argument to argv for it to be picked up by libFuzzer, which then + // forwards it to child processes. + idSyncFile = Files.createTempFile("jazzer-", ""); + args.add("--id_sync_file=" + idSyncFile.toAbsolutePath()); + } else { + // Creates the file, truncating it if it exists. + idSyncFile = Files.write(Paths.get(idSyncFileArg), new byte[] {}); + } + // This wouldn't run in case we exit the process with _Exit, but the parent process of a -fork + // run is expected to exit with a regular exit(0), which does cause JVM shutdown hooks to run: + // https://github.com/llvm/llvm-project/blob/940e178c0018b32af2f1478d331fc41a92a7dac7/compiler-rt/lib/fuzzer/FuzzerFork.cpp#L491 + idSyncFile.toFile().deleteOnExit(); + } + + // Jazzer's hooks use deterministic randomness and thus require a seed. Search for the last + // occurrence of a "-seed" argument as that is the one that is used by libFuzzer. 
If none is + // set, generate one and pass it to libFuzzer so that a fuzzing run can be reproduced simply by + // setting the seed printed by libFuzzer. + String seed = args.stream().reduce( + null, (prev, cur) -> cur.startsWith("-seed=") ? cur.substring("-seed=".length()) : prev); + if (seed == null) { + seed = Integer.toUnsignedString(new SecureRandom().nextInt()); + // Only add the -seed argument to the command line if not running in a mode + // that spawns subprocesses. These would inherit the same seed, which might + // make them less effective. + if (!spawnsSubprocesses) { + args.add("-seed=" + seed); + } + } + System.setProperty("jazzer.seed", seed); + + if (args.stream().noneMatch(arg -> arg.startsWith("-rss_limit_mb="))) { + args.add(getDefaultRssLimitMbArg()); + } + + // Do *not* modify system properties beyond this point - initializing Opt parses them as a side + // effect. + + if (Opt.hooks) { + Agent.premain(null, ByteBuddyAgent.install()); + } + + return FuzzTargetRunner.startLibFuzzer(args); + } + + private static String getDefaultRssLimitMbArg() { + // Java OutOfMemoryErrors are strictly more informative than libFuzzer's out of memory crashes. + // We thus want to scale the default libFuzzer memory limit, which includes all memory used by + // the process including Jazzer's native and non-native memory footprint, such that: + // 1. we never reach it purely by allocating memory on the Java heap; + // 2. it is still reached if the fuzz target allocates excessively on the native heap. + // As a heuristic, the limit is 2 * the maximum Java heap size plus 1 GiB for the fuzzer's own + // memory usage. Converting to MiB before doubling avoids overflow if maxMemory() is unbounded. 
+ long maxHeapInBytes = Runtime.getRuntime().maxMemory(); + return "-rss_limit_mb=" + (2 * (maxHeapInBytes / (1024 * 1024)) + 1024); + } +} diff --git a/driver/src/main/java/com/code_intelligence/jazzer/driver/FuzzTargetRunner.java b/driver/src/main/java/com/code_intelligence/jazzer/driver/FuzzTargetRunner.java new file mode 100644 index 00000000..5646e91a --- /dev/null +++ b/driver/src/main/java/com/code_intelligence/jazzer/driver/FuzzTargetRunner.java @@ -0,0 +1,450 @@ +/* + * Copyright 2022 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.code_intelligence.jazzer.driver; + +import static java.lang.System.err; +import static java.lang.System.exit; +import static java.lang.System.out; + +import com.code_intelligence.jazzer.api.FuzzedDataProvider; +import com.code_intelligence.jazzer.autofuzz.FuzzTarget; +import com.code_intelligence.jazzer.instrumentor.CoverageRecorder; +import com.code_intelligence.jazzer.runtime.CoverageMap; +import com.code_intelligence.jazzer.runtime.FuzzedDataProviderImpl; +import com.code_intelligence.jazzer.runtime.JazzerInternal; +import com.code_intelligence.jazzer.runtime.RecordingFuzzedDataProvider; +import com.code_intelligence.jazzer.runtime.SignalHandler; +import com.code_intelligence.jazzer.runtime.UnsafeProvider; +import com.code_intelligence.jazzer.utils.ExceptionUtils; +import com.code_intelligence.jazzer.utils.ManifestUtils; +import com.github.fmeum.rules_jni.RulesJni; +import java.io.IOException; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.Base64; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import sun.misc.Unsafe; + +/** + * Executes a fuzz target and reports findings. + * + * <p>This class maintains global state (both native and non-native) and thus cannot be used + * concurrently. + */ +public final class FuzzTargetRunner { + static { + RulesJni.loadLibrary("jazzer_driver", FuzzTargetRunner.class); + } + + private static final Unsafe UNSAFE = UnsafeProvider.getUnsafe(); + private static final long BYTE_ARRAY_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); + + // Default value of the libFuzzer -error_exitcode flag. 
+ private static final int LIBFUZZER_ERROR_EXIT_CODE = 77; + private static final String AUTOFUZZ_FUZZ_TARGET = + "com.code_intelligence.jazzer.autofuzz.FuzzTarget"; + private static final String FUZZER_TEST_ONE_INPUT = "fuzzerTestOneInput"; + private static final String FUZZER_INITIALIZE = "fuzzerInitialize"; + private static final String FUZZER_TEARDOWN = "fuzzerTearDown"; + + private static final Set<Long> ignoredTokens = new HashSet<>(Opt.ignore); + private static final FuzzedDataProviderImpl fuzzedDataProvider = + FuzzedDataProviderImpl.withNativeData(); + private static final Class<?> fuzzTargetClass; + private static final MethodHandle fuzzTarget; + public static final boolean useFuzzedDataProvider; + private static final ReproducerTemplate reproducerTemplate; + + static { + String targetClassName = determineFuzzTargetClassName(); + + // FuzzTargetRunner is loaded by the bootstrap class loader since Driver installs the agent + // before invoking FuzzTargetRunner.startLibFuzzer. We can't load the fuzz target with that + // class loader - we have to use the class loader that loaded Driver. This would be + // straightforward to do in Java 9+, but requires the use of reflection to maintain + // compatibility with Java 8, which doesn't have StackWalker. + // + // Note that we can't just move the agent initialization so that FuzzTargetRunner is loaded by + // Driver's class loader: The agent and FuzzTargetRunner have to share the native library that + // contains libFuzzer and that library needs to be available in the bootstrap class loader + // since instrumentation applied to Java standard library classes still needs to be able to call + // libFuzzer hooks. A fundamental JNI restriction is that a native library can't be shared + // between two different class loaders, so FuzzTargetRunner is thus forced to be loaded in the + // bootstrap class loader, which makes this ugly code block necessary. 
+ // We also can't use the system class loader since Driver may be loaded by a custom class loader + // if not invoked from the native driver. + Class<?> driverClass; + try { + Class<?> reflectionClass = Class.forName("sun.reflect.Reflection"); + try { + driverClass = + (Class<?>) reflectionClass.getMethod("getCallerClass", int.class).invoke(null, 2); + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) { + throw new IllegalStateException(e); + } + } catch (ClassNotFoundException e) { + // sun.reflect.Reflection is no longer available after Java 8, use StackWalker. + try { + Class<?> stackWalker = Class.forName("java.lang.StackWalker"); + Class<? extends Enum<?>> stackWalkerOption = + (Class<? extends Enum<?>>) Class.forName("java.lang.StackWalker$Option"); + Enum<?> retainClassReferences = + Arrays.stream(stackWalkerOption.getEnumConstants()) + .filter(v -> v.name().equals("RETAIN_CLASS_REFERENCE")) + .findFirst() + .orElseThrow(() + -> new IllegalStateException( + "No RETAIN_CLASS_REFERENCE in java.lang.StackWalker$Option")); + Object stackWalkerInstance = stackWalker.getMethod("getInstance", stackWalkerOption) + .invoke(null, retainClassReferences); + Method stackWalkerGetCallerClass = stackWalker.getMethod("getCallerClass"); + driverClass = (Class<?>) stackWalkerGetCallerClass.invoke(stackWalkerInstance); + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException + | InvocationTargetException ex) { + throw new IllegalStateException(ex); + } + } + + try { + ClassLoader driverClassLoader = driverClass.getClassLoader(); + driverClassLoader.setDefaultAssertionStatus(true); + fuzzTargetClass = Class.forName(targetClassName, false, driverClassLoader); + } catch (ClassNotFoundException e) { + err.print("ERROR: "); + e.printStackTrace(err); + exit(1); + throw new IllegalStateException("Not reached"); + } + // Inform the agent about the fuzz target class. 
Important note: This has to be done *before* + // the class is initialized so that hooks can enable themselves in time for the fuzz target's + // static initializer. + JazzerInternal.onFuzzTargetReady(targetClassName); + + Method bytesFuzzTarget = targetPublicStaticMethodOrNull(FUZZER_TEST_ONE_INPUT, byte[].class); + Method dataFuzzTarget = + targetPublicStaticMethodOrNull(FUZZER_TEST_ONE_INPUT, FuzzedDataProvider.class); + if ((bytesFuzzTarget != null) == (dataFuzzTarget != null)) { + err.printf( + "ERROR: %s must define exactly one of the following two functions:%n", targetClassName); + err.println("public static void fuzzerTestOneInput(byte[] ...)"); + err.println("public static void fuzzerTestOneInput(FuzzedDataProvider ...)"); + err.println( + "Note: Fuzz targets returning boolean are no longer supported; exceptions should be thrown instead of returning true."); + exit(1); + } + try { + if (bytesFuzzTarget != null) { + useFuzzedDataProvider = false; + fuzzTarget = MethodHandles.publicLookup().unreflect(bytesFuzzTarget); + } else { + useFuzzedDataProvider = true; + fuzzTarget = MethodHandles.publicLookup().unreflect(dataFuzzTarget); + } + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + reproducerTemplate = new ReproducerTemplate(fuzzTargetClass.getName(), useFuzzedDataProvider); + + Method initializeNoArgs = targetPublicStaticMethodOrNull(FUZZER_INITIALIZE); + Method initializeWithArgs = targetPublicStaticMethodOrNull(FUZZER_INITIALIZE, String[].class); + try { + if (initializeWithArgs != null) { + initializeWithArgs.invoke(null, (Object) Opt.targetArgs.toArray(new String[] {})); + } else if (initializeNoArgs != null) { + initializeNoArgs.invoke(null); + } + } catch (IllegalAccessException | InvocationTargetException e) { + err.print("== Java Exception in fuzzerInitialize: "); + e.printStackTrace(err); + exit(1); + } + + if (Opt.hooks) { + // libFuzzer will clear the coverage map after this method returns and keeps no record of the + 
// coverage accumulated so far (e.g. by static initializers). We record it here to keep it + // around for JaCoCo coverage reports. + CoverageRecorder.updateCoveredIdsWithCoverageMap(); + } + + Runtime.getRuntime().addShutdownHook(new Thread(FuzzTargetRunner::shutdown)); + } + + /** + * A test-only convenience wrapper around {@link #runOne(long, int)}. + */ + static int runOne(byte[] data) { + long dataPtr = UNSAFE.allocateMemory(data.length); + UNSAFE.copyMemory(data, BYTE_ARRAY_OFFSET, null, dataPtr, data.length); + try { + return runOne(dataPtr, data.length); + } finally { + UNSAFE.freeMemory(dataPtr); + } + } + + /** + * Executes the user-provided fuzz target once. + * + * @param dataPtr a native pointer to beginning of the input provided by the fuzzer for this + * execution + * @param dataLength length of the fuzzer input + * @return the value that the native LLVMFuzzerTestOneInput function should return. Currently, + * this is always 0. The function may exit the process instead of returning. + */ + private static int runOne(long dataPtr, int dataLength) { + Throwable finding = null; + byte[] data = null; + try { + if (useFuzzedDataProvider) { + fuzzedDataProvider.setNativeData(dataPtr, dataLength); + fuzzTarget.invokeExact((FuzzedDataProvider) fuzzedDataProvider); + } else { + data = copyToArray(dataPtr, dataLength); + fuzzTarget.invokeExact(data); + } + } catch (Throwable uncaughtFinding) { + finding = uncaughtFinding; + } + // Explicitly reported findings take precedence over uncaught exceptions. + if (JazzerInternal.lastFinding != null) { + finding = JazzerInternal.lastFinding; + JazzerInternal.lastFinding = null; + } + if (finding == null) { + return 0; + } + if (Opt.hooks) { + finding = ExceptionUtils.preprocessThrowable(finding); + } + + long dedupToken = Opt.dedup ? ExceptionUtils.computeDedupToken(finding) : 0; + // Opt.keepGoing implies Opt.dedup. 
+ if (Opt.keepGoing > 1 && !ignoredTokens.add(dedupToken)) { + return 0; + } + + err.println(); + err.print("== Java Exception: "); + finding.printStackTrace(err); + if (Opt.dedup) { + // Has to be printed to stdout as it is parsed by libFuzzer when minimizing a crash. It does + // not necessarily have to appear at the beginning of a line. + // https://github.com/llvm/llvm-project/blob/4c106c93eb68f8f9f201202677cd31e326c16823/compiler-rt/lib/fuzzer/FuzzerDriver.cpp#L342 + out.printf(Locale.ROOT, "DEDUP_TOKEN: %016x%n", dedupToken); + } + err.println("== libFuzzer crashing input =="); + printCrashingInput(); + // dumpReproducer needs to be called after libFuzzer printed its final stats as otherwise it + // would report incorrect coverage - the reproducer generation involved rerunning the fuzz + // target. + dumpReproducer(data); + + if (Opt.keepGoing == 1 || Long.compareUnsigned(ignoredTokens.size(), Opt.keepGoing) >= 0) { + // Reached the maximum amount of findings to keep going for, crash after shutdown. We use + // _Exit rather than System.exit to not trigger libFuzzer's exit handlers. + shutdown(); + _Exit(LIBFUZZER_ERROR_EXIT_CODE); + throw new IllegalStateException("Not reached"); + } + return 0; + } + + /* + * Starts libFuzzer via LLVMFuzzerRunDriver. + * + * Note: Must be public rather than package-private as it is loaded in a different class loader + * than Driver. 
+ */ + public static int startLibFuzzer(List<String> args) { + SignalHandler.initialize(); + return startLibFuzzer(Utils.toNativeArgs(args)); + } + + private static void shutdown() { + if (!Opt.coverageDump.isEmpty() || !Opt.coverageReport.isEmpty()) { + int[] everCoveredIds = CoverageMap.getEverCoveredIds(); + if (!Opt.coverageDump.isEmpty()) { + CoverageRecorder.dumpJacocoCoverage(everCoveredIds, Opt.coverageDump); + } + if (!Opt.coverageReport.isEmpty()) { + CoverageRecorder.dumpCoverageReport(everCoveredIds, Opt.coverageReport); + } + } + + Method teardown = targetPublicStaticMethodOrNull(FUZZER_TEARDOWN); + if (teardown == null) { + return; + } + err.println("calling fuzzerTearDown function"); + try { + teardown.invoke(null); + } catch (InvocationTargetException e) { + // An exception in fuzzerTearDown is a regular finding. + err.print("== Java Exception in fuzzerTearDown: "); + e.getCause().printStackTrace(err); + _Exit(LIBFUZZER_ERROR_EXIT_CODE); + } catch (Throwable t) { + // Any other exception is an error. 
+ t.printStackTrace(err); + _Exit(1); + } + } + + private static String determineFuzzTargetClassName() { + if (!Opt.autofuzz.isEmpty()) { + return AUTOFUZZ_FUZZ_TARGET; + } + if (!Opt.targetClass.isEmpty()) { + return Opt.targetClass; + } + String manifestTargetClass = ManifestUtils.detectFuzzTargetClass(); + if (manifestTargetClass != null) { + return manifestTargetClass; + } + err.println("Missing argument --target_class=<fuzz_target_class>"); + exit(1); + throw new IllegalStateException("Not reached"); + } + + private static void dumpReproducer(byte[] data) { + if (data == null) { + assert useFuzzedDataProvider; + fuzzedDataProvider.reset(); + data = fuzzedDataProvider.consumeRemainingAsBytes(); + } + MessageDigest digest; + try { + digest = MessageDigest.getInstance("SHA-1"); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException("SHA-1 not available", e); + } + String dataSha1 = toHexString(digest.digest(data)); + + if (!Opt.autofuzz.isEmpty()) { + fuzzedDataProvider.reset(); + FuzzTarget.dumpReproducer(fuzzedDataProvider, Opt.reproducerPath, dataSha1); + return; + } + + String base64Data; + if (useFuzzedDataProvider) { + fuzzedDataProvider.reset(); + FuzzedDataProvider recordingFuzzedDataProvider = + RecordingFuzzedDataProvider.makeFuzzedDataProviderProxy(fuzzedDataProvider); + try { + fuzzTarget.invokeExact(recordingFuzzedDataProvider); + if (JazzerInternal.lastFinding == null) { + err.println("Failed to reproduce crash when rerunning with recorder"); + } + } catch (Throwable ignored) { + // Expected. + } + try { + base64Data = RecordingFuzzedDataProvider.serializeFuzzedDataProviderProxy( + recordingFuzzedDataProvider); + } catch (IOException e) { + err.print("ERROR: Failed to create reproducer: "); + e.printStackTrace(err); + // Don't let libFuzzer print a native stack trace. 
+ _Exit(1); + throw new IllegalStateException("Not reached"); + } + } else { + base64Data = Base64.getEncoder().encodeToString(data); + } + + reproducerTemplate.dumpReproducer(base64Data, dataSha1); + } + + private static Method targetPublicStaticMethodOrNull(String name, Class<?>... parameterTypes) { + try { + Method method = fuzzTargetClass.getMethod(name, parameterTypes); + if (!Modifier.isStatic(method.getModifiers()) || !Modifier.isPublic(method.getModifiers())) { + return null; + } + return method; + } catch (NoSuchMethodException e) { + return null; + } + } + + /** + * Convert a byte array to a lower-case hex string. + * + * <p>The returned hex string always has {@code 2 * bytes.length} characters. + * + * @param bytes the bytes to convert + * @return a lower-case hex string representing the bytes + */ + private static String toHexString(byte[] bytes) { + String unpadded = new BigInteger(1, bytes).toString(16); + int numLeadingZeroes = 2 * bytes.length - unpadded.length(); + return String.join("", Collections.nCopies(numLeadingZeroes, "0")) + unpadded; + } + + // Accessed by fuzz_target_runner.cpp. + @SuppressWarnings("unused") + private static void dumpAllStackTraces() { + ExceptionUtils.dumpAllStackTraces(); + } + + private static byte[] copyToArray(long ptr, int length) { + // TODO: Use Unsafe.allocateUninitializedArray instead once Java 9 is the base. + byte[] array = new byte[length]; + UNSAFE.copyMemory(null, ptr, array, BYTE_ARRAY_OFFSET, length); + return array; + } + + /** + * Starts libFuzzer via LLVMFuzzerRunDriver. + * + * @param args command-line arguments encoded in UTF-8 (not null-terminated) + * @return the return value of LLVMFuzzerRunDriver + */ + private static native int startLibFuzzer(byte[][] args); + + /** + * Causes libFuzzer to write the current input to disk as a crashing input and emit some + * information about it to stderr. 
+ */ + private static native void printCrashingInput(); + + /** + * Immediately terminates the process without performing any cleanup. + * + * <p>Neither JVM shutdown hooks nor native exit handlers are called. This method does not return. + * + * <p>This method provides a way to exit Jazzer without triggering libFuzzer's exit hook that + * prints the "fuzz target exited" error message. It should thus be preferred over + * {@link System#exit} in any situation where Jazzer encounters an error after the fuzz target has + * started running. + * + * @param exitCode the exit code + */ + private static native void _Exit(int exitCode); +} diff --git a/driver/src/main/java/com/code_intelligence/jazzer/driver/Opt.java b/driver/src/main/java/com/code_intelligence/jazzer/driver/Opt.java new file mode 100644 index 00000000..477c7d38 --- /dev/null +++ b/driver/src/main/java/com/code_intelligence/jazzer/driver/Opt.java @@ -0,0 +1,173 @@ +/* + * Copyright 2022 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.driver; + +import static java.lang.System.err; +import static java.lang.System.exit; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Static options that determine the runtime behavior of the fuzzer, set via Java properties. 
+ * + * <p>Each option corresponds to a command-line argument of the driver of the same name. + * + * <p>Every public field should be deeply immutable. + * + * <p>This class is loaded twice: As it is used in {@link FuzzTargetRunner}, it is loaded in the + * class loader that loads {@link Driver}. It is also used in + * {@link com.code_intelligence.jazzer.agent.Agent} after the agent JAR has been added to the + * bootstrap classpath and thus is loaded again in the bootstrap loader. This is not a problem since + * it only provides immutable fields and has no non-fatal side effects. + */ +public final class Opt { + private static final char SYSTEM_DELIMITER = + System.getProperty("os.name").startsWith("Windows") ? ';' : ':'; + + public static final String autofuzz = stringSetting("autofuzz", ""); + public static final List<String> autofuzzIgnore = stringListSetting("autofuzz_ignore", ','); + public static final String coverageDump = stringSetting("coverage_dump", ""); + public static final String coverageReport = stringSetting("coverage_report", ""); + public static final List<String> customHookIncludes = stringListSetting("custom_hook_includes"); + public static final List<String> customHookExcludes = stringListSetting("custom_hook_excludes"); + public static final List<String> customHooks = stringListSetting("custom_hooks"); + public static final List<String> disabledHooks = stringListSetting("disabled_hooks"); + public static final String dumpClassesDir = stringSetting("dump_classes_dir", ""); + public static final boolean hooks = boolSetting("hooks", true); + public static final String idSyncFile = stringSetting("id_sync_file", null); + public static final List<String> instrumentationIncludes = + stringListSetting("instrumentation_includes"); + public static final List<String> instrumentationExcludes = + stringListSetting("instrumentation_excludes"); + public static final Set<Long> ignore = + Collections.unmodifiableSet(stringListSetting("ignore", ',') + .stream() 
+ .map(Long::parseUnsignedLong) + .collect(Collectors.toSet())); + public static final String reproducerPath = stringSetting("reproducer_path", "."); + public static final String targetClass = stringSetting("target_class", ""); + public static final List<String> trace = stringListSetting("trace"); + + // The values of these settings depend on autofuzz. + public static final List<String> targetArgs = autofuzz.isEmpty() + ? stringListSetting("target_args", ' ') + : Collections.unmodifiableList( + Stream.concat(Stream.of(autofuzz), autofuzzIgnore.stream()).collect(Collectors.toList())); + public static final long keepGoing = + uint64Setting("keep_going", autofuzz.isEmpty() ? 1 : Long.MAX_VALUE); + + // Default to false if hooks is false to mimic the original behavior of the native fuzz target + // runner, but still support hooks = false && dedup = true. + public static final boolean dedup = boolSetting("dedup", hooks); + + static { + if (!targetClass.isEmpty() && !autofuzz.isEmpty()) { + err.println("--target_class and --autofuzz cannot be specified together"); + exit(1); + } + if (!stringListSetting("target_args", ' ').isEmpty() && !autofuzz.isEmpty()) { + err.println("--target_args and --autofuzz cannot be specified together"); + exit(1); + } + if (autofuzz.isEmpty() && !autofuzzIgnore.isEmpty()) { + err.println("--autofuzz_ignore requires --autofuzz"); + exit(1); + } + if ((!ignore.isEmpty() || keepGoing > 1) && !dedup) { + // --autofuzz implicitly sets keepGoing to Long.MAX_VALUE. 
+ err.println("--nodedup is not supported with --ignore, --keep_going, or --autofuzz"); + exit(1); + } + } + + private static final String optionsPrefix = "jazzer."; + + private static String stringSetting(String name, String defaultValue) { + return System.getProperty(optionsPrefix + name, defaultValue); + } + + private static List<String> stringListSetting(String name) { + return stringListSetting(name, SYSTEM_DELIMITER); + } + + private static List<String> stringListSetting(String name, char separator) { + String value = System.getProperty(optionsPrefix + name); + if (value == null || value.isEmpty()) { + return Collections.emptyList(); + } + return splitOnUnescapedSeparator(value, separator); + } + + private static boolean boolSetting(String name, boolean defaultValue) { + String value = System.getProperty(optionsPrefix + name); + if (value == null) { + return defaultValue; + } + return Boolean.parseBoolean(value); + } + + private static long uint64Setting(String name, long defaultValue) { + String value = System.getProperty(optionsPrefix + name); + if (value == null) { + return defaultValue; + } + return Long.parseUnsignedLong(value, 10); + } + + /** + * Split value into non-empty tokens separated by separator. Backslashes can be used to escape + * separators (or backslashes). 
+ * + * @param value the string to split + * @param separator a single character to split on (backslash is not allowed) + * @return an immutable list of tokens obtained by splitting value on separator + */ + static List<String> splitOnUnescapedSeparator(String value, char separator) { + if (separator == '\\') { + throw new IllegalArgumentException("separator '\\' is not supported"); + } + ArrayList<String> tokens = new ArrayList<>(); + StringBuilder currentToken = new StringBuilder(); + boolean inEscapeState = false; + for (int pos = 0; pos < value.length(); pos++) { + char c = value.charAt(pos); + if (inEscapeState) { + currentToken.append(c); + inEscapeState = false; + } else if (c == '\\') { + inEscapeState = true; + } else if (c == separator) { + // Do not emit empty tokens between consecutive separators. + if (currentToken.length() > 0) { + tokens.add(currentToken.toString()); + } + currentToken.setLength(0); + } else { + currentToken.append(c); + } + } + if (currentToken.length() > 0) { + tokens.add(currentToken.toString()); + } + return Collections.unmodifiableList(tokens); + } +} diff --git a/driver/src/main/java/com/code_intelligence/jazzer/driver/Reproducer.java.tmpl b/driver/src/main/java/com/code_intelligence/jazzer/driver/Reproducer.java.tmpl new file mode 100644 index 00000000..d9cb1e9e --- /dev/null +++ b/driver/src/main/java/com/code_intelligence/jazzer/driver/Reproducer.java.tmpl @@ -0,0 +1,28 @@ +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + +public class Crash_%1$s { + static final String base64Bytes = String.join("", "%2$s"); + + public static void main(String[] args) throws Throwable { + ClassLoader.getSystemClassLoader().setDefaultAssertionStatus(true); + try { + Method fuzzerInitialize = %3$s.class.getMethod("fuzzerInitialize"); + fuzzerInitialize.invoke(null); + } catch (NoSuchMethodException ignored) { + try { + Method fuzzerInitialize = %3$s.class.getMethod("fuzzerInitialize", String[].class); + 
fuzzerInitialize.invoke(null, (Object) args); + } catch (NoSuchMethodException ignored1) { + } catch (IllegalAccessException | InvocationTargetException e) { + e.printStackTrace(); + System.exit(1); + } + } catch (IllegalAccessException | InvocationTargetException e) { + e.printStackTrace(); + System.exit(1); + } + %4$s + %3$s.fuzzerTestOneInput(input); + } +} diff --git a/driver/src/main/java/com/code_intelligence/jazzer/driver/ReproducerTemplate.java b/driver/src/main/java/com/code_intelligence/jazzer/driver/ReproducerTemplate.java new file mode 100644 index 00000000..0c7721cf --- /dev/null +++ b/driver/src/main/java/com/code_intelligence/jazzer/driver/ReproducerTemplate.java @@ -0,0 +1,85 @@ +/* + * Copyright 2022 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.driver; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.stream.Collectors; + +final class ReproducerTemplate { + // A constant pool CONSTANT_Utf8_info entry should be able to hold data of size + // uint16, but somehow this does not seem to be the case and leads to invalid + // code crash reproducer code. Reducing the size by one resolves the problem. 
+ private static final int DATA_CHUNK_MAX_LENGTH = Short.MAX_VALUE - 1; + private static final String RAW_BYTES_INPUT = + "byte[] input = java.util.Base64.getDecoder().decode(base64Bytes);"; + private static final String FUZZED_DATA_PROVIDER_INPUT = + "com.code_intelligence.jazzer.api.CannedFuzzedDataProvider input = new com.code_intelligence.jazzer.api.CannedFuzzedDataProvider(base64Bytes);"; + + private final String targetClass; + private final boolean useFuzzedDataProvider; + + public ReproducerTemplate(String targetClass, boolean useFuzzedDataProvider) { + this.targetClass = targetClass; + this.useFuzzedDataProvider = useFuzzedDataProvider; + } + + /** + * Emits a Java reproducer to {@code Crash_HASH.java} in {@code Opt.reproducerPath}. + * + * @param data the Base64-encoded data to emit as a string literal + * @param sha the SHA1 hash of the raw fuzzer input + */ + public void dumpReproducer(String data, String sha) { + String targetArg = useFuzzedDataProvider ? FUZZED_DATA_PROVIDER_INPUT : RAW_BYTES_INPUT; + String template = new BufferedReader( + new InputStreamReader(ReproducerTemplate.class.getResourceAsStream("Reproducer.java.tmpl"), + StandardCharsets.UTF_8)) + .lines() + .collect(Collectors.joining("\n")); + String chunkedData = chunkStringLiteral(data); + String javaSource = String.format(template, sha, chunkedData, targetClass, targetArg); + Path javaPath = Paths.get(Opt.reproducerPath, String.format("Crash_%s.java", sha)); + try { + Files.write(javaPath, javaSource.getBytes(StandardCharsets.UTF_8), StandardOpenOption.CREATE); + } catch (IOException e) { + System.err.printf("ERROR: Failed to write Java reproducer to %s%n", javaPath); + e.printStackTrace(); + } + System.out.printf( + "reproducer_path='%s'; Java reproducer written to %s%n", Opt.reproducerPath, javaPath); + } + + // The serialization of recorded FuzzedDataProvider invocations can get too long to be emitted + // into the template as a single String literal. 
This is mitigated by chunking the data and + // concatenating it again in the generated code. + private String chunkStringLiteral(String data) { + ArrayList<String> chunks = new ArrayList<>(); + for (int i = 0; i <= data.length() / DATA_CHUNK_MAX_LENGTH; i++) { + chunks.add(data.substring( + i * DATA_CHUNK_MAX_LENGTH, Math.min((i + 1) * DATA_CHUNK_MAX_LENGTH, data.length()))); + } + return String.join("\", \"", chunks); + } +} diff --git a/driver/src/main/java/com/code_intelligence/jazzer/driver/Utils.java b/driver/src/main/java/com/code_intelligence/jazzer/driver/Utils.java new file mode 100644 index 00000000..37eb1d0f --- /dev/null +++ b/driver/src/main/java/com/code_intelligence/jazzer/driver/Utils.java @@ -0,0 +1,39 @@ +/* + * Copyright 2022 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.driver; + +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; + +public class Utils { + /** + * Convert the arguments to UTF8 before passing them on to JNI as there are no JNI functions to + * get (unmodified) UTF-8 out of a jstring. 
+ */ + static byte[][] toNativeArgs(Collection<String> args) { + return args.stream().map(str -> str.getBytes(StandardCharsets.UTF_8)).toArray(byte[][] ::new); + } + + static List<String> fromNativeArgs(byte[][] args) { + return Arrays.stream(args) + .map(bytes -> new String(bytes, StandardCharsets.UTF_8)) + .collect(Collectors.toList()); + } +} diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel b/driver/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel new file mode 100644 index 00000000..863a1875 --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel @@ -0,0 +1,124 @@ +load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library") +load("//bazel:compat.bzl", "SKIP_ON_WINDOWS") + +cc_jni_library( + name = "jazzer_driver", + visibility = [ + "//agent/src/jmh:__subpackages__", + "//agent/src/test:__subpackages__", + "//driver/src/main/java/com/code_intelligence/jazzer/driver:__pkg__", + "//driver/src/test:__subpackages__", + ], + deps = [ + ":jazzer_driver_lib", + "@jazzer_libfuzzer//:libfuzzer_no_main", + ] + select({ + # Windows doesn't have a concept analogous to RTLD_GLOBAL. + "@platforms//os:windows": [], + "//conditions:default": [":trigger_driver_hooks_load"], + }), +) + +cc_library( + name = "jazzer_driver_lib", + visibility = ["//driver/src/test/native/com/code_intelligence/jazzer/driver/mocks:__pkg__"], + deps = [ + ":coverage_tracker", + ":fuzz_target_runner", + ":fuzzed_data_provider", + ":jazzer_fuzzer_callbacks", + ":libfuzzer_callbacks", + ], +) + +cc_library( + name = "coverage_tracker", + srcs = ["coverage_tracker.cpp"], + hdrs = ["coverage_tracker.h"], + deps = ["//agent/src/main/java/com/code_intelligence/jazzer/runtime:coverage_map.hdrs"], + # Symbols are only referenced dynamically via JNI. 
+ alwayslink = True, +) + +cc_library( + name = "fuzz_target_runner", + srcs = ["fuzz_target_runner.cpp"], + hdrs = ["fuzz_target_runner.h"], + linkopts = select({ + "@platforms//os:windows": [], + "//conditions:default": ["-ldl"], + }), + deps = [ + ":sanitizer_symbols", + "//driver/src/main/java/com/code_intelligence/jazzer/driver:fuzz_target_runner.hdrs", + ], + # With sanitizers, symbols are only referenced dynamically via JNI. + alwayslink = True, +) + +cc_library( + name = "fuzzed_data_provider", + srcs = ["fuzzed_data_provider.cpp"], + visibility = [ + "//driver:__pkg__", + ], + deps = [ + "//agent/src/main/java/com/code_intelligence/jazzer/runtime:fuzzed_data_provider.hdrs", + ], + # Symbols may only be referenced dynamically via JNI. + alwayslink = True, +) + +cc_jni_library( + name = "fuzzed_data_provider_standalone", + visibility = ["//agent/src/main/java/com/code_intelligence/jazzer/replay:__pkg__"], + deps = [":fuzzed_data_provider"], +) + +cc_library( + name = "jazzer_fuzzer_callbacks", + srcs = ["jazzer_fuzzer_callbacks.cpp"], + deps = [ + ":sanitizer_hooks_with_pc", + "//agent/src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs", + ], + alwayslink = True, +) + +cc_library( + name = "libfuzzer_callbacks", + srcs = ["libfuzzer_callbacks.cpp"], + deps = [ + "//agent/src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs", + "@com_google_absl//absl/strings", + ], + # Symbols are only referenced dynamically via JNI. + alwayslink = True, +) + +cc_library( + name = "trigger_driver_hooks_load", + srcs = ["trigger_driver_hooks_load.cpp"], + linkopts = ["-ldl"], + target_compatible_with = SKIP_ON_WINDOWS, + deps = ["@fmeum_rules_jni//jni"], + # Symbols are only referenced dynamically via JNI. 
+ alwayslink = True, +) + +cc_library( + name = "sanitizer_hooks_with_pc", + hdrs = ["sanitizer_hooks_with_pc.h"], + visibility = [ + "//agent/src/jmh/native:__subpackages__", + "//driver:__pkg__", + "//driver/src/test/native/com/code_intelligence/jazzer/driver:__pkg__", + ], +) + +cc_library( + name = "sanitizer_symbols", + srcs = ["sanitizer_symbols.cpp"], + # Symbols are referenced dynamically by libFuzzer. + alwayslink = True, +) diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp b/driver/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp new file mode 100644 index 00000000..dc8349d4 --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp @@ -0,0 +1,114 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "coverage_tracker.h" + +#include <jni.h> + +#include <stdexcept> +#include <vector> + +#include "com_code_intelligence_jazzer_runtime_CoverageMap.h" + +extern "C" void __sanitizer_cov_8bit_counters_init(uint8_t *start, + uint8_t *end); +extern "C" void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, + const uintptr_t *pcs_end); +extern "C" size_t __sanitizer_cov_get_observed_pcs(uintptr_t **pc_entries); + +namespace { +void AssertNoException(JNIEnv &env) { + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + throw std::runtime_error( + "Java exception occurred in CoverageTracker JNI code"); + } +} +} // namespace + +namespace jazzer { + +uint8_t *CoverageTracker::counters_ = nullptr; +PCTableEntry *CoverageTracker::pc_entries_ = nullptr; + +void CoverageTracker::Initialize(JNIEnv &env, jlong counters) { + if (counters_ != nullptr) { + throw std::runtime_error( + "CoverageTracker::Initialize must not be called more than once"); + } + counters_ = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(counters)); +} + +void CoverageTracker::RegisterNewCounters(JNIEnv &env, jint old_num_counters, + jint new_num_counters) { + if (counters_ == nullptr) { + throw std::runtime_error( + "CoverageTracker::Initialize should have been called first"); + } + if (new_num_counters < old_num_counters) { + throw std::runtime_error( + "new_num_counters must not be smaller than old_num_counters"); + } + if (new_num_counters == old_num_counters) { + return; + } + std::size_t diff_num_counters = new_num_counters - old_num_counters; + // libFuzzer requires an array containing the instruction addresses associated + // with the coverage counters registered above. This is required to report how + // many edges have been covered. However, libFuzzer only checks these + // addresses when the corresponding flag is set to 1. Therefore, it is safe to + // set the all PC entries to any value as long as the corresponding flag is + // set to zero. 
We set the value of each PC to the index of the corresponding + // edge ID. This facilitates finding the edge ID of each covered PC reported + // by libFuzzer. + pc_entries_ = new PCTableEntry[diff_num_counters]; + for (std::size_t i = 0; i < diff_num_counters; ++i) { + pc_entries_[i] = {i, 0}; + } + __sanitizer_cov_8bit_counters_init(counters_ + old_num_counters, + counters_ + new_num_counters); + __sanitizer_cov_pcs_init((uintptr_t *)(pc_entries_), + (uintptr_t *)(pc_entries_ + diff_num_counters)); +} +} // namespace jazzer + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_initialize( + JNIEnv *env, jclass, jlong counters) { + ::jazzer::CoverageTracker::Initialize(*env, counters); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_registerNewCounters( + JNIEnv *env, jclass, jint old_num_counters, jint new_num_counters) { + ::jazzer::CoverageTracker::RegisterNewCounters(*env, old_num_counters, + new_num_counters); +} + +[[maybe_unused]] jintArray +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_getEverCoveredIds( + JNIEnv *env, jclass) { + uintptr_t *covered_pcs; + jint num_covered_pcs = __sanitizer_cov_get_observed_pcs(&covered_pcs); + std::vector<jint> covered_edge_ids(covered_pcs, + covered_pcs + num_covered_pcs); + delete[] covered_pcs; + + jintArray covered_edge_ids_jni = env->NewIntArray(num_covered_pcs); + AssertNoException(*env); + env->SetIntArrayRegion(covered_edge_ids_jni, 0, num_covered_pcs, + covered_edge_ids.data()); + AssertNoException(*env); + return covered_edge_ids_jni; +} diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h b/driver/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h new file mode 100644 index 00000000..8cceceed --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h @@ -0,0 +1,42 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <jni.h> + +#include <string> + +namespace jazzer { + +// The members of this struct are only accessed by libFuzzer. +struct __attribute__((packed)) PCTableEntry { + [[maybe_unused]] uintptr_t PC, PCFlags; +}; + +// CoverageTracker registers an array of 8-bit coverage counters with +// libFuzzer. The array is populated from Java using Unsafe. +class CoverageTracker { + private: + static uint8_t *counters_; + static PCTableEntry *pc_entries_; + + public: + static void Initialize(JNIEnv &env, jlong counters); + static void RegisterNewCounters(JNIEnv &env, jint old_num_counters, + jint new_num_counters); +}; +} // namespace jazzer diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp b/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp new file mode 100644 index 00000000..6231af09 --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp @@ -0,0 +1,176 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * A native wrapper around the FuzzTargetRunner Java class that executes it as a + * libFuzzer fuzz target. + */ + +#include "fuzz_target_runner.h" + +#ifndef _WIN32 +#include <dlfcn.h> +#endif +#include <jni.h> + +#include <iostream> +#include <limits> +#include <string> +#include <vector> + +#include "com_code_intelligence_jazzer_driver_FuzzTargetRunner.h" + +extern "C" int LLVMFuzzerRunDriver(int *argc, char ***argv, + int (*UserCb)(const uint8_t *Data, + size_t Size)); + +namespace { +jclass gRunner; +jmethodID gRunOneId; +JavaVM *gJavaVm; +JNIEnv *gEnv; + +// A libFuzzer-registered callback that outputs the crashing input, but does +// not include a stack trace. 
+void (*gLibfuzzerPrintCrashingInput)() = nullptr; + +int testOneInput(const uint8_t *data, const std::size_t size) { + JNIEnv &env = *gEnv; + jint jsize = + std::min(size, static_cast<size_t>(std::numeric_limits<jint>::max())); + int res = env.CallStaticIntMethod(gRunner, gRunOneId, data, jsize); + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + _Exit(1); + } + return res; +} +} // namespace + +namespace jazzer { +void DumpJvmStackTraces() { + JNIEnv *env = nullptr; + if (gJavaVm->AttachCurrentThread(reinterpret_cast<void **>(&env), nullptr) != + JNI_OK) { + std::cerr << "WARN: AttachCurrentThread failed in DumpJvmStackTraces" + << std::endl; + return; + } + jmethodID dumpStack = + env->GetStaticMethodID(gRunner, "dumpAllStackTraces", "()V"); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + return; + } + env->CallStaticVoidMethod(gRunner, dumpStack); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + return; + } + // Do not detach as we may be the main thread (but the JVM exits anyway). 
+} +} // namespace jazzer + +[[maybe_unused]] jint +Java_com_code_1intelligence_jazzer_driver_FuzzTargetRunner_startLibFuzzer( + JNIEnv *env, jclass runner, jobjectArray args) { + gEnv = env; + env->GetJavaVM(&gJavaVm); + gRunner = reinterpret_cast<jclass>(env->NewGlobalRef(runner)); + gRunOneId = env->GetStaticMethodID(runner, "runOne", "(JI)I"); + if (gRunOneId == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + + int argc = env->GetArrayLength(args); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + std::vector<std::string> argv_strings; + std::vector<const char *> argv_c; + for (jsize i = 0; i < argc; i++) { + auto arg_jni = + reinterpret_cast<jbyteArray>(env->GetObjectArrayElement(args, i)); + if (arg_jni == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + jbyte *arg_c = env->GetByteArrayElements(arg_jni, nullptr); + if (arg_c == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + std::size_t arg_size = env->GetArrayLength(arg_jni); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + argv_strings.emplace_back(reinterpret_cast<const char *>(arg_c), arg_size); + env->ReleaseByteArrayElements(arg_jni, arg_c, JNI_ABORT); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + } + for (jsize i = 0; i < argc; i++) { + argv_c.emplace_back(argv_strings[i].c_str()); + } + // Null-terminate argv. 
+ argv_c.emplace_back(nullptr); + + const char **argv = argv_c.data(); + return LLVMFuzzerRunDriver(&argc, const_cast<char ***>(&argv), testOneInput); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_driver_FuzzTargetRunner_printCrashingInput( + JNIEnv *, jclass) { + if (gLibfuzzerPrintCrashingInput == nullptr) { + std::cerr << "<not available>" << std::endl; + } else { + gLibfuzzerPrintCrashingInput(); + } +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_driver_FuzzTargetRunner__1Exit( + JNIEnv *, jclass, jint exit_code) { + _Exit(exit_code); +} + +// We apply a patch to libFuzzer to make it call this function instead of +// __sanitizer_set_death_callback to pass us the death callback. +extern "C" [[maybe_unused]] void __jazzer_set_death_callback( + void (*callback)()) { + gLibfuzzerPrintCrashingInput = callback; +#ifndef _WIN32 + void *sanitizer_set_death_callback = + dlsym(RTLD_DEFAULT, "__sanitizer_set_death_callback"); + if (sanitizer_set_death_callback != nullptr) { + (reinterpret_cast<void (*)(void (*)())>(sanitizer_set_death_callback))( + []() { + ::jazzer::DumpJvmStackTraces(); + gLibfuzzerPrintCrashingInput(); + // Ideally, we would be able to perform a graceful shutdown of the + // JVM. However, doing this directly results in a nested bug report by + // ASan or UBSan, likely because something about the stack/thread + // context in which they generate reports is incompatible with the JVM + // shutdown process. use_sigaltstack=0 does not help though, so this + // might be on us. 
+ }); + } +#endif +} diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h b/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h new file mode 100644 index 00000000..0e8846c0 --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h @@ -0,0 +1,28 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <jni.h> + +namespace jazzer { +/* + * Print the stack traces of all active JVM threads. + * + * This function can be called from any thread. + */ +void DumpJvmStackTraces(); +} // namespace jazzer diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp b/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp new file mode 100644 index 00000000..494bb9e8 --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp @@ -0,0 +1,692 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Modified from +// https://raw.githubusercontent.com/google/atheris/034284dc4bb1ad4f4ab6ba5d34fb4dca7c633660/fuzzed_data_provider.cc +// +// Original license and copyright notices: +// +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Modified from +// https://github.com/llvm/llvm-project/blob/70de7e0d9a95b7fcd7c105b06bd90fdf4e01f563/compiler-rt/include/fuzzer/FuzzedDataProvider.h +// +// Original license and copyright notices: +// +//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#include <algorithm> +#include <cstdint> +#include <limits> +#include <string> +#include <tuple> +#include <type_traits> + +#include "com_code_intelligence_jazzer_runtime_FuzzedDataProviderImpl.h" + +namespace { + +jfieldID gDataPtrField = nullptr; +jfieldID gRemainingBytesField = nullptr; + +void ThrowIllegalArgumentException(JNIEnv &env, const std::string &message) { + jclass illegal_argument_exception = + env.FindClass("java/lang/IllegalArgumentException"); + env.ThrowNew(illegal_argument_exception, message.c_str()); +} + +template <typename T> +struct JniArrayType {}; + +#define JNI_ARRAY_TYPE(lower_case, sentence_case) \ + template <> \ + struct JniArrayType<j##lower_case> { \ + typedef j##lower_case type; \ + typedef j##lower_case##Array array_type; \ + static constexpr array_type (JNIEnv::*kNewArrayFunc)(jsize) = \ + &JNIEnv::New##sentence_case##Array; \ + static constexpr void (JNIEnv::*kSetArrayRegionFunc)( \ + array_type array, jsize start, jsize len, \ + const type *buf) = &JNIEnv::Set##sentence_case##ArrayRegion; \ + }; + +JNI_ARRAY_TYPE(boolean, Boolean); +JNI_ARRAY_TYPE(byte, Byte); +JNI_ARRAY_TYPE(short, Short); +JNI_ARRAY_TYPE(int, Int); +JNI_ARRAY_TYPE(long, Long); + +template <typename T> +typename JniArrayType<T>::array_type JNICALL +ConsumeIntegralArray(JNIEnv &env, jobject self, jint max_length) { + if (max_length < 0) { + ThrowIllegalArgumentException(env, "maxLength must not be negative"); + return nullptr; + } + // Arrays of integral types are considered data and thus consumed from the + // beginning of the buffer. 
+ const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + jint max_num_bytes = + std::min(static_cast<jint>(sizeof(T)) * max_length, remainingBytes); + jsize actual_length = max_num_bytes / sizeof(T); + jint actual_num_bytes = sizeof(T) * actual_length; + auto array = (env.*(JniArrayType<T>::kNewArrayFunc))(actual_length); + (env.*(JniArrayType<T>::kSetArrayRegionFunc))( + array, 0, actual_length, reinterpret_cast<const T *>(dataPtr)); + + env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + actual_num_bytes)); + env.SetIntField(self, gRemainingBytesField, + remainingBytes - actual_num_bytes); + + return array; +} + +template <typename T> +jbyteArray JNICALL ConsumeRemainingAsArray(JNIEnv &env, jobject self) { + return ConsumeIntegralArray<T>(env, self, std::numeric_limits<jint>::max()); +} + +template <typename T> +T JNICALL ConsumeIntegralInRange(JNIEnv &env, jobject self, T min, T max) { + uint64_t range = static_cast<uint64_t>(max) - min; + uint64_t result = 0; + jint offset = 0; + + const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + while (offset < 8 * sizeof(T) && (range >> offset) > 0 && + remainingBytes != 0) { + --remainingBytes; + result = (result << 8u) | dataPtr[remainingBytes]; + offset += 8; + } + + env.SetIntField(self, gRemainingBytesField, remainingBytes); + // dataPtr hasn't been modified, so we don't need to update gDataPtrField. + + if (range != std::numeric_limits<T>::max()) + // We accept modulo bias in favor of reading a dynamic number of bytes as + // this would make it harder for the fuzzer to mutate towards values from + // the table of recent compares. 
+ result = result % (range + 1); + + return static_cast<T>(min + result); +} + +template <typename T> +T JNICALL ConsumeIntegral(JNIEnv &env, jobject self) { + // First generate an unsigned value and then (safely) cast it to a signed + // integral type. By doing this rather than calling ConsumeIntegralInRange + // with bounds [signed_min, signed_max], we ensure that there is a direct + // correspondence between the consumed raw bytes and the result (e.g., 0 + // corresponds to 0 and not to signed_min). This should help mutating + // towards entries of the table of recent compares. + using UnsignedT = typename std::make_unsigned<T>::type; + static_assert( + std::numeric_limits<UnsignedT>::is_modulo, + "Unsigned to signed conversion requires modulo-based overflow handling"); + return static_cast<T>(ConsumeIntegralInRange<UnsignedT>( + env, self, 0, std::numeric_limits<UnsignedT>::max())); +} + +bool JNICALL ConsumeBool(JNIEnv &env, jobject self) { + return ConsumeIntegral<uint8_t>(env, self) & 1u; +} + +jchar ConsumeCharInternal(JNIEnv &env, jobject self, bool filter_surrogates) { + auto raw_codepoint = ConsumeIntegral<jchar>(env, self); + if (filter_surrogates && raw_codepoint >= 0xd800 && raw_codepoint < 0xe000) + raw_codepoint -= 0xd800; + return raw_codepoint; +} + +jchar JNICALL ConsumeChar(JNIEnv &env, jobject self) { + return ConsumeCharInternal(env, self, false); +} + +jchar JNICALL ConsumeCharNoSurrogates(JNIEnv &env, jobject self) { + return ConsumeCharInternal(env, self, true); +} + +template <typename T> +T JNICALL ConsumeProbability(JNIEnv &env, jobject self) { + using IntegralType = + typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t, + uint64_t>::type; + T result = static_cast<T>(ConsumeIntegral<IntegralType>(env, self)); + result /= static_cast<T>(std::numeric_limits<IntegralType>::max()); + return result; +} + +template <typename T> +T JNICALL ConsumeFloatInRange(JNIEnv &env, jobject self, T min, T max) { + T range; + T result = 
min; + + // Deal with overflow, in the event min and max are very far apart + if (min < 0 && max > 0 && min + std::numeric_limits<T>::max() < max) { + range = (max / 2) - (min / 2); + if (ConsumeBool(env, self)) { + result += range; + } + } else { + range = max - min; + } + + T probability = ConsumeProbability<T>(env, self); + return result + range * probability; +} + +template <typename T> +T JNICALL ConsumeRegularFloat(JNIEnv &env, jobject self) { + return ConsumeFloatInRange(env, self, std::numeric_limits<T>::lowest(), + std::numeric_limits<T>::max()); +} + +template <typename T> +T JNICALL ConsumeFloat(JNIEnv &env, jobject self) { + if (env.GetIntField(self, gRemainingBytesField) == 0) return 0.0; + + auto type_val = ConsumeIntegral<uint8_t>(env, self); + + if (type_val <= 10) { + // Consume the same amount of bytes as for a regular float/double + ConsumeRegularFloat<T>(env, self); + + switch (type_val) { + case 0: + return 0.0; + case 1: + return -0.0; + case 2: + return std::numeric_limits<T>::infinity(); + case 3: + return -std::numeric_limits<T>::infinity(); + case 4: + return std::numeric_limits<T>::quiet_NaN(); + case 5: + return std::numeric_limits<T>::denorm_min(); + case 6: + return -std::numeric_limits<T>::denorm_min(); + case 7: + return std::numeric_limits<T>::min(); + case 8: + return -std::numeric_limits<T>::min(); + case 9: + return std::numeric_limits<T>::max(); + case 10: + return -std::numeric_limits<T>::max(); + default: + abort(); + } + } + + T regular = ConsumeRegularFloat<T>(env, self); + return regular; +} + +// Polyfill for C++20 std::countl_one, which counts the number of leading ones +// in an unsigned integer. +inline __attribute__((always_inline)) uint8_t countl_one(uint8_t byte) { + // The result of __builtin_clz is undefined for 0. + if (byte == 0xFF) return 8; + return __builtin_clz(static_cast<uint8_t>(~byte)) - 24; +} + +// Forces a byte to be a valid UTF-8 continuation byte. 
+inline __attribute__((always_inline)) void ForceContinuationByte( + uint8_t &byte) { + byte = (byte | (1u << 7u)) & ~(1u << 6u); +} + +// Fixed bytes of modified UTF-8 that the fix-up below compares against: the +// two-byte encoding of the zero character (0xC0 0x80), the lowest three-byte +// leading byte (0xE0) and the leading byte of encoded surrogates (0xED). +constexpr uint8_t kTwoByteZeroLeadingByte = 0b11000000; +constexpr uint8_t kTwoByteZeroContinuationByte = 0b10000000; +constexpr uint8_t kThreeByteLowLeadingByte = 0b11100000; +constexpr uint8_t kSurrogateLeadingByte = 0b11101101; + +// States of the fix-up loop in FixUpModifiedUtf8; each state names the kind of +// byte that is processed next. +enum class Utf8GenerationState { + LeadingByte_Generic, + LeadingByte_AfterBackslash, + ContinuationByte_Generic, + ContinuationByte_LowLeadingByte, + FirstContinuationByte_LowLeadingByte, + FirstContinuationByte_SurrogateLeadingByte, + FirstContinuationByte_Generic, + SecondContinuationByte_Generic, + LeadingByte_LowSurrogate, + FirstContinuationByte_LowSurrogate, + SecondContinuationByte_HighSurrogate, + SecondContinuationByte_LowSurrogate, +}; + +// Consumes up to `max_bytes` arbitrary bytes pointed to by `data` and returns a +// valid "modified UTF-8" string of length at most `max_length` that resembles +// the input bytes as closely as possible as well as the number of consumed +// bytes. If `stop_on_backslash` is true, then the string will end on the first +// single consumed '\'. +// +// "Modified UTF-8" is the string encoding used by the JNI. It is the same as +// the legacy encoding CESU-8, but with `\0` coded on two bytes. In these +// encodings, code points requiring 4 bytes in modern UTF-8 are represented as +// two surrogates, each of which is coded on 3 bytes. +// +// This function has been designed with the following goals in mind: +// 1. The generated string should be biased towards containing ASCII characters +// as these are often the ones that affect control flow directly. +// 2. Correctly encoded data (e.g. taken from the table of recent compares) +// should be emitted unchanged. +// 3. The raw fuzzer input should be preserved as far as possible, but the +// output must always be correctly encoded. +// +// The JVM accepts strings in two encodings: UTF-16 and modified UTF-8.
+// Generating UTF-16 would make it harder to fulfill the first design goal and +// would potentially hinder compatibility with corpora using the much more +// widely used UTF-8 encoding, which is reasonably similar to modified UTF-8. As +// a result, this function uses modified UTF-8. +// +// See Algorithm 1 of https://arxiv.org/pdf/2010.03090.pdf for more details on +// the individual cases involved in determining the validity of a UTF-8 string. +template <bool ascii_only, bool stop_on_backslash> +std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data, + jint max_bytes, + jint max_length) { + std::string str; + // Every character in modified UTF-8 is coded on at most six bytes. Every + // consumed byte is transformed into at most one code unit, except for the + // case of a zero byte which requires two bytes. + if (ascii_only) { + str.reserve(std::min(2 * static_cast<std::size_t>(max_length), + 2 * static_cast<std::size_t>(max_bytes))); + } else { + str.reserve(std::min(6 * static_cast<std::size_t>(max_length), + 2 * static_cast<std::size_t>(max_bytes))); + } + + Utf8GenerationState state = Utf8GenerationState::LeadingByte_Generic; + const uint8_t *pos = data; + const auto data_end = data + max_bytes; + for (jint length = 0; length < max_length && pos != data_end; ++pos) { + uint8_t c = *pos; + if (ascii_only) { + // Clamp to 7-bit ASCII range. + c &= 0x7Fu; + } + // Fix up c or previously read bytes according to the value of c and the + // current state. In the end, add the fixed up code unit c to the string. + // Exception: The zero character has to be coded on two bytes and is the + // only case in which an iteration of the loop adds two code units. + switch (state) { + case Utf8GenerationState::LeadingByte_Generic: { + switch (ascii_only ? 0 : countl_one(c)) { + case 0: { + // valid - 1-byte code point (ASCII) + // The zero character has to be coded on two bytes in modified + // UTF-8. 
+ if (c == 0) { + str += static_cast<char>(kTwoByteZeroLeadingByte); + c = kTwoByteZeroContinuationByte; + } else if (stop_on_backslash && c == '\\') { + state = Utf8GenerationState::LeadingByte_AfterBackslash; + // The slash either signals the end of the string or is skipped, + // so don't append anything. + continue; + } + // Remain in state LeadingByte. + ++length; + break; + } + case 1: { + // invalid - continuation byte at leader byte position + // Fix it up to be of the form 0b110XXXXX and fall through to the + // case of a 2-byte sequence. + c |= 1u << 6u; + c &= ~(1u << 5u); + [[fallthrough]]; + } + case 2: { + // (most likely) valid - start of a 2-byte sequence + // ASCII characters must be coded on a single byte, so we must + // ensure that the lower two bits combined with the six non-header + // bits of the following byte do not form a 7-bit ASCII value. This + // could only be the case if at most the lowest bit is set. + if ((c & 0b00011110u) == 0) { + state = Utf8GenerationState::ContinuationByte_LowLeadingByte; + } else { + state = Utf8GenerationState::ContinuationByte_Generic; + } + break; + } + // The default case falls through to the case of three leading ones + // coming right after. + default: { + // invalid - at least four leading ones + // In the case of exactly four leading ones, this would be valid + // UTF-8, but is not valid in the JVM's modified UTF-8 encoding. + // Fix it up by clearing the fourth leading one and falling through + // to the 3-byte case. 
+ c &= ~(1u << 4u); + [[fallthrough]]; + } + case 3: { + // valid - start of a 3-byte sequence + if (c == kThreeByteLowLeadingByte) { + state = Utf8GenerationState::FirstContinuationByte_LowLeadingByte; + } else if (c == kSurrogateLeadingByte) { + state = Utf8GenerationState:: + FirstContinuationByte_SurrogateLeadingByte; + } else { + state = Utf8GenerationState::FirstContinuationByte_Generic; + } + break; + } + } + break; + } + case Utf8GenerationState::LeadingByte_AfterBackslash: { + if (c != '\\') { + // Mark the current byte as consumed. + ++pos; + goto done; + } + // A double backslash is consumed as a single one. As we skipped the + // first one, emit the second one as usual. + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::ContinuationByte_LowLeadingByte: { + ForceContinuationByte(c); + // Preserve the zero character, which is coded on two bytes in modified + // UTF-8. In all other cases ensure that we are not incorrectly encoding + // an ASCII character on two bytes by setting the eighth least + // significant bit of the encoded value (second least significant bit of + // the leading byte). + auto previous_c = static_cast<uint8_t>(str.back()); + if (previous_c != kTwoByteZeroLeadingByte || + c != kTwoByteZeroContinuationByte) { + str.back() = static_cast<char>(previous_c | (1u << 1u)); + } + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::ContinuationByte_Generic: { + ForceContinuationByte(c); + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: { + ForceContinuationByte(c); + // Ensure that the current code point could not have been coded on two + // bytes. As two bytes encode up to 11 bits and three bytes encode up + // to 16 bits, we thus have to make it such that the five highest bits + // are not all zero.
Four of these bits are the non-header bits of the + // leader byte. Thus, set the highest non-header bit in this byte (fifth + // highest in the encoded value). + c |= 1u << 5u; + state = Utf8GenerationState::SecondContinuationByte_Generic; + break; + } + case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: { + ForceContinuationByte(c); + if (c & (1u << 5u)) { + // Start with a high surrogate (0xD800-0xDBFF). c contains the second + // byte and the first two bits of the third byte. The first two bits + // of this second byte are fixed to 10 (in 0x8-0xB). + c |= 1u << 5u; + c &= ~(1u << 4u); + // The high surrogate must be followed by a low surrogate. + state = Utf8GenerationState::SecondContinuationByte_HighSurrogate; + } else { + state = Utf8GenerationState::SecondContinuationByte_Generic; + } + break; + } + case Utf8GenerationState::FirstContinuationByte_Generic: { + ForceContinuationByte(c); + state = Utf8GenerationState::SecondContinuationByte_Generic; + break; + } + case Utf8GenerationState::SecondContinuationByte_HighSurrogate: { + ForceContinuationByte(c); + state = Utf8GenerationState::LeadingByte_LowSurrogate; + ++length; + break; + } + case Utf8GenerationState::SecondContinuationByte_LowSurrogate: + case Utf8GenerationState::SecondContinuationByte_Generic: { + ForceContinuationByte(c); + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::LeadingByte_LowSurrogate: { + // We have to emit a low surrogate leading byte, which is a fixed value. + // We still consume a byte from the input to make fuzzer changes more + // stable and preserve valid surrogate pairs picked up from e.g. the + // table of recent compares. + c = kSurrogateLeadingByte; + state = Utf8GenerationState::FirstContinuationByte_LowSurrogate; + break; + } + case Utf8GenerationState::FirstContinuationByte_LowSurrogate: { + ForceContinuationByte(c); + // Low surrogates are code points in the range 0xDC00-0xDFFF. 
c contains + // the second byte and the first two bits of the third byte. The first + // two bits of this second byte are fixed to 11 (in 0xC-0xF). + c |= (1u << 5u) | (1u << 4u); + // The second continuation byte of a low surrogate is not restricted, + // but we need to track it differently to allow for correct backtracking + // if it isn't completed. + state = Utf8GenerationState::SecondContinuationByte_LowSurrogate; + break; + } + } + str += static_cast<uint8_t>(c); + } + + // Backtrack the current incomplete character. + switch (state) { + case Utf8GenerationState::SecondContinuationByte_LowSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::FirstContinuationByte_LowSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::LeadingByte_LowSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::SecondContinuationByte_Generic: + case Utf8GenerationState::SecondContinuationByte_HighSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::ContinuationByte_Generic: + case Utf8GenerationState::ContinuationByte_LowLeadingByte: + case Utf8GenerationState::FirstContinuationByte_Generic: + case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: + case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::LeadingByte_Generic: + case Utf8GenerationState::LeadingByte_AfterBackslash: + // No backtracking required. + break; + } + +done: + return std::make_pair(str, pos - data); +} +} // namespace + +namespace jazzer { +// Exposed for testing only. 
+std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data, + jint max_bytes, jint max_length, + bool ascii_only, + bool stop_on_backslash) { + if (ascii_only) { + if (stop_on_backslash) { + return ::FixUpModifiedUtf8<true, true>(data, max_bytes, max_length); + } else { + return ::FixUpModifiedUtf8<true, false>(data, max_bytes, max_length); + } + } else { + if (stop_on_backslash) { + return ::FixUpModifiedUtf8<false, true>(data, max_bytes, max_length); + } else { + return ::FixUpModifiedUtf8<false, false>(data, max_bytes, max_length); + } + } +} +} // namespace jazzer + +namespace { +jstring ConsumeStringInternal(JNIEnv &env, jobject self, jint max_length, + bool ascii_only, bool stop_on_backslash) { + if (max_length < 0) { + ThrowIllegalArgumentException(env, "maxLength must not be negative"); + return nullptr; + } + + const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + if (max_length == 0 || remainingBytes == 0) return env.NewStringUTF(""); + + if (remainingBytes == 1) { + env.SetIntField(self, gRemainingBytesField, 0); + return env.NewStringUTF(""); + } + + std::string str; + jint consumed_bytes; + std::tie(str, consumed_bytes) = jazzer::FixUpModifiedUtf8( + dataPtr, remainingBytes, max_length, ascii_only, stop_on_backslash); + env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + consumed_bytes)); + env.SetIntField(self, gRemainingBytesField, remainingBytes - consumed_bytes); + return env.NewStringUTF(str.c_str()); +} + +jstring JNICALL ConsumeAsciiString(JNIEnv &env, jobject self, jint max_length) { + return ConsumeStringInternal(env, self, max_length, true, true); +} + +jstring JNICALL ConsumeString(JNIEnv &env, jobject self, jint max_length) { + return ConsumeStringInternal(env, self, max_length, false, true); +} + +jstring JNICALL ConsumeRemainingAsAsciiString(JNIEnv &env, jobject self) { + return ConsumeStringInternal(env, self, 
std::numeric_limits<jint>::max(), + true, false); +} + +jstring JNICALL ConsumeRemainingAsString(JNIEnv &env, jobject self) { + return ConsumeStringInternal(env, self, std::numeric_limits<jint>::max(), + false, false); +} + +// Returns the number of bytes that have not been consumed yet. This function +// is registered below with the JNI descriptor "()I", so it has to return a +// jint and use the JNI calling convention like all other registered natives. +jint JNICALL RemainingBytes(JNIEnv &env, jobject self) { + return env.GetIntField(self, gRemainingBytesField); +} + +const JNINativeMethod kFuzzedDataMethods[]{ + {(char *)"consumeBoolean", (char *)"()Z", (void *)&ConsumeBool}, + {(char *)"consumeByte", (char *)"()B", (void *)&ConsumeIntegral<jbyte>}, + {(char *)"consumeByteUnchecked", (char *)"(BB)B", + (void *)&ConsumeIntegralInRange<jbyte>}, + {(char *)"consumeShort", (char *)"()S", (void *)&ConsumeIntegral<jshort>}, + {(char *)"consumeShortUnchecked", (char *)"(SS)S", + (void *)&ConsumeIntegralInRange<jshort>}, + {(char *)"consumeInt", (char *)"()I", (void *)&ConsumeIntegral<jint>}, + {(char *)"consumeIntUnchecked", (char *)"(II)I", + (void *)&ConsumeIntegralInRange<jint>}, + {(char *)"consumeLong", (char *)"()J", (void *)&ConsumeIntegral<jlong>}, + {(char *)"consumeLongUnchecked", (char *)"(JJ)J", + (void *)&ConsumeIntegralInRange<jlong>}, + {(char *)"consumeFloat", (char *)"()F", (void *)&ConsumeFloat<jfloat>}, + {(char *)"consumeRegularFloat", (char *)"()F", + (void *)&ConsumeRegularFloat<jfloat>}, + {(char *)"consumeRegularFloatUnchecked", (char *)"(FF)F", + (void *)&ConsumeFloatInRange<jfloat>}, + {(char *)"consumeProbabilityFloat", (char *)"()F", + (void *)&ConsumeProbability<jfloat>}, + {(char *)"consumeDouble", (char *)"()D", (void *)&ConsumeFloat<jdouble>}, + {(char *)"consumeRegularDouble", (char *)"()D", + (void *)&ConsumeRegularFloat<jdouble>}, + {(char *)"consumeRegularDoubleUnchecked", (char *)"(DD)D", + (void *)&ConsumeFloatInRange<jdouble>}, + {(char *)"consumeProbabilityDouble", (char *)"()D", + (void *)&ConsumeProbability<jdouble>}, + {(char *)"consumeChar", (char *)"()C", (void *)&ConsumeChar}, + {(char *)"consumeCharUnchecked", (char *)"(CC)C", + (void
*)&ConsumeIntegralInRange<jchar>}, + {(char *)"consumeCharNoSurrogates", (char *)"()C", + (void *)&ConsumeCharNoSurrogates}, + {(char *)"consumeAsciiString", (char *)"(I)Ljava/lang/String;", + (void *)&ConsumeAsciiString}, + {(char *)"consumeRemainingAsAsciiString", (char *)"()Ljava/lang/String;", + (void *)&ConsumeRemainingAsAsciiString}, + {(char *)"consumeString", (char *)"(I)Ljava/lang/String;", + (void *)&ConsumeString}, + {(char *)"consumeRemainingAsString", (char *)"()Ljava/lang/String;", + (void *)&ConsumeRemainingAsString}, + {(char *)"consumeBooleans", (char *)"(I)[Z", + (void *)&ConsumeIntegralArray<jboolean>}, + {(char *)"consumeBytes", (char *)"(I)[B", + (void *)&ConsumeIntegralArray<jbyte>}, + {(char *)"consumeShorts", (char *)"(I)[S", + (void *)&ConsumeIntegralArray<jshort>}, + {(char *)"consumeInts", (char *)"(I)[I", + (void *)&ConsumeIntegralArray<jint>}, + {(char *)"consumeLongs", (char *)"(I)[J", + (void *)&ConsumeIntegralArray<jlong>}, + {(char *)"consumeRemainingAsBytes", (char *)"()[B", + (void *)&ConsumeRemainingAsArray<jbyte>}, + {(char *)"remainingBytes", (char *)"()I", (void *)&RemainingBytes}, +}; +const jint kNumFuzzedDataMethods = + sizeof(kFuzzedDataMethods) / sizeof(kFuzzedDataMethods[0]); +} // namespace + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_FuzzedDataProviderImpl_nativeInit( + JNIEnv *env, jclass clazz) { + env->RegisterNatives(clazz, kFuzzedDataMethods, kNumFuzzedDataMethods); + gDataPtrField = env->GetFieldID(clazz, "dataPtr", "J"); + gRemainingBytesField = env->GetFieldID(clazz, "remainingBytes", "I"); +} diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp b/driver/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp new file mode 100644 index 00000000..8764aaaa --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp @@ -0,0 +1,184 @@ +// Copyright 2022 Code Intelligence GmbH 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <cstddef> +#include <cstdint> + +#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h" +#include "sanitizer_hooks_with_pc.h" + +namespace { + +extern "C" { +void __sanitizer_weak_hook_compare_bytes(void *caller_pc, const void *s1, + const void *s2, std::size_t n1, + std::size_t n2, int result); +void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, + const void *s2, size_t len2, void *result); +} + +inline __attribute__((always_inline)) void *idToPc(jint id) { + return reinterpret_cast<void *>(static_cast<uintptr_t>(id)); +} +} // namespace + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0( + JNIEnv *env, jclass cls, jbyteArray needle, jint id) { + jint needle_length = env->GetArrayLength(needle); + auto *needle_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(needle, nullptr)); + __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native, + needle_length, nullptr); + env->ReleasePrimitiveArrayCritical(needle, needle_native, JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0( + jint needle_length, jbyte *needle_native, jint id) { + __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native, + needle_length, nullptr); +} + 
+[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp( + JNIEnv *env, jclass cls, jbyteArray b1, jbyteArray b2, jint result, + jint id) { + jint b1_length = env->GetArrayLength(b1); + jint b2_length = env->GetArrayLength(b2); + auto *b1_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b1, nullptr)); + auto *b2_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b2, nullptr)); + __sanitizer_weak_hook_compare_bytes(idToPc(id), b1_native, b2_native, + b1_length, b2_length, result); + env->ReleasePrimitiveArrayCritical(b1, b1_native, JNI_ABORT); + env->ReleasePrimitiveArrayCritical(b2, b2_native, JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp( + jint b1_length, jbyte *b1, jint b2_length, jbyte *b2, jint result, + jint id) { + __sanitizer_weak_hook_compare_bytes(idToPc(id), b1, b2, b1_length, b2_length, + result); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong( + JNIEnv *env, jclass cls, jlong value1, jlong value2, jint id) { + __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong( + jlong value1, jlong value2, jint id) { + __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt( + JNIEnv *env, jclass cls, jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt( + jint value1, jint value2, jint id) { + 
__sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt( + JNIEnv *env, jclass cls, jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt( + jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch( + JNIEnv *env, jclass cls, jlong switch_value, + jlongArray libfuzzer_case_values, jint id) { + auto *case_values = static_cast<jlong *>( + env->GetPrimitiveArrayCritical(libfuzzer_case_values, nullptr)); + __sanitizer_cov_trace_switch_with_pc( + idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values)); + env->ReleasePrimitiveArrayCritical(libfuzzer_case_values, case_values, + JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch( + jlong switch_value, jint libfuzzer_case_values_length, jlong *case_values, + jint id) { + __sanitizer_cov_trace_switch_with_pc( + idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values)); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong( + JNIEnv *env, jclass cls, jlong value, jint id) { + __sanitizer_cov_trace_div8_with_pc(idToPc(id), value); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong( + jlong value, jint id) { + __sanitizer_cov_trace_div8_with_pc(idToPc(id), value); +} + +[[maybe_unused]] void 
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt( + JNIEnv *env, jclass cls, jint value, jint id) { + __sanitizer_cov_trace_div4_with_pc(idToPc(id), value); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt( + jint value, jint id) { + __sanitizer_cov_trace_div4_with_pc(idToPc(id), value); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep( + JNIEnv *env, jclass cls, jlong idx, jint id) { + __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx)); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep( + jlong idx, jint id) { + __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx)); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir( + JNIEnv *env, jclass cls, jint caller_id, jint callee_id) { + __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id), + static_cast<uintptr_t>(callee_id)); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir( + jint caller_id, jint callee_id) { + __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id), + static_cast<uintptr_t>(callee_id)); +} diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp b/driver/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp new file mode 100644 index 00000000..a20863fa --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp @@ -0,0 +1,129 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the 
License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <mutex> +#include <utility> +#include <vector> + +#include "absl/strings/str_split.h" +#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h" + +namespace { +bool is_using_native_libraries = false; +std::once_flag ignore_list_flag; +std::vector<std::pair<uintptr_t, uintptr_t>> ignore_for_interception_ranges; + +/** + * Adds the address ranges of executable segmentes of the library lib_name to + * the ignorelist for C standard library function interception (strcmp, memcmp, + * ...). + */ +void ignoreLibraryForInterception(const std::string &lib_name) { + std::ifstream loaded_libs("/proc/self/maps"); + if (!loaded_libs) { + // This early exit is taken e.g. on macOS, where /proc does not exist. + return; + } + std::string line; + while (std::getline(loaded_libs, line)) { + if (!absl::StrContains(line, lib_name)) continue; + // clang-format off + // A typical line looks as follows: + // 7f15356c9000-7f1536367000 r-xp 0020d000 fd:01 19275673 /usr/lib/jvm/java-15-openjdk-amd64/lib/server/libjvm.so + // clang-format on + std::vector<std::string> parts = + absl::StrSplit(line, ' ', absl::SkipEmpty()); + if (parts.size() != 6) { + std::cout << "ERROR: Invalid format for /proc/self/maps\n" + << line << std::endl; + exit(1); + } + // Skip non-executable address rang"s. 
+ if (!absl::StrContains(parts[1], "x")) continue; + std::string range_str = parts[0]; + std::vector<std::string> range = absl::StrSplit(range_str, "-"); + if (range.size() != 2) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + std::size_t pos; + auto start = std::stoull(range[0], &pos, 16); + if (pos != range[0].size()) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + auto end = std::stoull(range[1], &pos, 16); + if (pos != range[0].size()) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + ignore_for_interception_ranges.emplace_back(start, end); + } +} + +const std::vector<std::string> kLibrariesToIgnoreForInterception = { + // The driver executable itself can be treated just like a library. + "jazzer_driver", "libinstrument.so", "libjava.so", + "libjimage.so", "libjli.so", "libjvm.so", + "libnet.so", "libverify.so", "libzip.so", +}; +} // namespace + +extern "C" [[maybe_unused]] bool __sanitizer_weak_is_relevant_pc( + void *caller_pc) { + // If the fuzz target is not using native libraries, calls to strcmp, memcmp, + // etc. should never be intercepted. The values reported if they were at best + // duplicate the values received from our bytecode instrumentation and at + // worst pollute the table of recent compares with string internal to the JDK. + if (!is_using_native_libraries) return false; + // If the fuzz target is using native libraries, intercept calls only if they + // don't originate from those address ranges that are known to belong to the + // JDK. 
+ return std::none_of( + ignore_for_interception_ranges.cbegin(), + ignore_for_interception_ranges.cend(), + [caller_pc](const std::pair<uintptr_t, uintptr_t> &range) { + uintptr_t start; + uintptr_t end; + std::tie(start, end) = range; + auto address = reinterpret_cast<uintptr_t>(caller_pc); + return start <= address && address <= end; + }); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_handleLibraryLoad( + JNIEnv *, jclass) { + std::call_once(ignore_list_flag, [] { + std::cout << "INFO: detected a native library load, enabling interception " + "for libc functions" + << std::endl; + for (const auto &lib_name : kLibrariesToIgnoreForInterception) + ignoreLibraryForInterception(lib_name); + // Enable the ignore list after it has been populated since vector is not + // thread-safe with respect to concurrent writes and reads. + is_using_native_libraries = true; + }); +} diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h b/driver/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h new file mode 100644 index 00000000..be655adb --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h @@ -0,0 +1,49 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include <cstdint> + +// This file declares variants of the libFuzzer compare, division, switch and +// gep hooks that accept an additional caller_pc argument that can be used to +// pass a custom value that is recorded as the caller's instruction pointer +// ("program counter"). This allows synthetic program counters obtained from +// Java coverage information to be used with libFuzzer's value profile, with +// which it records detailed information about the result of compares and +// associates it with particular coverage locations. +// +// Note: Only the lower 9 bits of the caller_pc argument are used by libFuzzer. +#ifdef __cplusplus +extern "C" { +#endif +void __sanitizer_cov_trace_cmp4_with_pc(void *caller_pc, uint32_t arg1, + uint32_t arg2); +void __sanitizer_cov_trace_cmp8_with_pc(void *caller_pc, uint64_t arg1, + uint64_t arg2); + +void __sanitizer_cov_trace_switch_with_pc(void *caller_pc, uint64_t val, + uint64_t *cases); + +void __sanitizer_cov_trace_div4_with_pc(void *caller_pc, uint32_t val); +void __sanitizer_cov_trace_div8_with_pc(void *caller_pc, uint64_t val); + +void __sanitizer_cov_trace_gep_with_pc(void *caller_pc, uintptr_t idx); + +void __sanitizer_cov_trace_pc_indir_with_pc(void *caller_pc, uintptr_t callee); +#ifdef __cplusplus +} +#endif diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp b/driver/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp new file mode 100644 index 00000000..abc5f04e --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Suppress libFuzzer warnings about missing sanitizer methods in non-sanitizer +// builds. +extern "C" [[maybe_unused]] int __sanitizer_acquire_crash_state() { return 1; } + +namespace jazzer { +void DumpJvmStackTraces(); +} + +// Dump a JVM stack trace on timeouts. +extern "C" [[maybe_unused]] void __sanitizer_print_stack_trace() { + jazzer::DumpJvmStackTraces(); +} diff --git a/driver/src/main/native/com/code_intelligence/jazzer/driver/trigger_driver_hooks_load.cpp b/driver/src/main/native/com/code_intelligence/jazzer/driver/trigger_driver_hooks_load.cpp new file mode 100644 index 00000000..8e6d19ab --- /dev/null +++ b/driver/src/main/native/com/code_intelligence/jazzer/driver/trigger_driver_hooks_load.cpp @@ -0,0 +1,50 @@ +// Copyright 2022 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <dlfcn.h> +#include <jni.h> + +#include <cstdlib> + +// The native driver binary, if used, forwards all calls to native libFuzzer +// hooks such as __sanitizer_cov_trace_cmp8 to the Jazzer JNI library. 
In order +// to load the hook symbols when the library is ready, it needs to be passed a +// handle - the JVM loads libraries with RTLD_LOCAL and thus their symbols +// wouldn't be found as part of the global lookup procedure. +jint JNI_OnLoad(JavaVM *, void *) { + Dl_info info; + + if (!dladdr(reinterpret_cast<const void *>(&JNI_OnLoad), &info) || + !info.dli_fname) { + fprintf(stderr, "Failed to determine our dli_fname\n"); + abort(); + } + + void *handle = dlopen(info.dli_fname, RTLD_NOLOAD | RTLD_LAZY); + if (handle == nullptr) { + fprintf(stderr, "Failed to dlopen self: %s\n", dlerror()); + abort(); + } + + void *register_hooks = dlsym(RTLD_DEFAULT, "jazzer_initialize_native_hooks"); + // We may be running without the native driver, so not finding this method is + // an expected error. + if (register_hooks) { + reinterpret_cast<void (*)(void *)>(register_hooks)(handle); + } + + dlclose(handle); + + return JNI_VERSION_1_8; +} |