diff options
Diffstat (limited to 'src/main/native/com/code_intelligence/jazzer/driver')
15 files changed, 1965 insertions, 0 deletions
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel new file mode 100644 index 00000000..27d8a1c5 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel @@ -0,0 +1,166 @@ +load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library") +load("//bazel:compat.bzl", "MULTI_PLATFORM", "SKIP_ON_WINDOWS") + +cc_jni_library( + name = "jazzer_driver", + platforms = MULTI_PLATFORM, + visibility = [ + "//src/jmh:__subpackages__", + "//src/main/java/com/code_intelligence/jazzer/driver:__pkg__", + "//src/main/java/com/code_intelligence/jazzer/junit:__pkg__", + "//src/main/java/com/code_intelligence/jazzer/runtime:__pkg__", + "//src/test:__subpackages__", + ], + deps = [ + ":jazzer_driver_lib", + "@jazzer_libfuzzer//:libfuzzer_no_main", + ] + select({ + # Windows doesn't have a concept analogous to RTLD_GLOBAL. + "@platforms//os:windows": [], + "//conditions:default": [":init_jazzer_preload"], + }), +) + +cc_library( + name = "jazzer_driver_lib", + visibility = ["//src/test/native/com/code_intelligence/jazzer/driver/mocks:__pkg__"], + deps = [ + ":coverage_tracker", + ":fuzz_target_runner", + ":jazzer_fuzzer_callbacks", + ":libfuzzer_callbacks", + ":mutator", + ], +) + +cc_jni_library( + name = "jazzer_android_tooling", + srcs = ["android_tooling.cpp"], + platforms = MULTI_PLATFORM, + target_compatible_with = SKIP_ON_WINDOWS, + visibility = ["//src/main/java/com/code_intelligence/jazzer/android:__pkg__"], + deps = [ + "//src/main/java/com/code_intelligence/jazzer/android:android_runtime.hdrs", + ], +) + +cc_library( + name = "coverage_tracker", + srcs = ["coverage_tracker.cpp"], + hdrs = ["coverage_tracker.h"], + deps = ["//src/main/java/com/code_intelligence/jazzer/runtime:coverage_map.hdrs"], + # Symbols are only referenced dynamically via JNI. + alwayslink = True, +) + +cc_library( + name = "fuzz_target_runner", + srcs = ["fuzz_target_runner.cpp"], + hdrs = ["fuzz_target_runner.h"], + linkopts = select({ + "@platforms//os:windows": [], + "//conditions:default": ["-ldl"], + }), + deps = [ + ":sanitizer_symbols", + "//src/main/java/com/code_intelligence/jazzer/runtime:fuzz_target_runner_natives.hdrs", + ], + # With sanitizers, symbols are only referenced dynamically via JNI. + alwayslink = True, +) + +cc_library( + name = "fuzzed_data_provider", + srcs = ["fuzzed_data_provider.cpp"], + visibility = [ + "//launcher:__pkg__", + ], + deps = [ + "//src/main/java/com/code_intelligence/jazzer/driver:fuzzed_data_provider_impl.hdrs", + ], + # Symbols may only be referenced dynamically via JNI. + alwayslink = True, +) + +cc_jni_library( + name = "jazzer_fuzzed_data_provider", + platforms = MULTI_PLATFORM, + visibility = ["//src/main/java/com/code_intelligence/jazzer/driver:__pkg__"], + deps = [":fuzzed_data_provider"], +) + +cc_library( + name = "jazzer_fuzzer_callbacks", + srcs = ["jazzer_fuzzer_callbacks.cpp"], + deps = [ + ":sanitizer_hooks_with_pc", + "//src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs", + ], + alwayslink = True, +) + +cc_jni_library( + name = "jazzer_signal_handler", + srcs = ["signal_handler.cpp"], + platforms = MULTI_PLATFORM, + visibility = ["//src/main/java/com/code_intelligence/jazzer/driver:__pkg__"], + deps = ["//src/main/java/com/code_intelligence/jazzer/driver:signal_handler.hdrs"], +) + +cc_library( + name = "libfuzzer_callbacks", + srcs = ["libfuzzer_callbacks.cpp"], + deps = [ + "//src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs", + "@com_google_absl//absl/strings", + ], + # Symbols are only referenced dynamically via JNI. + alwayslink = True, +) + +cc_library( + name = "mutator", + srcs = ["mutator.cpp"], + deps = ["//src/main/java/com/code_intelligence/jazzer/runtime:mutator.hdrs"], + # Symbols are only referenced dynamically via JNI. + alwayslink = True, +) + +cc_library( + name = "init_jazzer_preload", + srcs = ["init_jazzer_preload.cpp"], + linkopts = ["-ldl"], + target_compatible_with = SKIP_ON_WINDOWS, + deps = ["@fmeum_rules_jni//jni"], + # Symbols are only referenced dynamically via JNI. + alwayslink = True, +) + +cc_library( + name = "sanitizer_hooks_with_pc", + hdrs = ["sanitizer_hooks_with_pc.h"], + visibility = ["//:__subpackages__"], +) + +cc_library( + name = "sanitizer_symbols", + srcs = ["sanitizer_symbols.cpp"], + # Symbols are referenced dynamically by libFuzzer. + alwayslink = True, +) + +cc_test( + name = "fuzzed_data_provider_test", + size = "small", + srcs = ["fuzzed_data_provider_test.cpp"], + copts = select({ + "@platforms//os:windows": ["/std:c++17"], + "//conditions:default": ["-std=c++17"], + }), + deps = [ + ":fuzzed_data_provider", + "@fmeum_rules_jni//jni", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) diff --git a/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp b/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp new file mode 100644 index 00000000..73444696 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp @@ -0,0 +1,61 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <dlfcn.h> +#include <jni.h> + +#include <cstdlib> +#include <cstring> +#include <iostream> + +#include "com_code_intelligence_jazzer_android_AndroidRuntime.h" + +const char *RUNTIME_LIBRARY = "libandroid_runtime.so"; + +// Register native methods from the Android Runtime (ART) framework. +[[maybe_unused]] jint +Java_com_code_1intelligence_jazzer_android_AndroidRuntime_registerNatives( + JNIEnv *env, jclass clazz) { + void *handle = nullptr; + handle = dlopen(RUNTIME_LIBRARY, RTLD_LAZY); + + if (handle == nullptr) { + std::cerr + << "ERROR: Unable to locate runtime library. Check LD_LIBRARY_PATH." + << std::endl; + exit(1); + } + // reset errors + dlerror(); + + // Load the symbol from library + typedef jint (*Register_Frameworks_t)(JNIEnv *); + Register_Frameworks_t Register_Frameworks; + + Register_Frameworks = reinterpret_cast<Register_Frameworks_t>( + dlsym(handle, "registerFrameworkNatives")); + const char *dlsym_error = dlerror(); + if (dlsym_error) { + std::cerr << "ERROR: Unable to invoke registerFrameworkNatives." + << std::endl; + exit(1); + } + + if (Register_Frameworks == nullptr) { + std::cerr << "ERROR: Register_Frameworks is null." << std::endl; + exit(1); + } + + return Register_Frameworks(env); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp new file mode 100644 index 00000000..d904c2d5 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "coverage_tracker.h" + +#include <jni.h> +#include <stdint.h> + +#include <iostream> +#include <vector> + +#include "com_code_intelligence_jazzer_runtime_CoverageMap.h" + +extern "C" void __sanitizer_cov_8bit_counters_init(uint8_t *start, + uint8_t *end); +extern "C" void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, + const uintptr_t *pcs_end); +extern "C" size_t __sanitizer_cov_get_observed_pcs(uintptr_t **pc_entries); + +namespace { +void AssertNoException(JNIEnv &env) { + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + std::cerr << "ERROR: Java exception occurred in CoverageTracker JNI code" + << std::endl; + _Exit(1); + } +} +} // namespace + +namespace jazzer { + +uint8_t *CoverageTracker::counters_ = nullptr; +PCTableEntry *CoverageTracker::pc_entries_ = nullptr; + +void CoverageTracker::Initialize(JNIEnv &env, jlong counters) { + if (counters_ != nullptr) { + std::cerr << "ERROR: CoverageTracker::Initialize must not be called more " + "than once" + << std::endl; + _Exit(1); + } + counters_ = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(counters)); +} + +void CoverageTracker::RegisterNewCounters(JNIEnv &env, jint old_num_counters, + jint new_num_counters) { + if (counters_ == nullptr) { + std::cerr + << "ERROR: CoverageTracker::Initialize should have been called first" + << std::endl; + _Exit(1); + } + if (new_num_counters < old_num_counters) { + std::cerr + << "ERROR: new_num_counters must not be smaller than old_num_counters" + << std::endl; + _Exit(1); + } + if (new_num_counters == old_num_counters) { + return; + } + std::size_t diff_num_counters = new_num_counters - old_num_counters; + // libFuzzer requires an array containing the instruction addresses associated + // with the coverage counters registered above. This is required to report how + // many edges have been covered. However, libFuzzer only checks these + // addresses when the corresponding flag is set to 1. Therefore, it is safe to + // set the all PC entries to any value as long as the corresponding flag is + // set to zero. We set the value of each PC to the index of the corresponding + // edge ID. This facilitates finding the edge ID of each covered PC reported + // by libFuzzer. + pc_entries_ = new PCTableEntry[diff_num_counters]; + for (std::size_t i = 0; i < diff_num_counters; ++i) { + pc_entries_[i] = {i, 0}; + } + __sanitizer_cov_8bit_counters_init(counters_ + old_num_counters, + counters_ + new_num_counters); + __sanitizer_cov_pcs_init((uintptr_t *)(pc_entries_), + (uintptr_t *)(pc_entries_ + diff_num_counters)); +} +} // namespace jazzer + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_initialize( + JNIEnv *env, jclass, jlong counters) { + ::jazzer::CoverageTracker::Initialize(*env, counters); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_registerNewCounters( + JNIEnv *env, jclass, jint old_num_counters, jint new_num_counters) { + ::jazzer::CoverageTracker::RegisterNewCounters(*env, old_num_counters, + new_num_counters); +} + +[[maybe_unused]] jintArray +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_getEverCoveredIds( + JNIEnv *env, jclass) { + uintptr_t *covered_pcs; + jint num_covered_pcs = __sanitizer_cov_get_observed_pcs(&covered_pcs); + std::vector<jint> covered_edge_ids(covered_pcs, + covered_pcs + num_covered_pcs); + delete[] covered_pcs; + + jintArray covered_edge_ids_jni = env->NewIntArray(num_covered_pcs); + AssertNoException(*env); + env->SetIntArrayRegion(covered_edge_ids_jni, 0, num_covered_pcs, + covered_edge_ids.data()); + AssertNoException(*env); + return covered_edge_ids_jni; +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h new file mode 100644 index 00000000..234536dc --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h @@ -0,0 +1,43 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <jni.h> +#include <stdint.h> + +#include <string> + +namespace jazzer { + +// The members of this struct are only accessed by libFuzzer. +struct __attribute__((packed)) PCTableEntry { + [[maybe_unused]] uintptr_t PC, PCFlags; +}; + +// CoverageTracker registers an array of 8-bit coverage counters with +// libFuzzer. The array is populated from Java using Unsafe. +class CoverageTracker { + private: + static uint8_t *counters_; + static PCTableEntry *pc_entries_; + + public: + static void Initialize(JNIEnv &env, jlong counters); + static void RegisterNewCounters(JNIEnv &env, jint old_num_counters, + jint new_num_counters); +}; +} // namespace jazzer diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp new file mode 100644 index 00000000..02e9ae14 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp @@ -0,0 +1,240 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * A native wrapper around the FuzzTargetRunner Java class that executes it as a + * libFuzzer fuzz target. + */ + +#include "fuzz_target_runner.h" + +#ifndef _WIN32 +#include <dlfcn.h> +#endif +#include <jni.h> +#include <stdint.h> + +#include <iostream> +#include <limits> +#include <string> +#include <vector> + +#include "com_code_intelligence_jazzer_runtime_FuzzTargetRunnerNatives.h" + +extern "C" int LLVMFuzzerRunDriver(int *argc, char ***argv, + int (*UserCb)(const uint8_t *Data, + size_t Size)); +extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); + +namespace { +jclass gRunner; +jmethodID gRunOneId; +jmethodID gMutateOneId; +jmethodID gCrossOverId; +JavaVM *gJavaVm; +JNIEnv *gEnv; +jboolean gUseExperimentalMutator; + +// A libFuzzer-registered callback that outputs the crashing input, but does +// not include a stack trace. +void (*gLibfuzzerPrintCrashingInput)() = nullptr; + +int testOneInput(const uint8_t *data, const std::size_t size) { + JNIEnv &env = *gEnv; + jint jsize = + std::min(size, static_cast<size_t>(std::numeric_limits<jint>::max())); + int res = env.CallStaticIntMethod(gRunner, gRunOneId, data, jsize); + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + _Exit(1); + } + return res; +} +} // namespace + +extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, + size_t MaxSize, unsigned int Seed) { + if (gUseExperimentalMutator) { + JNIEnv &env = *gEnv; + jint jsize = + std::min(Size, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jmaxSize = std::min( + MaxSize, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jseed = static_cast<jint>(Seed); + jint newSize = env.CallStaticLongMethod(gRunner, gMutateOneId, Data, jsize, + jmaxSize, jseed); + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + _Exit(1); + } + return static_cast<uint32_t>(newSize); + } else { + return LLVMFuzzerMutate(Data, Size, MaxSize); + } +} + +extern "C" size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, + uint8_t *Out, size_t MaxOutSize, + unsigned int Seed) { + if (gUseExperimentalMutator) { + JNIEnv &env = *gEnv; + jint jsize1 = + std::min(Size1, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jsize2 = + std::min(Size2, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jMaxOutSize = std::min( + MaxOutSize, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jseed = static_cast<jint>(Seed); + + jint newSize = + env.CallStaticLongMethod(gRunner, gCrossOverId, Data1, jsize1, Data2, + jsize2, Out, jMaxOutSize, jseed); + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + _Exit(1); + } + return static_cast<uint32_t>(newSize); + } else { + // No custom cross over supported. + return 0; + } +} + +namespace jazzer { +void DumpJvmStackTraces() { + JNIEnv *env = nullptr; + if (gJavaVm->AttachCurrentThread(reinterpret_cast<void **>(&env), nullptr) != + JNI_OK) { + return; + } + jmethodID dumpStack = + env->GetStaticMethodID(gRunner, "dumpAllStackTraces", "()V"); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + return; + } + env->CallStaticVoidMethod(gRunner, dumpStack); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + return; + } + // Do not detach as we may be the main thread (but the JVM exits anyway). +} +} // namespace jazzer + +[[maybe_unused]] jint +Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_startLibFuzzer( + JNIEnv *env, jclass, jobjectArray args, jclass runner, + jboolean useExperimentalMutator) { + gUseExperimentalMutator = useExperimentalMutator; + gEnv = env; + env->GetJavaVM(&gJavaVm); + gRunner = reinterpret_cast<jclass>(env->NewGlobalRef(runner)); + gRunOneId = env->GetStaticMethodID(runner, "runOne", "(JI)I"); + gMutateOneId = env->GetStaticMethodID(runner, "mutateOne", "(JIII)I"); + gCrossOverId = env->GetStaticMethodID(runner, "crossOver", "(JIJIJII)I"); + if (gRunOneId == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + + int argc = env->GetArrayLength(args); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + std::vector<std::string> argv_strings; + std::vector<const char *> argv_c; + for (jsize i = 0; i < argc; i++) { + auto arg_jni = + reinterpret_cast<jbyteArray>(env->GetObjectArrayElement(args, i)); + if (arg_jni == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + jbyte *arg_c = env->GetByteArrayElements(arg_jni, nullptr); + if (arg_c == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + std::size_t arg_size = env->GetArrayLength(arg_jni); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + argv_strings.emplace_back(reinterpret_cast<const char *>(arg_c), arg_size); + env->ReleaseByteArrayElements(arg_jni, arg_c, JNI_ABORT); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + } + for (jsize i = 0; i < argc; i++) { + argv_c.emplace_back(argv_strings[i].c_str()); + } + // Null-terminate argv. + argv_c.emplace_back(nullptr); + + const char **argv = argv_c.data(); + return LLVMFuzzerRunDriver(&argc, const_cast<char ***>(&argv), testOneInput); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_printCrashingInput( + JNIEnv *, jclass) { + if (gLibfuzzerPrintCrashingInput == nullptr) { + std::cerr << "<not available>" << std::endl; + } else { + gLibfuzzerPrintCrashingInput(); + } +} + +namespace fuzzer { +// Defined in: +// https://github.com/llvm/llvm-project/blob/27cc31b64c0491725aa88a6822f0f2a2c18914d7/compiler-rt/lib/fuzzer/FuzzerLoop.cpp#L43 +// Used here: +// https://github.com/llvm/llvm-project/blob/27cc31b64c0491725aa88a6822f0f2a2c18914d7/compiler-rt/lib/fuzzer/FuzzerLoop.cpp#L244 +extern bool RunningUserCallback; +} // namespace fuzzer + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_temporarilyDisableLibfuzzerExitHook( + JNIEnv *, jclass) { + ::fuzzer::RunningUserCallback = false; +} + +// We apply a patch to libFuzzer to make it call this function instead of +// __sanitizer_set_death_callback to pass us the death callback. +extern "C" [[maybe_unused]] void __jazzer_set_death_callback( + void (*callback)()) { + gLibfuzzerPrintCrashingInput = callback; +#ifndef _WIN32 + void *sanitizer_set_death_callback = + dlsym(RTLD_DEFAULT, "__sanitizer_set_death_callback"); + if (sanitizer_set_death_callback != nullptr) { + (reinterpret_cast<void (*)(void (*)())>(sanitizer_set_death_callback))( + []() { + ::jazzer::DumpJvmStackTraces(); + gLibfuzzerPrintCrashingInput(); + // Ideally, we would be able to perform a graceful shutdown of the + // JVM. However, doing this directly results in a nested bug report by + // ASan or UBSan, likely because something about the stack/thread + // context in which they generate reports is incompatible with the JVM + // shutdown process. use_sigaltstack=0 does not help though, so this + // might be on us. + }); + } +#endif +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h new file mode 100644 index 00000000..e64eb8f2 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h @@ -0,0 +1,26 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace jazzer { +/* + * Print the stack traces of all active JVM threads. + * + * This function can be called from any thread. + */ +void DumpJvmStackTraces(); +} // namespace jazzer diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp new file mode 100644 index 00000000..7ea9c344 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp @@ -0,0 +1,692 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Modified from +// https://raw.githubusercontent.com/google/atheris/034284dc4bb1ad4f4ab6ba5d34fb4dca7c633660/fuzzed_data_provider.cc +// +// Original license and copyright notices: +// +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Modified from +// https://github.com/llvm/llvm-project/blob/70de7e0d9a95b7fcd7c105b06bd90fdf4e01f563/compiler-rt/include/fuzzer/FuzzedDataProvider.h +// +// Original license and copyright notices: +// +//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#include <algorithm> +#include <cstdint> +#include <limits> +#include <string> +#include <tuple> +#include <type_traits> + +#include "com_code_intelligence_jazzer_driver_FuzzedDataProviderImpl.h" + +namespace { + +jfieldID gDataPtrField = nullptr; +jfieldID gRemainingBytesField = nullptr; + +void ThrowIllegalArgumentException(JNIEnv &env, const std::string &message) { + jclass illegal_argument_exception = + env.FindClass("java/lang/IllegalArgumentException"); + env.ThrowNew(illegal_argument_exception, message.c_str()); +} + +template <typename T> +struct JniArrayType {}; + +#define JNI_ARRAY_TYPE(lower_case, sentence_case) \ + template <> \ + struct JniArrayType<j##lower_case> { \ + typedef j##lower_case type; \ + typedef j##lower_case##Array array_type; \ + static constexpr array_type (JNIEnv::*kNewArrayFunc)(jsize) = \ + &JNIEnv::New##sentence_case##Array; \ + static constexpr void (JNIEnv::*kSetArrayRegionFunc)( \ + array_type array, jsize start, jsize len, \ + const type *buf) = &JNIEnv::Set##sentence_case##ArrayRegion; \ + }; + +JNI_ARRAY_TYPE(boolean, Boolean); +JNI_ARRAY_TYPE(byte, Byte); +JNI_ARRAY_TYPE(short, Short); +JNI_ARRAY_TYPE(int, Int); +JNI_ARRAY_TYPE(long, Long); + +template <typename T> +typename JniArrayType<T>::array_type JNICALL +ConsumeIntegralArray(JNIEnv &env, jobject self, jint max_length) { + if (max_length < 0) { + ThrowIllegalArgumentException(env, "maxLength must not be negative"); + return nullptr; + } + // Arrays of integral types are considered data and thus consumed from the + // beginning of the buffer. + const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + jint max_num_bytes = + std::min(static_cast<jint>(sizeof(T)) * max_length, remainingBytes); + jsize actual_length = max_num_bytes / sizeof(T); + jint actual_num_bytes = sizeof(T) * actual_length; + auto array = (env.*(JniArrayType<T>::kNewArrayFunc))(actual_length); + (env.*(JniArrayType<T>::kSetArrayRegionFunc))( + array, 0, actual_length, reinterpret_cast<const T *>(dataPtr)); + + env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + actual_num_bytes)); + env.SetIntField(self, gRemainingBytesField, + remainingBytes - actual_num_bytes); + + return array; +} + +template <typename T> +jbyteArray JNICALL ConsumeRemainingAsArray(JNIEnv &env, jobject self) { + return ConsumeIntegralArray<T>(env, self, std::numeric_limits<jint>::max()); +} + +template <typename T> +T JNICALL ConsumeIntegralInRange(JNIEnv &env, jobject self, T min, T max) { + uint64_t range = static_cast<uint64_t>(max) - min; + uint64_t result = 0; + jint offset = 0; + + const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + while (offset < 8 * sizeof(T) && (range >> offset) > 0 && + remainingBytes != 0) { + --remainingBytes; + result = (result << 8u) | dataPtr[remainingBytes]; + offset += 8; + } + + env.SetIntField(self, gRemainingBytesField, remainingBytes); + // dataPtr hasn't been modified, so we don't need to update gDataPtrField. + + if (range != std::numeric_limits<T>::max()) + // We accept modulo bias in favor of reading a dynamic number of bytes as + // this would make it harder for the fuzzer to mutate towards values from + // the table of recent compares. + result = result % (range + 1); + + return static_cast<T>(min + result); +} + +template <typename T> +T JNICALL ConsumeIntegral(JNIEnv &env, jobject self) { + // First generate an unsigned value and then (safely) cast it to a signed + // integral type. By doing this rather than calling ConsumeIntegralInRange + // with bounds [signed_min, signed_max], we ensure that there is a direct + // correspondence between the consumed raw bytes and the result (e.g., 0 + // corresponds to 0 and not to signed_min). This should help mutating + // towards entries of the table of recent compares. + using UnsignedT = typename std::make_unsigned<T>::type; + static_assert( + std::numeric_limits<UnsignedT>::is_modulo, + "Unsigned to signed conversion requires modulo-based overflow handling"); + return static_cast<T>(ConsumeIntegralInRange<UnsignedT>( + env, self, 0, std::numeric_limits<UnsignedT>::max())); +} + +bool JNICALL ConsumeBool(JNIEnv &env, jobject self) { + return ConsumeIntegral<uint8_t>(env, self) & 1u; +} + +jchar ConsumeCharInternal(JNIEnv &env, jobject self, bool filter_surrogates) { + auto raw_codepoint = ConsumeIntegral<jchar>(env, self); + if (filter_surrogates && raw_codepoint >= 0xd800 && raw_codepoint < 0xe000) + raw_codepoint -= 0xd800; + return raw_codepoint; +} + +jchar JNICALL ConsumeChar(JNIEnv &env, jobject self) { + return ConsumeCharInternal(env, self, false); +} + +jchar JNICALL ConsumeCharNoSurrogates(JNIEnv &env, jobject self) { + return ConsumeCharInternal(env, self, true); +} + +template <typename T> +T JNICALL ConsumeProbability(JNIEnv &env, jobject self) { + using IntegralType = + typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t, + uint64_t>::type; + T result = static_cast<T>(ConsumeIntegral<IntegralType>(env, self)); + result /= static_cast<T>(std::numeric_limits<IntegralType>::max()); + return result; +} + +template <typename T> +T JNICALL ConsumeFloatInRange(JNIEnv &env, jobject self, T min, T max) { + T range; + T result = min; + + // Deal with overflow, in the event min and max are very far apart + if (min < 0 && max > 0 && min + std::numeric_limits<T>::max() < max) { + range = (max / 2) - (min / 2); + if (ConsumeBool(env, self)) { + result += range; + } + } else { + range = max - min; + } + + T probability = ConsumeProbability<T>(env, self); + return result + range * probability; +} + +template <typename T> +T JNICALL ConsumeRegularFloat(JNIEnv &env, jobject self) { + return ConsumeFloatInRange(env, self, std::numeric_limits<T>::lowest(), + std::numeric_limits<T>::max()); +} + +template <typename T> +T JNICALL ConsumeFloat(JNIEnv &env, jobject self) { + if (env.GetIntField(self, gRemainingBytesField) == 0) return 0.0; + + auto type_val = ConsumeIntegral<uint8_t>(env, self); + + if (type_val <= 10) { + // Consume the same amount of bytes as for a regular float/double + ConsumeRegularFloat<T>(env, self); + + switch (type_val) { + case 0: + return 0.0; + case 1: + return -0.0; + case 2: + return std::numeric_limits<T>::infinity(); + case 3: + return -std::numeric_limits<T>::infinity(); + case 4: + return std::numeric_limits<T>::quiet_NaN(); + case 5: + return std::numeric_limits<T>::denorm_min(); + case 6: + return -std::numeric_limits<T>::denorm_min(); + case 7: + return std::numeric_limits<T>::min(); + case 8: + return -std::numeric_limits<T>::min(); + case 9: + return std::numeric_limits<T>::max(); + case 10: + return -std::numeric_limits<T>::max(); + default: + abort(); + } + } + + T regular = ConsumeRegularFloat<T>(env, self); + return regular; +} + +// Polyfill for C++20 std::countl_one, which counts the number of leading ones +// in an unsigned integer. +inline __attribute__((always_inline)) uint8_t countl_one(uint8_t byte) { + // The result of __builtin_clz is undefined for 0. + if (byte == 0xFF) return 8; + return __builtin_clz(static_cast<uint8_t>(~byte)) - 24; +} + +// Forces a byte to be a valid UTF-8 continuation byte. +inline __attribute__((always_inline)) void ForceContinuationByte( + uint8_t &byte) { + byte = (byte | (1u << 7u)) & ~(1u << 6u); +} + +constexpr uint8_t kTwoByteZeroLeadingByte = 0b11000000; +constexpr uint8_t kTwoByteZeroContinuationByte = 0b10000000; +constexpr uint8_t kThreeByteLowLeadingByte = 0b11100000; +constexpr uint8_t kSurrogateLeadingByte = 0b11101101; + +enum class Utf8GenerationState { + LeadingByte_Generic, + LeadingByte_AfterBackslash, + ContinuationByte_Generic, + ContinuationByte_LowLeadingByte, + FirstContinuationByte_LowLeadingByte, + FirstContinuationByte_SurrogateLeadingByte, + FirstContinuationByte_Generic, + SecondContinuationByte_Generic, + LeadingByte_LowSurrogate, + FirstContinuationByte_LowSurrogate, + SecondContinuationByte_HighSurrogate, + SecondContinuationByte_LowSurrogate, +}; + +// Consumes up to `max_bytes` arbitrary bytes pointed to by `ptr` and returns a +// valid "modified UTF-8" string of length at most `max_length` that resembles +// the input bytes as closely as possible as well as the number of consumed +// bytes. If `stop_on_slash` is true, then the string will end on the first +// single consumed '\'. +// +// "Modified UTF-8" is the string encoding used by the JNI. It is the same as +// the legacy encoding CESU-8, but with `\0` coded on two bytes. In these +// encodings, code points requiring 4 bytes in modern UTF-8 are represented as +// two surrogates, each of which is coded on 3 bytes. +// +// This function has been designed with the following goals in mind: +// 1. The generated string should be biased towards containing ASCII characters +// as these are often the ones that affect control flow directly. +// 2. Correctly encoded data (e.g. taken from the table of recent compares) +// should be emitted unchanged. +// 3. The raw fuzzer input should be preserved as far as possible, but the +// output must always be correctly encoded. +// +// The JVM accepts string in two encodings: UTF-16 and modified UTF-8. +// Generating UTF-16 would make it harder to fulfill the first design goal and +// would potentially hinder compatibility with corpora using the much more +// widely used UTF-8 encoding, which is reasonably similar to modified UTF-8. As +// a result, this function uses modified UTF-8. +// +// See Algorithm 1 of https://arxiv.org/pdf/2010.03090.pdf for more details on +// the individual cases involved in determining the validity of a UTF-8 string. +template <bool ascii_only, bool stop_on_backslash> +std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data, + jint max_bytes, + jint max_length) { + std::string str; + // Every character in modified UTF-8 is coded on at most six bytes. Every + // consumed byte is transformed into at most one code unit, except for the + // case of a zero byte which requires two bytes. + if (ascii_only) { + str.reserve(std::min(2 * static_cast<std::size_t>(max_length), + 2 * static_cast<std::size_t>(max_bytes))); + } else { + str.reserve(std::min(6 * static_cast<std::size_t>(max_length), + 2 * static_cast<std::size_t>(max_bytes))); + } + + Utf8GenerationState state = Utf8GenerationState::LeadingByte_Generic; + const uint8_t *pos = data; + const auto data_end = data + max_bytes; + for (jint length = 0; length < max_length && pos != data_end; ++pos) { + uint8_t c = *pos; + if (ascii_only) { + // Clamp to 7-bit ASCII range. + c &= 0x7Fu; + } + // Fix up c or previously read bytes according to the value of c and the + // current state. In the end, add the fixed up code unit c to the string. + // Exception: The zero character has to be coded on two bytes and is the + // only case in which an iteration of the loop adds two code units. + switch (state) { + case Utf8GenerationState::LeadingByte_Generic: { + switch (ascii_only ? 0 : countl_one(c)) { + case 0: { + // valid - 1-byte code point (ASCII) + // The zero character has to be coded on two bytes in modified + // UTF-8. + if (c == 0) { + str += static_cast<char>(kTwoByteZeroLeadingByte); + c = kTwoByteZeroContinuationByte; + } else if (stop_on_backslash && c == '\\') { + state = Utf8GenerationState::LeadingByte_AfterBackslash; + // The slash either signals the end of the string or is skipped, + // so don't append anything. + continue; + } + // Remain in state LeadingByte. + ++length; + break; + } + case 1: { + // invalid - continuation byte at leader byte position + // Fix it up to be of the form 0b110XXXXX and fall through to the + // case of a 2-byte sequence. + c |= 1u << 6u; + c &= ~(1u << 5u); + [[fallthrough]]; + } + case 2: { + // (most likely) valid - start of a 2-byte sequence + // ASCII characters must be coded on a single byte, so we must + // ensure that the lower two bits combined with the six non-header + // bits of the following byte do not form a 7-bit ASCII value. This + // could only be the case if at most the lowest bit is set. + if ((c & 0b00011110u) == 0) { + state = Utf8GenerationState::ContinuationByte_LowLeadingByte; + } else { + state = Utf8GenerationState::ContinuationByte_Generic; + } + break; + } + // The default case falls through to the case of three leading ones + // coming right after. + default: { + // invalid - at least four leading ones + // In the case of exactly four leading ones, this would be valid + // UTF-8, but is not valid in the JVM's modified UTF-8 encoding. + // Fix it up by clearing the fourth leading one and falling through + // to the 3-byte case. + c &= ~(1u << 4u); + [[fallthrough]]; + } + case 3: { + // valid - start of a 3-byte sequence + if (c == kThreeByteLowLeadingByte) { + state = Utf8GenerationState::FirstContinuationByte_LowLeadingByte; + } else if (c == kSurrogateLeadingByte) { + state = Utf8GenerationState:: + FirstContinuationByte_SurrogateLeadingByte; + } else { + state = Utf8GenerationState::FirstContinuationByte_Generic; + } + break; + } + } + break; + } + case Utf8GenerationState::LeadingByte_AfterBackslash: { + if (c != '\\') { + // Mark the current byte as consumed. + ++pos; + goto done; + } + // A double backslash is consumed as a single one. As we skipped the + // first one, emit the second one as usual. + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::ContinuationByte_LowLeadingByte: { + ForceContinuationByte(c); + // Preserve the zero character, which is coded on two bytes in modified + // UTF-8. In all other cases ensure that we are not incorrectly encoding + // an ASCII character on two bytes by setting the eighth least + // significant bit of the encoded value (second least significant bit of + // the leading byte). + auto previous_c = static_cast<uint8_t>(str.back()); + if (previous_c != kTwoByteZeroLeadingByte || + c != kTwoByteZeroContinuationByte) { + str.back() = static_cast<char>(previous_c | (1u << 1u)); + } + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::ContinuationByte_Generic: { + ForceContinuationByte(c); + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: { + ForceContinuationByte(c); + // Ensure that the current code point could not have been coded on two + // bytes. As two bytes encode up to 11 bits and three bytes encode up + // to 16 bits, we thus have to make it such that the five highest bits + // are not all zero. Four of these bits are the non-header bits of the + // leader byte. Thus, set the highest non-header bit in this byte (fifth + // highest in the encoded value). + c |= 1u << 5u; + state = Utf8GenerationState::SecondContinuationByte_Generic; + break; + } + case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: { + ForceContinuationByte(c); + if (c & (1u << 5u)) { + // Start with a high surrogate (0xD800-0xDBFF). c contains the second + // byte and the first two bits of the third byte. The first two bits + // of this second byte are fixed to 10 (in 0x8-0xB). + c |= 1u << 5u; + c &= ~(1u << 4u); + // The high surrogate must be followed by a low surrogate. + state = Utf8GenerationState::SecondContinuationByte_HighSurrogate; + } else { + state = Utf8GenerationState::SecondContinuationByte_Generic; + } + break; + } + case Utf8GenerationState::FirstContinuationByte_Generic: { + ForceContinuationByte(c); + state = Utf8GenerationState::SecondContinuationByte_Generic; + break; + } + case Utf8GenerationState::SecondContinuationByte_HighSurrogate: { + ForceContinuationByte(c); + state = Utf8GenerationState::LeadingByte_LowSurrogate; + ++length; + break; + } + case Utf8GenerationState::SecondContinuationByte_LowSurrogate: + case Utf8GenerationState::SecondContinuationByte_Generic: { + ForceContinuationByte(c); + state = Utf8GenerationState::LeadingByte_Generic; + ++length; + break; + } + case Utf8GenerationState::LeadingByte_LowSurrogate: { + // We have to emit a low surrogate leading byte, which is a fixed value. + // We still consume a byte from the input to make fuzzer changes more + // stable and preserve valid surrogate pairs picked up from e.g. the + // table of recent compares. + c = kSurrogateLeadingByte; + state = Utf8GenerationState::FirstContinuationByte_LowSurrogate; + break; + } + case Utf8GenerationState::FirstContinuationByte_LowSurrogate: { + ForceContinuationByte(c); + // Low surrogates are code points in the range 0xDC00-0xDFFF. c contains + // the second byte and the first two bits of the third byte. The first + // two bits of this second byte are fixed to 11 (in 0xC-0xF). + c |= (1u << 5u) | (1u << 4u); + // The second continuation byte of a low surrogate is not restricted, + // but we need to track it differently to allow for correct backtracking + // if it isn't completed. + state = Utf8GenerationState::SecondContinuationByte_LowSurrogate; + break; + } + } + str += static_cast<uint8_t>(c); + } + + // Backtrack the current incomplete character. + switch (state) { + case Utf8GenerationState::SecondContinuationByte_LowSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::FirstContinuationByte_LowSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::LeadingByte_LowSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::SecondContinuationByte_Generic: + case Utf8GenerationState::SecondContinuationByte_HighSurrogate: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::ContinuationByte_Generic: + case Utf8GenerationState::ContinuationByte_LowLeadingByte: + case Utf8GenerationState::FirstContinuationByte_Generic: + case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: + case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: + str.pop_back(); + [[fallthrough]]; + case Utf8GenerationState::LeadingByte_Generic: + case Utf8GenerationState::LeadingByte_AfterBackslash: + // No backtracking required. + break; + } + +done: + return std::make_pair(str, pos - data); +} +} // namespace + +namespace jazzer { +// Exposed for testing only. +std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data, + jint max_bytes, jint max_length, + bool ascii_only, + bool stop_on_backslash) { + if (ascii_only) { + if (stop_on_backslash) { + return ::FixUpModifiedUtf8<true, true>(data, max_bytes, max_length); + } else { + return ::FixUpModifiedUtf8<true, false>(data, max_bytes, max_length); + } + } else { + if (stop_on_backslash) { + return ::FixUpModifiedUtf8<false, true>(data, max_bytes, max_length); + } else { + return ::FixUpModifiedUtf8<false, false>(data, max_bytes, max_length); + } + } +} +} // namespace jazzer + +namespace { +jstring ConsumeStringInternal(JNIEnv &env, jobject self, jint max_length, + bool ascii_only, bool stop_on_backslash) { + if (max_length < 0) { + ThrowIllegalArgumentException(env, "maxLength must not be negative"); + return nullptr; + } + + const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + if (max_length == 0 || remainingBytes == 0) return env.NewStringUTF(""); + + if (remainingBytes == 1) { + env.SetIntField(self, gRemainingBytesField, 0); + return env.NewStringUTF(""); + } + + std::string str; + jint consumed_bytes; + std::tie(str, consumed_bytes) = jazzer::FixUpModifiedUtf8( + dataPtr, remainingBytes, max_length, ascii_only, stop_on_backslash); + env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + consumed_bytes)); + env.SetIntField(self, gRemainingBytesField, remainingBytes - consumed_bytes); + return env.NewStringUTF(str.c_str()); +} + +jstring JNICALL ConsumeAsciiString(JNIEnv &env, jobject self, jint max_length) { + return ConsumeStringInternal(env, self, max_length, true, true); +} + +jstring JNICALL ConsumeString(JNIEnv &env, jobject self, jint max_length) { + return ConsumeStringInternal(env, self, max_length, false, true); +} + +jstring JNICALL ConsumeRemainingAsAsciiString(JNIEnv &env, jobject self) { + return ConsumeStringInternal(env, self, std::numeric_limits<jint>::max(), + true, false); +} + +jstring JNICALL ConsumeRemainingAsString(JNIEnv &env, jobject self) { + return ConsumeStringInternal(env, self, std::numeric_limits<jint>::max(), + false, false); +} + +std::size_t RemainingBytes(JNIEnv &env, jobject self) { + return env.GetIntField(self, gRemainingBytesField); +} + +const JNINativeMethod kFuzzedDataMethods[]{ + {(char *)"consumeBoolean", (char *)"()Z", (void *)&ConsumeBool}, + {(char *)"consumeByte", (char *)"()B", (void *)&ConsumeIntegral<jbyte>}, + {(char *)"consumeByteUnchecked", (char *)"(BB)B", + (void *)&ConsumeIntegralInRange<jbyte>}, + {(char *)"consumeShort", (char *)"()S", (void *)&ConsumeIntegral<jshort>}, + {(char *)"consumeShortUnchecked", (char *)"(SS)S", + (void *)&ConsumeIntegralInRange<jshort>}, + {(char *)"consumeInt", (char *)"()I", (void *)&ConsumeIntegral<jint>}, + {(char *)"consumeIntUnchecked", (char *)"(II)I", + (void *)&ConsumeIntegralInRange<jint>}, + {(char *)"consumeLong", (char *)"()J", (void *)&ConsumeIntegral<jlong>}, + {(char *)"consumeLongUnchecked", (char *)"(JJ)J", + (void *)&ConsumeIntegralInRange<jlong>}, + {(char *)"consumeFloat", (char *)"()F", (void *)&ConsumeFloat<jfloat>}, + {(char *)"consumeRegularFloat", (char *)"()F", + (void *)&ConsumeRegularFloat<jfloat>}, + {(char *)"consumeRegularFloatUnchecked", (char *)"(FF)F", + (void *)&ConsumeFloatInRange<jfloat>}, + {(char *)"consumeProbabilityFloat", (char *)"()F", + (void *)&ConsumeProbability<jfloat>}, + {(char *)"consumeDouble", (char *)"()D", (void *)&ConsumeFloat<jdouble>}, + {(char *)"consumeRegularDouble", (char *)"()D", + (void *)&ConsumeRegularFloat<jdouble>}, + {(char *)"consumeRegularDoubleUnchecked", (char *)"(DD)D", + (void *)&ConsumeFloatInRange<jdouble>}, + {(char *)"consumeProbabilityDouble", (char *)"()D", + (void *)&ConsumeProbability<jdouble>}, + {(char *)"consumeChar", (char *)"()C", (void *)&ConsumeChar}, + {(char *)"consumeCharUnchecked", (char *)"(CC)C", + (void *)&ConsumeIntegralInRange<jchar>}, + {(char *)"consumeCharNoSurrogates", (char *)"()C", + (void *)&ConsumeCharNoSurrogates}, + {(char *)"consumeAsciiString", (char *)"(I)Ljava/lang/String;", + (void *)&ConsumeAsciiString}, + {(char *)"consumeRemainingAsAsciiString", (char *)"()Ljava/lang/String;", + (void *)&ConsumeRemainingAsAsciiString}, + {(char *)"consumeString", (char *)"(I)Ljava/lang/String;", + (void *)&ConsumeString}, + {(char *)"consumeRemainingAsString", (char *)"()Ljava/lang/String;", + (void *)&ConsumeRemainingAsString}, + {(char *)"consumeBooleans", (char *)"(I)[Z", + (void *)&ConsumeIntegralArray<jboolean>}, + {(char *)"consumeBytes", (char *)"(I)[B", + (void *)&ConsumeIntegralArray<jbyte>}, + {(char *)"consumeShorts", (char *)"(I)[S", + (void *)&ConsumeIntegralArray<jshort>}, + {(char *)"consumeInts", (char *)"(I)[I", + (void *)&ConsumeIntegralArray<jint>}, + {(char *)"consumeLongs", (char *)"(I)[J", + (void *)&ConsumeIntegralArray<jlong>}, + {(char *)"consumeRemainingAsBytes", (char *)"()[B", + (void *)&ConsumeRemainingAsArray<jbyte>}, + {(char *)"remainingBytes", (char *)"()I", (void *)&RemainingBytes}, +}; +const jint kNumFuzzedDataMethods = + sizeof(kFuzzedDataMethods) / sizeof(kFuzzedDataMethods[0]); +} // namespace + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_driver_FuzzedDataProviderImpl_nativeInit( + JNIEnv *env, jclass clazz) { + env->RegisterNatives(clazz, kFuzzedDataMethods, kNumFuzzedDataMethods); + gDataPtrField = env->GetFieldID(clazz, "dataPtr", "J"); + gRemainingBytesField = env->GetFieldID(clazz, "remainingBytes", "I"); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp new file mode 100644 index 00000000..2395cd97 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp @@ -0,0 +1,98 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <cstddef> +#include <cstdint> +#include <random> +#include <string> +#include <vector> + +#include "gtest/gtest.h" + +namespace jazzer { +std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *pos, + jint max_bytes, jint max_length, + bool ascii_only, + bool stop_on_backslash); +} + +std::pair<std::string, jint> FixUpRemainingModifiedUtf8( + const std::string &str, bool ascii_only, bool stop_on_backslash) { + return jazzer::FixUpModifiedUtf8( + reinterpret_cast<const uint8_t *>(str.c_str()), str.length(), + std::numeric_limits<jint>::max(), ascii_only, stop_on_backslash); +} + +std::pair<std::string, jint> expect(const std::string &s, jint i) { + return std::make_pair(s, i); +} + +using namespace std::literals::string_literals; +TEST(FixUpModifiedUtf8Test, FullUtf8_ContinueOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, false, false)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, false, false)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, false)); + EXPECT_EQ(expect("ja\\zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\\zzer"s, false, false)); + EXPECT_EQ(expect("ja\\\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, false)); + EXPECT_EQ(expect("ۧ"s, 5), + FixUpRemainingModifiedUtf8(u8"ۧ"s, false, false)); +} + +TEST(FixUpModifiedUtf8Test, AsciiOnly_ContinueOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, true, false)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, true, false)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, false)); + EXPECT_EQ(expect("ja\\zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\\zzer"s, true, false)); + EXPECT_EQ(expect("ja\\\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, false)); + EXPECT_EQ(expect("\x62\x02\x2C\x43\x1F"s, 5), + FixUpRemainingModifiedUtf8(u8"ۧ"s, true, false)); +} + +TEST(FixUpModifiedUtf8Test, FullUtf8_StopOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, false, true)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, false, true)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, true)); + EXPECT_EQ(expect("ja"s, 4), + FixUpRemainingModifiedUtf8("ja\\zzer"s, false, true)); + EXPECT_EQ(expect("ja\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, true)); +} + +TEST(FixUpModifiedUtf8Test, AsciiOnly_StopOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, true, true)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, true, true)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, true)); + EXPECT_EQ(expect("ja"s, 4), + FixUpRemainingModifiedUtf8("ja\\zzer"s, true, true)); + EXPECT_EQ(expect("ja\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, true)); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp b/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp new file mode 100644 index 00000000..23a86c53 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp @@ -0,0 +1,56 @@ +// Copyright 2022 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <dlfcn.h> +#include <jni.h> + +#include <cstdlib> + +#if defined(_ANDROID) +#define __jni_version__ JNI_VERSION_1_6 +#else +#define __jni_version__ JNI_VERSION_1_8 +#endif + +// The jazzer_preload library, if used, forwards all calls to native libFuzzer +// hooks such as __sanitizer_cov_trace_cmp8 to the Jazzer JNI library. In order +// to load the hook symbols when the library is ready, it needs to be passed a +// handle - the JVM loads libraries with RTLD_LOCAL and thus their symbols +// wouldn't be found as part of the global lookup procedure. +jint JNI_OnLoad(JavaVM *, void *) { + Dl_info info; + + if (!dladdr(reinterpret_cast<const void *>(&JNI_OnLoad), &info) || + !info.dli_fname) { + fprintf(stderr, "Failed to determine our dli_fname\n"); + abort(); + } + + void *handle = dlopen(info.dli_fname, RTLD_NOLOAD | RTLD_LAZY); + if (handle == nullptr) { + fprintf(stderr, "Failed to dlopen self: %s\n", dlerror()); + abort(); + } + + void *preload_init = dlsym(RTLD_DEFAULT, "jazzer_preload_init"); + // jazzer_preload is only preloaded when Jazzer is started with --native, so + // not finding this method is an expected error. + if (preload_init) { + reinterpret_cast<void (*)(void *)>(preload_init)(handle); + } + + dlclose(handle); + + return __jni_version__; +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp b/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp new file mode 100644 index 00000000..8764aaaa --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp @@ -0,0 +1,184 @@ +// Copyright 2022 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <cstddef> +#include <cstdint> + +#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h" +#include "sanitizer_hooks_with_pc.h" + +namespace { + +extern "C" { +void __sanitizer_weak_hook_compare_bytes(void *caller_pc, const void *s1, + const void *s2, std::size_t n1, + std::size_t n2, int result); +void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, + const void *s2, size_t len2, void *result); +} + +inline __attribute__((always_inline)) void *idToPc(jint id) { + return reinterpret_cast<void *>(static_cast<uintptr_t>(id)); +} +} // namespace + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0( + JNIEnv *env, jclass cls, jbyteArray needle, jint id) { + jint needle_length = env->GetArrayLength(needle); + auto *needle_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(needle, nullptr)); + __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native, + needle_length, nullptr); + env->ReleasePrimitiveArrayCritical(needle, needle_native, JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0( + jint needle_length, jbyte *needle_native, jint id) { + __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native, + needle_length, nullptr); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp( + JNIEnv *env, jclass cls, jbyteArray b1, jbyteArray b2, jint result, + jint id) { + jint b1_length = env->GetArrayLength(b1); + jint b2_length = env->GetArrayLength(b2); + auto *b1_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b1, nullptr)); + auto *b2_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b2, nullptr)); + __sanitizer_weak_hook_compare_bytes(idToPc(id), b1_native, b2_native, + b1_length, b2_length, result); + env->ReleasePrimitiveArrayCritical(b1, b1_native, JNI_ABORT); + env->ReleasePrimitiveArrayCritical(b2, b2_native, JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp( + jint b1_length, jbyte *b1, jint b2_length, jbyte *b2, jint result, + jint id) { + __sanitizer_weak_hook_compare_bytes(idToPc(id), b1, b2, b1_length, b2_length, + result); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong( + JNIEnv *env, jclass cls, jlong value1, jlong value2, jint id) { + __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong( + jlong value1, jlong value2, jint id) { + __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt( + JNIEnv *env, jclass cls, jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt( + jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt( + JNIEnv *env, jclass cls, jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt( + jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch( + JNIEnv *env, jclass cls, jlong switch_value, + jlongArray libfuzzer_case_values, jint id) { + auto *case_values = static_cast<jlong *>( + env->GetPrimitiveArrayCritical(libfuzzer_case_values, nullptr)); + __sanitizer_cov_trace_switch_with_pc( + idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values)); + env->ReleasePrimitiveArrayCritical(libfuzzer_case_values, case_values, + JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch( + jlong switch_value, jint libfuzzer_case_values_length, jlong *case_values, + jint id) { + __sanitizer_cov_trace_switch_with_pc( + idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values)); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong( + JNIEnv *env, jclass cls, jlong value, jint id) { + __sanitizer_cov_trace_div8_with_pc(idToPc(id), value); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong( + jlong value, jint id) { + __sanitizer_cov_trace_div8_with_pc(idToPc(id), value); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt( + JNIEnv *env, jclass cls, jint value, jint id) { + __sanitizer_cov_trace_div4_with_pc(idToPc(id), value); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt( + jint value, jint id) { + __sanitizer_cov_trace_div4_with_pc(idToPc(id), value); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep( + JNIEnv *env, jclass cls, jlong idx, jint id) { + __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx)); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep( + jlong idx, jint id) { + __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx)); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir( + JNIEnv *env, jclass cls, jint caller_id, jint callee_id) { + __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id), + static_cast<uintptr_t>(callee_id)); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir( + jint caller_id, jint callee_id) { + __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id), + static_cast<uintptr_t>(callee_id)); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp b/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp new file mode 100644 index 00000000..b7a0df5d --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp @@ -0,0 +1,131 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <mutex> +#include <utility> +#include <vector> + +#include "absl/strings/str_split.h" +#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h" + +namespace { +bool is_using_native_libraries = false; +std::once_flag ignore_list_flag; +std::vector<std::pair<uintptr_t, uintptr_t>> ignore_for_interception_ranges; + +/** + * Adds the address ranges of executable segments of the library lib_name to + * the ignorelist for C standard library function interception (strcmp, memcmp, + * ...). + */ +void ignoreLibraryForInterception(const std::string &lib_name) { + std::ifstream loaded_libs("/proc/self/maps"); + if (!loaded_libs) { + // This early exit is taken e.g. on macOS, where /proc does not exist. + return; + } + std::string line; + while (std::getline(loaded_libs, line)) { + if (!absl::StrContains(line, lib_name)) continue; + // clang-format off + // A typical line looks as follows: + // 7f15356c9000-7f1536367000 r-xp 0020d000 fd:01 19275673 /usr/lib/jvm/java-15-openjdk-amd64/lib/server/libjvm.so + // clang-format on + std::vector<std::string> parts = + absl::StrSplit(line, ' ', absl::SkipEmpty()); + if (parts.size() != 6) { + std::cout << "ERROR: Invalid format for /proc/self/maps\n" + << line << std::endl; + exit(1); + } + // Skip non-executable address rang"s. + if (!absl::StrContains(parts[1], "x")) continue; + std::string range_str = parts[0]; + std::vector<std::string> range = absl::StrSplit(range_str, "-"); + if (range.size() != 2) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + std::size_t pos; + auto start = std::stoull(range[0], &pos, 16); + if (pos != range[0].size()) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + auto end = std::stoull(range[1], &pos, 16); + if (pos != range[0].size()) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + ignore_for_interception_ranges.emplace_back(start, end); + } +} + +const std::vector<std::string> kLibrariesToIgnoreForInterception = { + // The launcher executable itself can be treated just like a library. + "jazzer", "libjazzer_preload.so", + "libinstrument.so", "libjava.so", + "libjimage.so", "libjli.so", + "libjvm.so", "libnet.so", + "libverify.so", "libzip.so", +}; +} // namespace + +extern "C" [[maybe_unused]] bool __sanitizer_weak_is_relevant_pc( + void *caller_pc) { + // If the fuzz target is not using native libraries, calls to strcmp, memcmp, + // etc. should never be intercepted. The values reported if they were at best + // duplicate the values received from our bytecode instrumentation and at + // worst pollute the table of recent compares with string internal to the JDK. + if (!is_using_native_libraries) return false; + // If the fuzz target is using native libraries, intercept calls only if they + // don't originate from those address ranges that are known to belong to the + // JDK. + return std::none_of( + ignore_for_interception_ranges.cbegin(), + ignore_for_interception_ranges.cend(), + [caller_pc](const std::pair<uintptr_t, uintptr_t> &range) { + uintptr_t start; + uintptr_t end; + std::tie(start, end) = range; + auto address = reinterpret_cast<uintptr_t>(caller_pc); + return start <= address && address <= end; + }); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_handleLibraryLoad( + JNIEnv *, jclass) { + std::call_once(ignore_list_flag, [] { + std::cout << "INFO: detected a native library load, enabling interception " + "for libc functions" + << std::endl; + for (const auto &lib_name : kLibrariesToIgnoreForInterception) + ignoreLibraryForInterception(lib_name); + // Enable the ignore list after it has been populated since vector is not + // thread-safe with respect to concurrent writes and reads. + is_using_native_libraries = true; + }); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp b/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp new file mode 100644 index 00000000..4e21612b --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp @@ -0,0 +1,31 @@ +// Copyright 2023 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstddef> +#include <cstdint> + +#include "com_code_intelligence_jazzer_runtime_Mutator.h" + +extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); + +[[maybe_unused]] jint +Java_com_code_1intelligence_jazzer_runtime_Mutator_defaultMutateNative( + JNIEnv *env, jclass, jbyteArray jni_data, jint size) { + jint maxSize = env->GetArrayLength(jni_data); + uint8_t *data = + static_cast<uint8_t *>(env->GetPrimitiveArrayCritical(jni_data, nullptr)); + jint res = LLVMFuzzerMutate(data, size, maxSize); + env->ReleasePrimitiveArrayCritical(jni_data, data, 0); + return res; +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h new file mode 100644 index 00000000..be655adb --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h @@ -0,0 +1,49 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <cstdint> + +// This file declares variants of the libFuzzer compare, division, switch and +// gep hooks that accept an additional caller_pc argument that can be used to +// pass a custom value that is recorded as the caller's instruction pointer +// ("program counter"). This allows synthetic program counters obtained from +// Java coverage information to be used with libFuzzer's value profile, with +// which it records detailed information about the result of compares and +// associates it with particular coverage locations. +// +// Note: Only the lower 9 bits of the caller_pc argument are used by libFuzzer. +#ifdef __cplusplus +extern "C" { +#endif +void __sanitizer_cov_trace_cmp4_with_pc(void *caller_pc, uint32_t arg1, + uint32_t arg2); +void __sanitizer_cov_trace_cmp8_with_pc(void *caller_pc, uint64_t arg1, + uint64_t arg2); + +void __sanitizer_cov_trace_switch_with_pc(void *caller_pc, uint64_t val, + uint64_t *cases); + +void __sanitizer_cov_trace_div4_with_pc(void *caller_pc, uint32_t val); +void __sanitizer_cov_trace_div8_with_pc(void *caller_pc, uint64_t val); + +void __sanitizer_cov_trace_gep_with_pc(void *caller_pc, uintptr_t idx); + +void __sanitizer_cov_trace_pc_indir_with_pc(void *caller_pc, uintptr_t callee); +#ifdef __cplusplus +} +#endif diff --git a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp new file mode 100644 index 00000000..abc5f04e --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Suppress libFuzzer warnings about missing sanitizer methods in non-sanitizer +// builds. +extern "C" [[maybe_unused]] int __sanitizer_acquire_crash_state() { return 1; } + +namespace jazzer { +void DumpJvmStackTraces(); +} + +// Dump a JVM stack trace on timeouts. +extern "C" [[maybe_unused]] void __sanitizer_print_stack_trace() { + jazzer::DumpJvmStackTraces(); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp b/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp new file mode 100644 index 00000000..e284925d --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp @@ -0,0 +1,40 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <atomic> +#include <csignal> + +#include "com_code_intelligence_jazzer_driver_SignalHandler.h" + +#ifdef _WIN32 +// Windows does not have SIGUSR1, which triggers a graceful exit of libFuzzer. +// Instead, trigger a hard exit. +#define SIGUSR1 SIGTERM +#endif + +// Handles SIGINT raised while running Java code. +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_driver_SignalHandler_handleInterrupt( + JNIEnv *, jclass) { + static std::atomic<bool> already_exiting{false}; + if (!already_exiting.exchange(true)) { + // Let libFuzzer exit gracefully when the JVM received SIGINT. + raise(SIGUSR1); + } else { + // Exit libFuzzer forcefully on repeated SIGINTs. + raise(SIGTERM); + } +} |