aboutsummaryrefslogtreecommitdiff
path: root/src/main/native/com
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/native/com')
-rw-r--r--src/main/native/com/code_intelligence/jazzer/BUILD.bazel60
-rw-r--r--src/main/native/com/code_intelligence/jazzer/android/BUILD.bazel47
-rw-r--r--src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.cpp208
-rw-r--r--src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.h37
-rw-r--r--src/main/native/com/code_intelligence/jazzer/android/jazzer_jvmti_allocator.h52
-rw-r--r--src/main/native/com/code_intelligence/jazzer/android/native_agent.cpp313
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel166
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp61
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp122
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h43
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp240
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h26
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp692
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp98
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp56
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp184
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp131
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp31
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h49
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp26
-rw-r--r--src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp40
-rw-r--r--src/main/native/com/code_intelligence/jazzer/jazzer_preload.c249
22 files changed, 2931 insertions, 0 deletions
diff --git a/src/main/native/com/code_intelligence/jazzer/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/BUILD.bazel
new file mode 100644
index 00000000..689adc9a
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/BUILD.bazel
@@ -0,0 +1,60 @@
+load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library")
+load("//bazel:compat.bzl", "MULTI_PLATFORM", "SKIP_ON_WINDOWS")
+
+DYNAMIC_SYMBOLS_TO_EXPORT = [
+ "__sancov_lowest_stack",
+ "__sanitizer_cov_8bit_counters_init",
+ "__sanitizer_cov_pcs_init",
+ "__sanitizer_cov_trace_cmp1",
+ "__sanitizer_cov_trace_cmp4",
+ "__sanitizer_cov_trace_cmp4",
+ "__sanitizer_cov_trace_cmp8",
+ "__sanitizer_cov_trace_const_cmp1",
+ "__sanitizer_cov_trace_const_cmp4",
+ "__sanitizer_cov_trace_const_cmp4",
+ "__sanitizer_cov_trace_const_cmp8",
+ "__sanitizer_cov_trace_div4",
+ "__sanitizer_cov_trace_div8",
+ "__sanitizer_cov_trace_gep",
+ "__sanitizer_cov_trace_pc_indir",
+ "__sanitizer_cov_trace_switch",
+ "__sanitizer_weak_hook_memcmp",
+ "__sanitizer_weak_hook_memmem",
+ "__sanitizer_weak_hook_strcasecmp",
+ "__sanitizer_weak_hook_strcasestr",
+ "__sanitizer_weak_hook_strcmp",
+ "__sanitizer_weak_hook_strncasecmp",
+ "__sanitizer_weak_hook_strncmp",
+ "__sanitizer_weak_hook_strstr",
+ "bcmp",
+ "jazzer_preload_init",
+ "memcmp",
+ "memmem",
+ "strcasecmp",
+ "strcasestr",
+ "strcmp",
+ "strncasecmp",
+ "strncmp",
+ "strstr",
+]
+
+cc_jni_library(
+ name = "jazzer_preload",
+ srcs = ["jazzer_preload.c"],
+ linkopts = select({
+ "@platforms//os:linux": [
+ "-Wl,--export-dynamic-symbol=" + symbol
+ for symbol in DYNAMIC_SYMBOLS_TO_EXPORT
+ ] + [
+ "-ldl",
+ ],
+ "@platforms//os:macos": [
+ "-ldl",
+ ],
+ "//conditions:default": [],
+ }),
+ platforms = MULTI_PLATFORM,
+ target_compatible_with = SKIP_ON_WINDOWS,
+ visibility = ["//src/main/java/com/code_intelligence/jazzer:__pkg__"],
+ deps = ["//src/main/native/com/code_intelligence/jazzer/driver:sanitizer_hooks_with_pc"],
+)
diff --git a/src/main/native/com/code_intelligence/jazzer/android/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/android/BUILD.bazel
new file mode 100644
index 00000000..74f98cda
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/android/BUILD.bazel
@@ -0,0 +1,47 @@
+load("//bazel:compat.bzl", "SKIP_ON_WINDOWS")
+load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library")
+load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
+
+copy_file(
+ name = "jvmti_h_encoded",
+ src = "@android_jvmti//file",
+ out = "jvmti.encoded",
+ is_executable = False,
+ tags = ["manual"],
+ target_compatible_with = SKIP_ON_WINDOWS,
+)
+
+genrule(
+ name = "jvmti_h",
+ srcs = [
+ "jvmti.encoded",
+ ],
+ outs = ["jvmti.h"],
+ cmd = "base64 --decode $< > $(OUTS)",
+ tags = ["manual"],
+ target_compatible_with = SKIP_ON_WINDOWS,
+)
+
+cc_jni_library(
+ name = "android_native_agent",
+ srcs = [
+ "dex_file_manager.cpp",
+ "dex_file_manager.h",
+ "jazzer_jvmti_allocator.h",
+ "native_agent.cpp",
+ ":jvmti_h",
+ ],
+ includes = [
+ ".",
+ ],
+ linkopts = [
+ "-lz",
+ ],
+ tags = ["manual"],
+ target_compatible_with = SKIP_ON_WINDOWS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "@com_google_absl//absl/strings",
+ "@jazzer_slicer",
+ ],
+)
diff --git a/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.cpp b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.cpp
new file mode 100644
index 00000000..b409e82b
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.cpp
@@ -0,0 +1,208 @@
+// Copyright 2023 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "dex_file_manager.h"
+
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "jazzer_jvmti_allocator.h"
+#include "jvmti.h"
+#include "slicer/dex_ir.h"
+#include "slicer/reader.h"
+#include "slicer/writer.h"
+
+std::string GetName(const char* name) {
+ std::stringstream ss;
+ // Class name needs to be in the format "L<class_name>;" as it is stored in
+ // the types table in the DEX file for slicer to find it
+ ss << "L" << name << ";";
+ return ss.str();
+}
+
+bool IsValidIndex(dex::u4 index) { return index != (unsigned)-1; }
+
+void DexFileManager::addDexFile(const unsigned char* bytes, int length) {
+ unsigned char* newArr = new unsigned char[length];
+ std::copy(bytes, bytes + length, newArr);
+
+ dexFiles.push_back(newArr);
+ dexFilesSize.push_back(length);
+}
+
+unsigned char* DexFileManager::getClassBytes(const char* className,
+ int dexFileIndex, jvmtiEnv* jvmti,
+ size_t* newSize) {
+ dex::Reader dexReader(dexFiles[dexFileIndex], dexFilesSize[dexFileIndex]);
+ auto descName = GetName(className);
+
+ auto classIndex = dexReader.FindClassIndex(descName.c_str());
+ if (!IsValidIndex(classIndex)) {
+ *newSize = *newSize;
+ return nullptr;
+ }
+
+ dexReader.CreateClassIr(classIndex);
+ auto oldIr = dexReader.GetIr();
+
+ dex::Writer writer(oldIr);
+ JazzerJvmtiAllocator allocator(jvmti);
+ return writer.CreateImage(&allocator, newSize);
+}
+
+uint32_t DexFileManager::findDexFileForClass(const char* className) {
+ for (int i = 0; i < dexFiles.size(); i++) {
+ dex::Reader dexReader(dexFiles[i], dexFilesSize[i]);
+
+ std::string descName = GetName(className);
+ dex::u4 classIndex = dexReader.FindClassIndex(descName.c_str());
+
+ if (IsValidIndex(classIndex)) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+std::vector<std::string> getMethodDescriptions(
+ std::vector<ir::EncodedMethod*>* encMethodList) {
+ std::vector<std::string> methodDescs;
+
+ for (int i = 0; i < encMethodList->size(); i++) {
+ std::stringstream ss;
+ ss << (*encMethodList)[i]->access_flags;
+ ss << (*encMethodList)[i]->decl->name->c_str();
+ ss << (*encMethodList)[i]->decl->prototype->Signature().c_str();
+
+ methodDescs.push_back(ss.str());
+ }
+
+ sort(methodDescs.begin(), methodDescs.end());
+ return methodDescs;
+}
+
+std::vector<std::string> getFieldDescriptions(
+ std::vector<ir::EncodedField*>* encFieldList) {
+ std::vector<std::string> fieldDescs;
+
+ for (int i = 0; i < encFieldList->size(); i++) {
+ std::stringstream ss;
+ ss << (*encFieldList)[i]->access_flags;
+ ss << (*encFieldList)[i]->decl->type->descriptor->c_str();
+ ss << (*encFieldList)[i]->decl->name->c_str();
+ fieldDescs.push_back(ss.str());
+ }
+
+ sort(fieldDescs.begin(), fieldDescs.end());
+ return fieldDescs;
+}
+
+bool matchFields(std::vector<ir::EncodedField*>* encodedFieldListOne,
+ std::vector<ir::EncodedField*>* encodedFieldListTwo) {
+ std::vector<std::string> fDescListOne =
+ getFieldDescriptions(encodedFieldListOne);
+ std::vector<std::string> fDescListTwo =
+ getFieldDescriptions(encodedFieldListTwo);
+
+ if (fDescListOne.size() != fDescListTwo.size()) {
+ return false;
+ }
+
+ for (int i = 0; i < fDescListOne.size(); i++) {
+ if (fDescListOne[i] != fDescListTwo[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool matchMethods(std::vector<ir::EncodedMethod*>* encodedMethodListOne,
+ std::vector<ir::EncodedMethod*>* encodedMethodListTwo) {
+ std::vector<std::string> mDescListOne =
+ getMethodDescriptions(encodedMethodListOne);
+ std::vector<std::string> mDescListTwo =
+ getMethodDescriptions(encodedMethodListTwo);
+
+ if (mDescListOne.size() != mDescListTwo.size()) {
+ return false;
+ }
+
+ for (int i = 0; i < mDescListOne.size(); i++) {
+ if (mDescListOne[i] != mDescListTwo[i]) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool classStructureMatches(ir::Class* classOne, ir::Class* classTwo) {
+ return matchMethods(&(classOne->direct_methods),
+ &(classTwo->direct_methods)) &&
+ matchMethods(&(classOne->virtual_methods),
+ &(classTwo->virtual_methods)) &&
+ matchFields(&(classOne->static_fields), &(classTwo->static_fields)) &&
+ matchFields(&(classOne->instance_fields),
+ &(classTwo->instance_fields)) &&
+ classOne->access_flags == classTwo->access_flags;
+}
+
+bool DexFileManager::structureMatches(dex::Reader* oldReader,
+ dex::Reader* newReader,
+ const char* className) {
+ std::string descName = GetName(className);
+
+ dex::u4 oldReaderIndex = oldReader->FindClassIndex(descName.c_str());
+ dex::u4 newReaderIndex = newReader->FindClassIndex(descName.c_str());
+
+ if (!IsValidIndex(oldReaderIndex) || !IsValidIndex(newReaderIndex)) {
+ return false;
+ }
+
+ oldReader->CreateClassIr(oldReaderIndex);
+ newReader->CreateClassIr(newReaderIndex);
+
+ std::shared_ptr<ir::DexFile> oldDexFile = oldReader->GetIr();
+ std::shared_ptr<ir::DexFile> newDexFile = newReader->GetIr();
+
+ for (int i = 0; i < oldDexFile->classes.size(); i++) {
+ const char* oldClassDescriptor =
+ oldDexFile->classes[i]->type->descriptor->c_str();
+ if (strcmp(oldClassDescriptor, descName.c_str()) != 0) {
+ continue;
+ }
+
+ bool match = false;
+ for (int j = 0; j < newDexFile->classes.size(); j++) {
+ const char* newClassDescriptor =
+ newDexFile->classes[j]->type->descriptor->c_str();
+ if (strcmp(oldClassDescriptor, newClassDescriptor) == 0) {
+ match = classStructureMatches(oldDexFile->classes[i].get(),
+ newDexFile->classes[j].get());
+ break;
+ }
+ }
+
+ if (!match) {
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.h b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.h
new file mode 100644
index 00000000..2b7dd67a
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 Code Intelligence GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+
+#include "jvmti.h"
+#include "slicer/reader.h"
+
+// DexFileManager will contain the contents to multiple DEX files
+class DexFileManager {
+ public:
+ DexFileManager() {}
+
+ void addDexFile(const unsigned char* bytes, int length);
+ unsigned char* getClassBytes(const char* className, int dexFileIndex,
+ jvmtiEnv* jvmti, size_t* newSize);
+ uint32_t findDexFileForClass(const char* className);
+ bool structureMatches(dex::Reader* oldReader, dex::Reader* newReader,
+ const char* className);
+
+ private:
+ std::vector<unsigned char*> dexFiles;
+ std::vector<int> dexFilesSize;
+};
diff --git a/src/main/native/com/code_intelligence/jazzer/android/jazzer_jvmti_allocator.h b/src/main/native/com/code_intelligence/jazzer/android/jazzer_jvmti_allocator.h
new file mode 100644
index 00000000..0748c177
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/android/jazzer_jvmti_allocator.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2023 Code Intelligence GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include "slicer/writer.h"
+
+class JazzerJvmtiAllocator : public dex::Writer::Allocator {
+ public:
+ JazzerJvmtiAllocator(jvmtiEnv* jvmti_env) : jvmti_env_(jvmti_env) {}
+
+ virtual void* Allocate(size_t size) {
+ unsigned char* alloc = nullptr;
+ jvmtiError error_num = jvmti_env_->Allocate(size, &alloc);
+
+ if (error_num != JVMTI_ERROR_NONE) {
+ std::cerr << "JazzerJvmtiAllocator Allocation error. JVMTI error: "
+ << error_num << std::endl;
+ }
+
+ return (void*)alloc;
+ }
+
+ virtual void Free(void* ptr) {
+ if (ptr == nullptr) {
+ return;
+ }
+
+ jvmtiError error_num = jvmti_env_->Deallocate((unsigned char*)ptr);
+
+ if (error_num != JVMTI_ERROR_NONE) {
+ std::cout << "JazzerJvmtiAllocator Free error. JVMTI error: " << error_num
+ << std::endl;
+ }
+ }
+
+ private:
+ jvmtiEnv* jvmti_env_;
+};
diff --git a/src/main/native/com/code_intelligence/jazzer/android/native_agent.cpp b/src/main/native/com/code_intelligence/jazzer/android/native_agent.cpp
new file mode 100644
index 00000000..9f0b2ad8
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/android/native_agent.cpp
@@ -0,0 +1,313 @@
+// Copyright 2023 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <dlfcn.h>
+#include <jni.h>
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "absl/strings/str_split.h"
+#include "dex_file_manager.h"
+#include "jazzer_jvmti_allocator.h"
+#include "jvmti.h"
+#include "slicer/arrayview.h"
+#include "slicer/dex_format.h"
+#include "slicer/reader.h"
+#include "slicer/writer.h"
+
+static std::string agentOptions;
+static DexFileManager dfm;
+
+const std::string kAndroidAgentClass =
+ "com/code_intelligence/jazzer/android/DexFileManager";
+
+void retransformLoadedClasses(jvmtiEnv* jvmti, JNIEnv* env) {
+ jint classCount = 0;
+ jclass* classes;
+
+ jvmti->GetLoadedClasses(&classCount, &classes);
+
+ std::vector<jclass> classesToRetransform;
+ for (int i = 0; i < classCount; i++) {
+ jboolean isModifiable = false;
+ jvmti->IsModifiableClass(classes[i], &isModifiable);
+
+ if ((bool)isModifiable) {
+ classesToRetransform.push_back(classes[i]);
+ }
+ }
+
+ jvmtiError errorNum = jvmti->RetransformClasses(classesToRetransform.size(),
+ &classesToRetransform[0]);
+ if (errorNum != JVMTI_ERROR_NONE) {
+ std::cerr << "Could not retransform classes. JVMTI error: " << errorNum
+ << std::endl;
+ exit(1);
+ }
+}
+
+std::vector<std::string> getDexFiles(std::string jarPath, JNIEnv* env) {
+ jclass jazzerClass = env->FindClass(kAndroidAgentClass.c_str());
+ if (jazzerClass == nullptr) {
+ std::cerr << kAndroidAgentClass << " could not be found" << std::endl;
+ exit(1);
+ }
+
+ const char* getDexFilesFunction = "getDexFilesForJar";
+ jmethodID getDexFilesForJar =
+ env->GetStaticMethodID(jazzerClass, getDexFilesFunction,
+ "(Ljava/lang/String;)[Ljava/lang/String;");
+ if (getDexFilesForJar == nullptr) {
+ std::cerr << getDexFilesFunction << " could not be found\n";
+ exit(1);
+ }
+
+ jstring jJarFile = env->NewStringUTF(jarPath.data());
+ jobjectArray dexFilesArray = (jobjectArray)env->CallStaticObjectMethod(
+ jazzerClass, getDexFilesForJar, jJarFile);
+
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ exit(1);
+ }
+
+ int length = env->GetArrayLength(dexFilesArray);
+
+ std::vector<std::string> dexFilesResult;
+ for (int i = 0; i < length; i++) {
+ jstring dexFileJstring =
+ (jstring)env->GetObjectArrayElement(dexFilesArray, i);
+ const char* dexFileChars = env->GetStringUTFChars(dexFileJstring, NULL);
+ std::string dexFileString(dexFileChars);
+
+ env->ReleaseStringUTFChars(dexFileJstring, dexFileChars);
+ dexFilesResult.push_back(dexFileString);
+ }
+
+ return dexFilesResult;
+}
+
+void initializeBootclassOverrideJar(std::string jarPath, JNIEnv* env) {
+ std::vector<std::string> dexFiles = getDexFiles(jarPath, env);
+
+ std::cerr << "Adding DEX files for: " << jarPath << std::endl;
+ for (int i = 0; i < dexFiles.size(); i++) {
+ std::cerr << "DEX FILE: " << dexFiles[i] << std::endl;
+ }
+
+ for (int i = 0; i < dexFiles.size(); i++) {
+ jclass bootHelperClass = env->FindClass(kAndroidAgentClass.c_str());
+ if (bootHelperClass == nullptr) {
+ std::cerr << kAndroidAgentClass << " could not be found" << std::endl;
+ exit(1);
+ }
+
+ jmethodID getBytecodeFromDex =
+ env->GetStaticMethodID(bootHelperClass, "getBytecodeFromDex",
+ "(Ljava/lang/String;Ljava/lang/String;)[B");
+ if (getBytecodeFromDex == nullptr) {
+ std::cerr << "'getBytecodeFromDex' not found\n";
+ exit(1);
+ }
+
+ jstring jjarPath = env->NewStringUTF(jarPath.data());
+ jstring jdexFile = env->NewStringUTF(dexFiles[i].data());
+
+ int length = 1;
+ std::vector<unsigned char> dexFileBytes;
+
+ jbyteArray dexBytes = (jbyteArray)env->CallStaticObjectMethod(
+ bootHelperClass, getBytecodeFromDex, jjarPath, jdexFile);
+
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ exit(1);
+ }
+
+ jbyte* data = new jbyte;
+ data = env->GetByteArrayElements(dexBytes, 0);
+ length = env->GetArrayLength(dexBytes);
+
+ for (int j = 0; j < length; j++) {
+ dexFileBytes.push_back(data[j]);
+ }
+
+ env->DeleteLocalRef(dexBytes);
+ env->DeleteLocalRef(jjarPath);
+ env->DeleteLocalRef(jdexFile);
+ env->DeleteLocalRef(bootHelperClass);
+
+ unsigned char* usData = reinterpret_cast<unsigned char*>(&dexFileBytes[0]);
+ dfm.addDexFile(usData, length);
+ }
+}
+
+void JNICALL jazzerClassFileLoadHook(
+ jvmtiEnv* jvmti, JNIEnv* jni_env, jclass class_being_redefined,
+ jobject loader, const char* name, jobject protection_domain,
+ jint class_data_len, const unsigned char* class_data,
+ jint* new_class_data_len, unsigned char** new_class_data) {
+ // check if Jazzer class
+ const char* prefix = "com/code_intelligence/jazzer/";
+ if (strncmp(name, prefix, 29) == 0) {
+ return;
+ }
+
+ int indx = dfm.findDexFileForClass(name);
+ if (indx < 0) {
+ return;
+ }
+
+ size_t newSize;
+ unsigned char* newClassDataResult =
+ dfm.getClassBytes(name, indx, jvmti, &newSize);
+
+ dex::Reader oldReader(const_cast<unsigned char*>(class_data),
+ (size_t)class_data_len);
+ dex::Reader newReader(newClassDataResult, newSize);
+ if (dfm.structureMatches(&oldReader, &newReader, name)) {
+ std::cout << "REDEFINING WITH INSTRUMENTATION: " << name << std::endl;
+ *new_class_data = newClassDataResult;
+ *new_class_data_len = static_cast<jint>(newSize);
+ }
+}
+
+bool fileExists(std::string filePath) { return std::ifstream(filePath).good(); }
+
+void JNICALL jazzerVMInit(jvmtiEnv* jvmti_env, JNIEnv* jni_env,
+ jthread thread) {
+ // Parse agentOptions
+
+ std::stringstream ss(agentOptions);
+ std::string token;
+
+ std::string jazzerClassesJar;
+ std::vector<std::string> bootpathClassesOverrides;
+ while (std::getline(ss, token, ',')) {
+ std::vector<std::string> split =
+ absl::StrSplit(token, absl::MaxSplits('=', 1));
+ if (split.size() < 2) {
+ std::cerr << "ERROR: no option given for: " << token;
+ exit(1);
+ }
+
+ if (split[0] == "injectJars") {
+ jazzerClassesJar = split[1];
+ } else if (split[0] == "bootstrapClassOverrides") {
+ bootpathClassesOverrides =
+ absl::StrSplit(split[1], absl::MaxSplits(':', 10));
+ }
+ }
+
+ if (!fileExists(jazzerClassesJar)) {
+ std::cerr << "ERROR: Jazzer bootstrap class file not found at: "
+ << jazzerClassesJar << std::endl;
+ exit(1);
+ }
+
+ jvmti_env->AddToBootstrapClassLoaderSearch(jazzerClassesJar.c_str());
+
+ jvmtiCapabilities jazzerJvmtiCapabilities = {
+ .can_tag_objects = 0,
+ .can_generate_field_modification_events = 0,
+ .can_generate_field_access_events = 0,
+ .can_get_bytecodes = 0,
+ .can_get_synthetic_attribute = 0,
+ .can_get_owned_monitor_info = 0,
+ .can_get_current_contended_monitor = 0,
+ .can_get_monitor_info = 0,
+ .can_pop_frame = 0,
+ .can_redefine_classes = 1,
+ .can_signal_thread = 0,
+ .can_get_source_file_name = 1,
+ .can_get_line_numbers = 0,
+ .can_get_source_debug_extension = 0,
+ .can_access_local_variables = 0,
+ .can_maintain_original_method_order = 0,
+ .can_generate_single_step_events = 0,
+ .can_generate_exception_events = 0,
+ .can_generate_frame_pop_events = 0,
+ .can_generate_breakpoint_events = 0,
+ .can_suspend = 0,
+ .can_redefine_any_class = 0,
+ .can_get_current_thread_cpu_time = 0,
+ .can_get_thread_cpu_time = 0,
+ .can_generate_method_entry_events = 0,
+ .can_generate_method_exit_events = 0,
+ .can_generate_all_class_hook_events = 0,
+ .can_generate_compiled_method_load_events = 0,
+ .can_generate_monitor_events = 0,
+ .can_generate_vm_object_alloc_events = 0,
+ .can_generate_native_method_bind_events = 0,
+ .can_generate_garbage_collection_events = 0,
+ .can_generate_object_free_events = 0,
+ .can_force_early_return = 0,
+ .can_get_owned_monitor_stack_depth_info = 0,
+ .can_get_constant_pool = 0,
+ .can_set_native_method_prefix = 0,
+ .can_retransform_classes = 1,
+ .can_retransform_any_class = 0,
+ .can_generate_resource_exhaustion_heap_events = 0,
+ .can_generate_resource_exhaustion_threads_events = 0,
+ };
+
+ jvmtiError je = jvmti_env->AddCapabilities(&jazzerJvmtiCapabilities);
+ if (je != JVMTI_ERROR_NONE) {
+ std::cerr << "JVMTI ERROR: " << je << std::endl;
+ exit(1);
+ }
+
+ for (int i = 0; i < bootpathClassesOverrides.size(); i++) {
+ if (!fileExists(bootpathClassesOverrides[i])) {
+ std::cerr << "ERROR: Bootpath Class override jar not found at: "
+ << bootpathClassesOverrides[i] << std::endl;
+ exit(1);
+ }
+
+ initializeBootclassOverrideJar(bootpathClassesOverrides[i], jni_env);
+ }
+
+ retransformLoadedClasses(jvmti_env, jni_env);
+}
+
+JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) {
+ jvmtiEnv* jvmti = nullptr;
+ if (vm->GetEnv((void**)&jvmti, JVMTI_VERSION_1_2) != JNI_OK) {
+ return 1;
+ }
+
+ jvmtiEventCallbacks callbacks;
+
+ memset(&callbacks, 0, sizeof(callbacks));
+ callbacks.ClassFileLoadHook = jazzerClassFileLoadHook;
+ callbacks.VMInit = jazzerVMInit;
+
+ jvmti->SetEventCallbacks(&callbacks, sizeof(jvmtiEventCallbacks));
+ jvmti->SetEventNotificationMode(JVMTI_ENABLE,
+ JVMTI_EVENT_CLASS_FILE_LOAD_HOOK, NULL);
+ jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_VM_INIT, NULL);
+
+ // Save the options string here, this is the only time it will be available
+ // however, we wont be able to use this to initialize until VMInit callback is
+ // called
+ agentOptions = std::string(options);
+ return 0;
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel
new file mode 100644
index 00000000..27d8a1c5
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel
@@ -0,0 +1,166 @@
+load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library")
+load("//bazel:compat.bzl", "MULTI_PLATFORM", "SKIP_ON_WINDOWS")
+
+cc_jni_library(
+ name = "jazzer_driver",
+ platforms = MULTI_PLATFORM,
+ visibility = [
+ "//src/jmh:__subpackages__",
+ "//src/main/java/com/code_intelligence/jazzer/driver:__pkg__",
+ "//src/main/java/com/code_intelligence/jazzer/junit:__pkg__",
+ "//src/main/java/com/code_intelligence/jazzer/runtime:__pkg__",
+ "//src/test:__subpackages__",
+ ],
+ deps = [
+ ":jazzer_driver_lib",
+ "@jazzer_libfuzzer//:libfuzzer_no_main",
+ ] + select({
+ # Windows doesn't have a concept analogous to RTLD_GLOBAL.
+ "@platforms//os:windows": [],
+ "//conditions:default": [":init_jazzer_preload"],
+ }),
+)
+
+cc_library(
+ name = "jazzer_driver_lib",
+ visibility = ["//src/test/native/com/code_intelligence/jazzer/driver/mocks:__pkg__"],
+ deps = [
+ ":coverage_tracker",
+ ":fuzz_target_runner",
+ ":jazzer_fuzzer_callbacks",
+ ":libfuzzer_callbacks",
+ ":mutator",
+ ],
+)
+
+cc_jni_library(
+ name = "jazzer_android_tooling",
+ srcs = ["android_tooling.cpp"],
+ platforms = MULTI_PLATFORM,
+ target_compatible_with = SKIP_ON_WINDOWS,
+ visibility = ["//src/main/java/com/code_intelligence/jazzer/android:__pkg__"],
+ deps = [
+ "//src/main/java/com/code_intelligence/jazzer/android:android_runtime.hdrs",
+ ],
+)
+
+cc_library(
+ name = "coverage_tracker",
+ srcs = ["coverage_tracker.cpp"],
+ hdrs = ["coverage_tracker.h"],
+ deps = ["//src/main/java/com/code_intelligence/jazzer/runtime:coverage_map.hdrs"],
+ # Symbols are only referenced dynamically via JNI.
+ alwayslink = True,
+)
+
+cc_library(
+ name = "fuzz_target_runner",
+ srcs = ["fuzz_target_runner.cpp"],
+ hdrs = ["fuzz_target_runner.h"],
+ linkopts = select({
+ "@platforms//os:windows": [],
+ "//conditions:default": ["-ldl"],
+ }),
+ deps = [
+ ":sanitizer_symbols",
+ "//src/main/java/com/code_intelligence/jazzer/runtime:fuzz_target_runner_natives.hdrs",
+ ],
+ # With sanitizers, symbols are only referenced dynamically via JNI.
+ alwayslink = True,
+)
+
+cc_library(
+ name = "fuzzed_data_provider",
+ srcs = ["fuzzed_data_provider.cpp"],
+ visibility = [
+ "//launcher:__pkg__",
+ ],
+ deps = [
+ "//src/main/java/com/code_intelligence/jazzer/driver:fuzzed_data_provider_impl.hdrs",
+ ],
+ # Symbols may only be referenced dynamically via JNI.
+ alwayslink = True,
+)
+
+cc_jni_library(
+ name = "jazzer_fuzzed_data_provider",
+ platforms = MULTI_PLATFORM,
+ visibility = ["//src/main/java/com/code_intelligence/jazzer/driver:__pkg__"],
+ deps = [":fuzzed_data_provider"],
+)
+
+cc_library(
+ name = "jazzer_fuzzer_callbacks",
+ srcs = ["jazzer_fuzzer_callbacks.cpp"],
+ deps = [
+ ":sanitizer_hooks_with_pc",
+ "//src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs",
+ ],
+ alwayslink = True,
+)
+
+cc_jni_library(
+ name = "jazzer_signal_handler",
+ srcs = ["signal_handler.cpp"],
+ platforms = MULTI_PLATFORM,
+ visibility = ["//src/main/java/com/code_intelligence/jazzer/driver:__pkg__"],
+ deps = ["//src/main/java/com/code_intelligence/jazzer/driver:signal_handler.hdrs"],
+)
+
+cc_library(
+ name = "libfuzzer_callbacks",
+ srcs = ["libfuzzer_callbacks.cpp"],
+ deps = [
+ "//src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs",
+ "@com_google_absl//absl/strings",
+ ],
+ # Symbols are only referenced dynamically via JNI.
+ alwayslink = True,
+)
+
+cc_library(
+ name = "mutator",
+ srcs = ["mutator.cpp"],
+ deps = ["//src/main/java/com/code_intelligence/jazzer/runtime:mutator.hdrs"],
+ # Symbols are only referenced dynamically via JNI.
+ alwayslink = True,
+)
+
+cc_library(
+ name = "init_jazzer_preload",
+ srcs = ["init_jazzer_preload.cpp"],
+ linkopts = ["-ldl"],
+ target_compatible_with = SKIP_ON_WINDOWS,
+ deps = ["@fmeum_rules_jni//jni"],
+ # Symbols are only referenced dynamically via JNI.
+ alwayslink = True,
+)
+
+cc_library(
+ name = "sanitizer_hooks_with_pc",
+ hdrs = ["sanitizer_hooks_with_pc.h"],
+ visibility = ["//:__subpackages__"],
+)
+
+cc_library(
+ name = "sanitizer_symbols",
+ srcs = ["sanitizer_symbols.cpp"],
+ # Symbols are referenced dynamically by libFuzzer.
+ alwayslink = True,
+)
+
+cc_test(
+ name = "fuzzed_data_provider_test",
+ size = "small",
+ srcs = ["fuzzed_data_provider_test.cpp"],
+ copts = select({
+ "@platforms//os:windows": ["/std:c++17"],
+ "//conditions:default": ["-std=c++17"],
+ }),
+ deps = [
+ ":fuzzed_data_provider",
+ "@fmeum_rules_jni//jni",
+ "@googletest//:gtest",
+ "@googletest//:gtest_main",
+ ],
+)
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp b/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp
new file mode 100644
index 00000000..73444696
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp
@@ -0,0 +1,61 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <dlfcn.h>
+#include <jni.h>
+
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+#include "com_code_intelligence_jazzer_android_AndroidRuntime.h"
+
+const char *RUNTIME_LIBRARY = "libandroid_runtime.so";
+
+// Register native methods from the Android Runtime (ART) framework.
+[[maybe_unused]] jint
+Java_com_code_1intelligence_jazzer_android_AndroidRuntime_registerNatives(
+ JNIEnv *env, jclass clazz) {
+ void *handle = nullptr;
+ handle = dlopen(RUNTIME_LIBRARY, RTLD_LAZY);
+
+ if (handle == nullptr) {
+ std::cerr
+ << "ERROR: Unable to locate runtime library. Check LD_LIBRARY_PATH."
+ << std::endl;
+ exit(1);
+ }
+ // reset errors
+ dlerror();
+
+ // Load the symbol from library
+ typedef jint (*Register_Frameworks_t)(JNIEnv *);
+ Register_Frameworks_t Register_Frameworks;
+
+ Register_Frameworks = reinterpret_cast<Register_Frameworks_t>(
+ dlsym(handle, "registerFrameworkNatives"));
+ const char *dlsym_error = dlerror();
+ if (dlsym_error) {
+ std::cerr << "ERROR: Unable to invoke registerFrameworkNatives."
+ << std::endl;
+ exit(1);
+ }
+
+ if (Register_Frameworks == nullptr) {
+ std::cerr << "ERROR: Register_Frameworks is null." << std::endl;
+ exit(1);
+ }
+
+ return Register_Frameworks(env);
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp
new file mode 100644
index 00000000..d904c2d5
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp
@@ -0,0 +1,122 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "coverage_tracker.h"
+
+#include <jni.h>
+#include <stdint.h>
+
+#include <iostream>
+#include <vector>
+
+#include "com_code_intelligence_jazzer_runtime_CoverageMap.h"
+
+extern "C" void __sanitizer_cov_8bit_counters_init(uint8_t *start,
+ uint8_t *end);
+extern "C" void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
+ const uintptr_t *pcs_end);
+extern "C" size_t __sanitizer_cov_get_observed_pcs(uintptr_t **pc_entries);
+
+namespace {
+void AssertNoException(JNIEnv &env) {
+ if (env.ExceptionCheck()) {
+ env.ExceptionDescribe();
+ std::cerr << "ERROR: Java exception occurred in CoverageTracker JNI code"
+ << std::endl;
+ _Exit(1);
+ }
+}
+} // namespace
+
+namespace jazzer {
+
+uint8_t *CoverageTracker::counters_ = nullptr;
+PCTableEntry *CoverageTracker::pc_entries_ = nullptr;
+
+void CoverageTracker::Initialize(JNIEnv &env, jlong counters) {
+ if (counters_ != nullptr) {
+ std::cerr << "ERROR: CoverageTracker::Initialize must not be called more "
+ "than once"
+ << std::endl;
+ _Exit(1);
+ }
+ counters_ = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(counters));
+}
+
+void CoverageTracker::RegisterNewCounters(JNIEnv &env, jint old_num_counters,
+ jint new_num_counters) {
+ if (counters_ == nullptr) {
+ std::cerr
+ << "ERROR: CoverageTracker::Initialize should have been called first"
+ << std::endl;
+ _Exit(1);
+ }
+ if (new_num_counters < old_num_counters) {
+ std::cerr
+ << "ERROR: new_num_counters must not be smaller than old_num_counters"
+ << std::endl;
+ _Exit(1);
+ }
+ if (new_num_counters == old_num_counters) {
+ return;
+ }
+ std::size_t diff_num_counters = new_num_counters - old_num_counters;
+ // libFuzzer requires an array containing the instruction addresses associated
+ // with the coverage counters registered above. This is required to report how
+ // many edges have been covered. However, libFuzzer only checks these
+ // addresses when the corresponding flag is set to 1. Therefore, it is safe to
+ // set the all PC entries to any value as long as the corresponding flag is
+ // set to zero. We set the value of each PC to the index of the corresponding
+ // edge ID. This facilitates finding the edge ID of each covered PC reported
+ // by libFuzzer.
+ pc_entries_ = new PCTableEntry[diff_num_counters];
+ for (std::size_t i = 0; i < diff_num_counters; ++i) {
+ pc_entries_[i] = {i, 0};
+ }
+ __sanitizer_cov_8bit_counters_init(counters_ + old_num_counters,
+ counters_ + new_num_counters);
+ __sanitizer_cov_pcs_init((uintptr_t *)(pc_entries_),
+ (uintptr_t *)(pc_entries_ + diff_num_counters));
+}
+} // namespace jazzer
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_CoverageMap_initialize(
+ JNIEnv *env, jclass, jlong counters) {
+ ::jazzer::CoverageTracker::Initialize(*env, counters);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_CoverageMap_registerNewCounters(
+ JNIEnv *env, jclass, jint old_num_counters, jint new_num_counters) {
+ ::jazzer::CoverageTracker::RegisterNewCounters(*env, old_num_counters,
+ new_num_counters);
+}
+
+[[maybe_unused]] jintArray
+Java_com_code_1intelligence_jazzer_runtime_CoverageMap_getEverCoveredIds(
+ JNIEnv *env, jclass) {
+ uintptr_t *covered_pcs;
+ jint num_covered_pcs = __sanitizer_cov_get_observed_pcs(&covered_pcs);
+ std::vector<jint> covered_edge_ids(covered_pcs,
+ covered_pcs + num_covered_pcs);
+ delete[] covered_pcs;
+
+ jintArray covered_edge_ids_jni = env->NewIntArray(num_covered_pcs);
+ AssertNoException(*env);
+ env->SetIntArrayRegion(covered_edge_ids_jni, 0, num_covered_pcs,
+ covered_edge_ids.data());
+ AssertNoException(*env);
+ return covered_edge_ids_jni;
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h
new file mode 100644
index 00000000..234536dc
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2021 Code Intelligence GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <jni.h>
+#include <stdint.h>
+
+#include <string>
+
+namespace jazzer {
+
+// The members of this struct are only accessed by libFuzzer.
+struct __attribute__((packed)) PCTableEntry {
+ [[maybe_unused]] uintptr_t PC, PCFlags;
+};
+
+// CoverageTracker registers an array of 8-bit coverage counters with
+// libFuzzer. The array is populated from Java using Unsafe.
+class CoverageTracker {
+ private:
+ static uint8_t *counters_;
+ static PCTableEntry *pc_entries_;
+
+ public:
+ static void Initialize(JNIEnv &env, jlong counters);
+ static void RegisterNewCounters(JNIEnv &env, jint old_num_counters,
+ jint new_num_counters);
+};
+} // namespace jazzer
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp
new file mode 100644
index 00000000..02e9ae14
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp
@@ -0,0 +1,240 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * A native wrapper around the FuzzTargetRunner Java class that executes it as a
+ * libFuzzer fuzz target.
+ */
+
+#include "fuzz_target_runner.h"
+
+#ifndef _WIN32
+#include <dlfcn.h>
+#endif
+#include <jni.h>
+#include <stdint.h>
+
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "com_code_intelligence_jazzer_runtime_FuzzTargetRunnerNatives.h"
+
+extern "C" int LLVMFuzzerRunDriver(int *argc, char ***argv,
+ int (*UserCb)(const uint8_t *Data,
+ size_t Size));
+extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize);
+
+namespace {
+jclass gRunner;
+jmethodID gRunOneId;
+jmethodID gMutateOneId;
+jmethodID gCrossOverId;
+JavaVM *gJavaVm;
+JNIEnv *gEnv;
+jboolean gUseExperimentalMutator;
+
+// A libFuzzer-registered callback that outputs the crashing input, but does
+// not include a stack trace.
+void (*gLibfuzzerPrintCrashingInput)() = nullptr;
+
+int testOneInput(const uint8_t *data, const std::size_t size) {
+ JNIEnv &env = *gEnv;
+ jint jsize =
+ std::min(size, static_cast<size_t>(std::numeric_limits<jint>::max()));
+ int res = env.CallStaticIntMethod(gRunner, gRunOneId, data, jsize);
+ if (env.ExceptionCheck()) {
+ env.ExceptionDescribe();
+ _Exit(1);
+ }
+ return res;
+}
+} // namespace
+
+extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size,
+ size_t MaxSize, unsigned int Seed) {
+ if (gUseExperimentalMutator) {
+ JNIEnv &env = *gEnv;
+ jint jsize =
+ std::min(Size, static_cast<size_t>(std::numeric_limits<jint>::max()));
+ jint jmaxSize = std::min(
+ MaxSize, static_cast<size_t>(std::numeric_limits<jint>::max()));
+ jint jseed = static_cast<jint>(Seed);
+ jint newSize = env.CallStaticLongMethod(gRunner, gMutateOneId, Data, jsize,
+ jmaxSize, jseed);
+ if (env.ExceptionCheck()) {
+ env.ExceptionDescribe();
+ _Exit(1);
+ }
+ return static_cast<uint32_t>(newSize);
+ } else {
+ return LLVMFuzzerMutate(Data, Size, MaxSize);
+ }
+}
+
+extern "C" size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1,
+ const uint8_t *Data2, size_t Size2,
+ uint8_t *Out, size_t MaxOutSize,
+ unsigned int Seed) {
+ if (gUseExperimentalMutator) {
+ JNIEnv &env = *gEnv;
+ jint jsize1 =
+ std::min(Size1, static_cast<size_t>(std::numeric_limits<jint>::max()));
+ jint jsize2 =
+ std::min(Size2, static_cast<size_t>(std::numeric_limits<jint>::max()));
+ jint jMaxOutSize = std::min(
+ MaxOutSize, static_cast<size_t>(std::numeric_limits<jint>::max()));
+ jint jseed = static_cast<jint>(Seed);
+
+ jint newSize =
+ env.CallStaticLongMethod(gRunner, gCrossOverId, Data1, jsize1, Data2,
+ jsize2, Out, jMaxOutSize, jseed);
+ if (env.ExceptionCheck()) {
+ env.ExceptionDescribe();
+ _Exit(1);
+ }
+ return static_cast<uint32_t>(newSize);
+ } else {
+ // No custom cross over supported.
+ return 0;
+ }
+}
+
+namespace jazzer {
+void DumpJvmStackTraces() {
+ JNIEnv *env = nullptr;
+ if (gJavaVm->AttachCurrentThread(reinterpret_cast<void **>(&env), nullptr) !=
+ JNI_OK) {
+ return;
+ }
+ jmethodID dumpStack =
+ env->GetStaticMethodID(gRunner, "dumpAllStackTraces", "()V");
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ return;
+ }
+ env->CallStaticVoidMethod(gRunner, dumpStack);
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ return;
+ }
+ // Do not detach as we may be the main thread (but the JVM exits anyway).
+}
+} // namespace jazzer
+
+[[maybe_unused]] jint
+Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_startLibFuzzer(
+ JNIEnv *env, jclass, jobjectArray args, jclass runner,
+ jboolean useExperimentalMutator) {
+ gUseExperimentalMutator = useExperimentalMutator;
+ gEnv = env;
+ env->GetJavaVM(&gJavaVm);
+ gRunner = reinterpret_cast<jclass>(env->NewGlobalRef(runner));
+ gRunOneId = env->GetStaticMethodID(runner, "runOne", "(JI)I");
+ gMutateOneId = env->GetStaticMethodID(runner, "mutateOne", "(JIII)I");
+ gCrossOverId = env->GetStaticMethodID(runner, "crossOver", "(JIJIJII)I");
+ if (gRunOneId == nullptr) {
+ env->ExceptionDescribe();
+ _Exit(1);
+ }
+
+ int argc = env->GetArrayLength(args);
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ _Exit(1);
+ }
+ std::vector<std::string> argv_strings;
+ std::vector<const char *> argv_c;
+ for (jsize i = 0; i < argc; i++) {
+ auto arg_jni =
+ reinterpret_cast<jbyteArray>(env->GetObjectArrayElement(args, i));
+ if (arg_jni == nullptr) {
+ env->ExceptionDescribe();
+ _Exit(1);
+ }
+ jbyte *arg_c = env->GetByteArrayElements(arg_jni, nullptr);
+ if (arg_c == nullptr) {
+ env->ExceptionDescribe();
+ _Exit(1);
+ }
+ std::size_t arg_size = env->GetArrayLength(arg_jni);
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ _Exit(1);
+ }
+ argv_strings.emplace_back(reinterpret_cast<const char *>(arg_c), arg_size);
+ env->ReleaseByteArrayElements(arg_jni, arg_c, JNI_ABORT);
+ if (env->ExceptionCheck()) {
+ env->ExceptionDescribe();
+ _Exit(1);
+ }
+ }
+ for (jsize i = 0; i < argc; i++) {
+ argv_c.emplace_back(argv_strings[i].c_str());
+ }
+ // Null-terminate argv.
+ argv_c.emplace_back(nullptr);
+
+ const char **argv = argv_c.data();
+ return LLVMFuzzerRunDriver(&argc, const_cast<char ***>(&argv), testOneInput);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_printCrashingInput(
+ JNIEnv *, jclass) {
+ if (gLibfuzzerPrintCrashingInput == nullptr) {
+ std::cerr << "<not available>" << std::endl;
+ } else {
+ gLibfuzzerPrintCrashingInput();
+ }
+}
+
+namespace fuzzer {
+// Defined in:
+// https://github.com/llvm/llvm-project/blob/27cc31b64c0491725aa88a6822f0f2a2c18914d7/compiler-rt/lib/fuzzer/FuzzerLoop.cpp#L43
+// Used here:
+// https://github.com/llvm/llvm-project/blob/27cc31b64c0491725aa88a6822f0f2a2c18914d7/compiler-rt/lib/fuzzer/FuzzerLoop.cpp#L244
+extern bool RunningUserCallback;
+} // namespace fuzzer
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_temporarilyDisableLibfuzzerExitHook(
+ JNIEnv *, jclass) {
+ ::fuzzer::RunningUserCallback = false;
+}
+
+// We apply a patch to libFuzzer to make it call this function instead of
+// __sanitizer_set_death_callback to pass us the death callback.
+extern "C" [[maybe_unused]] void __jazzer_set_death_callback(
+ void (*callback)()) {
+ gLibfuzzerPrintCrashingInput = callback;
+#ifndef _WIN32
+ void *sanitizer_set_death_callback =
+ dlsym(RTLD_DEFAULT, "__sanitizer_set_death_callback");
+ if (sanitizer_set_death_callback != nullptr) {
+ (reinterpret_cast<void (*)(void (*)())>(sanitizer_set_death_callback))(
+ []() {
+ ::jazzer::DumpJvmStackTraces();
+ gLibfuzzerPrintCrashingInput();
+ // Ideally, we would be able to perform a graceful shutdown of the
+ // JVM. However, doing this directly results in a nested bug report by
+ // ASan or UBSan, likely because something about the stack/thread
+ // context in which they generate reports is incompatible with the JVM
+ // shutdown process. use_sigaltstack=0 does not help though, so this
+ // might be on us.
+ });
+ }
+#endif
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h
new file mode 100644
index 00000000..e64eb8f2
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2021 Code Intelligence GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace jazzer {
+/*
+ * Print the stack traces of all active JVM threads.
+ *
+ * This function can be called from any thread.
+ */
+void DumpJvmStackTraces();
+} // namespace jazzer
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp
new file mode 100644
index 00000000..7ea9c344
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp
@@ -0,0 +1,692 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Modified from
+// https://raw.githubusercontent.com/google/atheris/034284dc4bb1ad4f4ab6ba5d34fb4dca7c633660/fuzzed_data_provider.cc
+//
+// Original license and copyright notices:
+//
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Modified from
+// https://github.com/llvm/llvm-project/blob/70de7e0d9a95b7fcd7c105b06bd90fdf4e01f563/compiler-rt/include/fuzzer/FuzzedDataProvider.h
+//
+// Original license and copyright notices:
+//
+//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <tuple>
+#include <type_traits>
+
+#include "com_code_intelligence_jazzer_driver_FuzzedDataProviderImpl.h"
+
+namespace {
+
+jfieldID gDataPtrField = nullptr;
+jfieldID gRemainingBytesField = nullptr;
+
+void ThrowIllegalArgumentException(JNIEnv &env, const std::string &message) {
+ jclass illegal_argument_exception =
+ env.FindClass("java/lang/IllegalArgumentException");
+ env.ThrowNew(illegal_argument_exception, message.c_str());
+}
+
+template <typename T>
+struct JniArrayType {};
+
+#define JNI_ARRAY_TYPE(lower_case, sentence_case) \
+ template <> \
+ struct JniArrayType<j##lower_case> { \
+ typedef j##lower_case type; \
+ typedef j##lower_case##Array array_type; \
+ static constexpr array_type (JNIEnv::*kNewArrayFunc)(jsize) = \
+ &JNIEnv::New##sentence_case##Array; \
+ static constexpr void (JNIEnv::*kSetArrayRegionFunc)( \
+ array_type array, jsize start, jsize len, \
+ const type *buf) = &JNIEnv::Set##sentence_case##ArrayRegion; \
+ };
+
+JNI_ARRAY_TYPE(boolean, Boolean);
+JNI_ARRAY_TYPE(byte, Byte);
+JNI_ARRAY_TYPE(short, Short);
+JNI_ARRAY_TYPE(int, Int);
+JNI_ARRAY_TYPE(long, Long);
+
+template <typename T>
+typename JniArrayType<T>::array_type JNICALL
+ConsumeIntegralArray(JNIEnv &env, jobject self, jint max_length) {
+ if (max_length < 0) {
+ ThrowIllegalArgumentException(env, "maxLength must not be negative");
+ return nullptr;
+ }
+ // Arrays of integral types are considered data and thus consumed from the
+ // beginning of the buffer.
+ const auto *dataPtr =
+ reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField));
+ jint remainingBytes = env.GetIntField(self, gRemainingBytesField);
+
+ jint max_num_bytes =
+ std::min(static_cast<jint>(sizeof(T)) * max_length, remainingBytes);
+ jsize actual_length = max_num_bytes / sizeof(T);
+ jint actual_num_bytes = sizeof(T) * actual_length;
+ auto array = (env.*(JniArrayType<T>::kNewArrayFunc))(actual_length);
+ (env.*(JniArrayType<T>::kSetArrayRegionFunc))(
+ array, 0, actual_length, reinterpret_cast<const T *>(dataPtr));
+
+ env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + actual_num_bytes));
+ env.SetIntField(self, gRemainingBytesField,
+ remainingBytes - actual_num_bytes);
+
+ return array;
+}
+
+template <typename T>
+jbyteArray JNICALL ConsumeRemainingAsArray(JNIEnv &env, jobject self) {
+ return ConsumeIntegralArray<T>(env, self, std::numeric_limits<jint>::max());
+}
+
+template <typename T>
+T JNICALL ConsumeIntegralInRange(JNIEnv &env, jobject self, T min, T max) {
+ uint64_t range = static_cast<uint64_t>(max) - min;
+ uint64_t result = 0;
+ jint offset = 0;
+
+ const auto *dataPtr =
+ reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField));
+ jint remainingBytes = env.GetIntField(self, gRemainingBytesField);
+
+ while (offset < 8 * sizeof(T) && (range >> offset) > 0 &&
+ remainingBytes != 0) {
+ --remainingBytes;
+ result = (result << 8u) | dataPtr[remainingBytes];
+ offset += 8;
+ }
+
+ env.SetIntField(self, gRemainingBytesField, remainingBytes);
+ // dataPtr hasn't been modified, so we don't need to update gDataPtrField.
+
+ if (range != std::numeric_limits<T>::max())
+ // We accept modulo bias in favor of reading a dynamic number of bytes as
+ // this would make it harder for the fuzzer to mutate towards values from
+ // the table of recent compares.
+ result = result % (range + 1);
+
+ return static_cast<T>(min + result);
+}
+
+template <typename T>
+T JNICALL ConsumeIntegral(JNIEnv &env, jobject self) {
+ // First generate an unsigned value and then (safely) cast it to a signed
+ // integral type. By doing this rather than calling ConsumeIntegralInRange
+ // with bounds [signed_min, signed_max], we ensure that there is a direct
+ // correspondence between the consumed raw bytes and the result (e.g., 0
+ // corresponds to 0 and not to signed_min). This should help mutating
+ // towards entries of the table of recent compares.
+ using UnsignedT = typename std::make_unsigned<T>::type;
+ static_assert(
+ std::numeric_limits<UnsignedT>::is_modulo,
+ "Unsigned to signed conversion requires modulo-based overflow handling");
+ return static_cast<T>(ConsumeIntegralInRange<UnsignedT>(
+ env, self, 0, std::numeric_limits<UnsignedT>::max()));
+}
+
+bool JNICALL ConsumeBool(JNIEnv &env, jobject self) {
+ return ConsumeIntegral<uint8_t>(env, self) & 1u;
+}
+
+jchar ConsumeCharInternal(JNIEnv &env, jobject self, bool filter_surrogates) {
+ auto raw_codepoint = ConsumeIntegral<jchar>(env, self);
+ if (filter_surrogates && raw_codepoint >= 0xd800 && raw_codepoint < 0xe000)
+ raw_codepoint -= 0xd800;
+ return raw_codepoint;
+}
+
+jchar JNICALL ConsumeChar(JNIEnv &env, jobject self) {
+ return ConsumeCharInternal(env, self, false);
+}
+
+jchar JNICALL ConsumeCharNoSurrogates(JNIEnv &env, jobject self) {
+ return ConsumeCharInternal(env, self, true);
+}
+
+template <typename T>
+T JNICALL ConsumeProbability(JNIEnv &env, jobject self) {
+ using IntegralType =
+ typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
+ uint64_t>::type;
+ T result = static_cast<T>(ConsumeIntegral<IntegralType>(env, self));
+ result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
+ return result;
+}
+
+template <typename T>
+T JNICALL ConsumeFloatInRange(JNIEnv &env, jobject self, T min, T max) {
+ T range;
+ T result = min;
+
+ // Deal with overflow, in the event min and max are very far apart
+ if (min < 0 && max > 0 && min + std::numeric_limits<T>::max() < max) {
+ range = (max / 2) - (min / 2);
+ if (ConsumeBool(env, self)) {
+ result += range;
+ }
+ } else {
+ range = max - min;
+ }
+
+ T probability = ConsumeProbability<T>(env, self);
+ return result + range * probability;
+}
+
+template <typename T>
+T JNICALL ConsumeRegularFloat(JNIEnv &env, jobject self) {
+ return ConsumeFloatInRange(env, self, std::numeric_limits<T>::lowest(),
+ std::numeric_limits<T>::max());
+}
+
+template <typename T>
+T JNICALL ConsumeFloat(JNIEnv &env, jobject self) {
+ if (env.GetIntField(self, gRemainingBytesField) == 0) return 0.0;
+
+ auto type_val = ConsumeIntegral<uint8_t>(env, self);
+
+ if (type_val <= 10) {
+ // Consume the same amount of bytes as for a regular float/double
+ ConsumeRegularFloat<T>(env, self);
+
+ switch (type_val) {
+ case 0:
+ return 0.0;
+ case 1:
+ return -0.0;
+ case 2:
+ return std::numeric_limits<T>::infinity();
+ case 3:
+ return -std::numeric_limits<T>::infinity();
+ case 4:
+ return std::numeric_limits<T>::quiet_NaN();
+ case 5:
+ return std::numeric_limits<T>::denorm_min();
+ case 6:
+ return -std::numeric_limits<T>::denorm_min();
+ case 7:
+ return std::numeric_limits<T>::min();
+ case 8:
+ return -std::numeric_limits<T>::min();
+ case 9:
+ return std::numeric_limits<T>::max();
+ case 10:
+ return -std::numeric_limits<T>::max();
+ default:
+ abort();
+ }
+ }
+
+ T regular = ConsumeRegularFloat<T>(env, self);
+ return regular;
+}
+
+// Polyfill for C++20 std::countl_one, which counts the number of leading ones
+// in an unsigned integer.
+inline __attribute__((always_inline)) uint8_t countl_one(uint8_t byte) {
+ // The result of __builtin_clz is undefined for 0.
+ if (byte == 0xFF) return 8;
+ return __builtin_clz(static_cast<uint8_t>(~byte)) - 24;
+}
+
+// Forces a byte to be a valid UTF-8 continuation byte.
+inline __attribute__((always_inline)) void ForceContinuationByte(
+ uint8_t &byte) {
+ byte = (byte | (1u << 7u)) & ~(1u << 6u);
+}
+
+constexpr uint8_t kTwoByteZeroLeadingByte = 0b11000000;
+constexpr uint8_t kTwoByteZeroContinuationByte = 0b10000000;
+constexpr uint8_t kThreeByteLowLeadingByte = 0b11100000;
+constexpr uint8_t kSurrogateLeadingByte = 0b11101101;
+
+enum class Utf8GenerationState {
+ LeadingByte_Generic,
+ LeadingByte_AfterBackslash,
+ ContinuationByte_Generic,
+ ContinuationByte_LowLeadingByte,
+ FirstContinuationByte_LowLeadingByte,
+ FirstContinuationByte_SurrogateLeadingByte,
+ FirstContinuationByte_Generic,
+ SecondContinuationByte_Generic,
+ LeadingByte_LowSurrogate,
+ FirstContinuationByte_LowSurrogate,
+ SecondContinuationByte_HighSurrogate,
+ SecondContinuationByte_LowSurrogate,
+};
+
+// Consumes up to `max_bytes` arbitrary bytes pointed to by `ptr` and returns a
+// valid "modified UTF-8" string of length at most `max_length` that resembles
+// the input bytes as closely as possible as well as the number of consumed
+// bytes. If `stop_on_slash` is true, then the string will end on the first
+// single consumed '\'.
+//
+// "Modified UTF-8" is the string encoding used by the JNI. It is the same as
+// the legacy encoding CESU-8, but with `\0` coded on two bytes. In these
+// encodings, code points requiring 4 bytes in modern UTF-8 are represented as
+// two surrogates, each of which is coded on 3 bytes.
+//
+// This function has been designed with the following goals in mind:
+// 1. The generated string should be biased towards containing ASCII characters
+// as these are often the ones that affect control flow directly.
+// 2. Correctly encoded data (e.g. taken from the table of recent compares)
+// should be emitted unchanged.
+// 3. The raw fuzzer input should be preserved as far as possible, but the
+// output must always be correctly encoded.
+//
+// The JVM accepts string in two encodings: UTF-16 and modified UTF-8.
+// Generating UTF-16 would make it harder to fulfill the first design goal and
+// would potentially hinder compatibility with corpora using the much more
+// widely used UTF-8 encoding, which is reasonably similar to modified UTF-8. As
+// a result, this function uses modified UTF-8.
+//
+// See Algorithm 1 of https://arxiv.org/pdf/2010.03090.pdf for more details on
+// the individual cases involved in determining the validity of a UTF-8 string.
+template <bool ascii_only, bool stop_on_backslash>
+std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data,
+ jint max_bytes,
+ jint max_length) {
+ std::string str;
+ // Every character in modified UTF-8 is coded on at most six bytes. Every
+ // consumed byte is transformed into at most one code unit, except for the
+ // case of a zero byte which requires two bytes.
+ if (ascii_only) {
+ str.reserve(std::min(2 * static_cast<std::size_t>(max_length),
+ 2 * static_cast<std::size_t>(max_bytes)));
+ } else {
+ str.reserve(std::min(6 * static_cast<std::size_t>(max_length),
+ 2 * static_cast<std::size_t>(max_bytes)));
+ }
+
+ Utf8GenerationState state = Utf8GenerationState::LeadingByte_Generic;
+ const uint8_t *pos = data;
+ const auto data_end = data + max_bytes;
+ for (jint length = 0; length < max_length && pos != data_end; ++pos) {
+ uint8_t c = *pos;
+ if (ascii_only) {
+ // Clamp to 7-bit ASCII range.
+ c &= 0x7Fu;
+ }
+ // Fix up c or previously read bytes according to the value of c and the
+ // current state. In the end, add the fixed up code unit c to the string.
+ // Exception: The zero character has to be coded on two bytes and is the
+ // only case in which an iteration of the loop adds two code units.
+ switch (state) {
+ case Utf8GenerationState::LeadingByte_Generic: {
+ switch (ascii_only ? 0 : countl_one(c)) {
+ case 0: {
+ // valid - 1-byte code point (ASCII)
+ // The zero character has to be coded on two bytes in modified
+ // UTF-8.
+ if (c == 0) {
+ str += static_cast<char>(kTwoByteZeroLeadingByte);
+ c = kTwoByteZeroContinuationByte;
+ } else if (stop_on_backslash && c == '\\') {
+ state = Utf8GenerationState::LeadingByte_AfterBackslash;
+ // The slash either signals the end of the string or is skipped,
+ // so don't append anything.
+ continue;
+ }
+ // Remain in state LeadingByte.
+ ++length;
+ break;
+ }
+ case 1: {
+ // invalid - continuation byte at leader byte position
+ // Fix it up to be of the form 0b110XXXXX and fall through to the
+ // case of a 2-byte sequence.
+ c |= 1u << 6u;
+ c &= ~(1u << 5u);
+ [[fallthrough]];
+ }
+ case 2: {
+ // (most likely) valid - start of a 2-byte sequence
+ // ASCII characters must be coded on a single byte, so we must
+ // ensure that the lower two bits combined with the six non-header
+ // bits of the following byte do not form a 7-bit ASCII value. This
+ // could only be the case if at most the lowest bit is set.
+ if ((c & 0b00011110u) == 0) {
+ state = Utf8GenerationState::ContinuationByte_LowLeadingByte;
+ } else {
+ state = Utf8GenerationState::ContinuationByte_Generic;
+ }
+ break;
+ }
+ // The default case falls through to the case of three leading ones
+ // coming right after.
+ default: {
+ // invalid - at least four leading ones
+ // In the case of exactly four leading ones, this would be valid
+ // UTF-8, but is not valid in the JVM's modified UTF-8 encoding.
+ // Fix it up by clearing the fourth leading one and falling through
+ // to the 3-byte case.
+ c &= ~(1u << 4u);
+ [[fallthrough]];
+ }
+ case 3: {
+ // valid - start of a 3-byte sequence
+ if (c == kThreeByteLowLeadingByte) {
+ state = Utf8GenerationState::FirstContinuationByte_LowLeadingByte;
+ } else if (c == kSurrogateLeadingByte) {
+ state = Utf8GenerationState::
+ FirstContinuationByte_SurrogateLeadingByte;
+ } else {
+ state = Utf8GenerationState::FirstContinuationByte_Generic;
+ }
+ break;
+ }
+ }
+ break;
+ }
+ case Utf8GenerationState::LeadingByte_AfterBackslash: {
+ if (c != '\\') {
+ // Mark the current byte as consumed.
+ ++pos;
+ goto done;
+ }
+ // A double backslash is consumed as a single one. As we skipped the
+ // first one, emit the second one as usual.
+ state = Utf8GenerationState::LeadingByte_Generic;
+ ++length;
+ break;
+ }
+ case Utf8GenerationState::ContinuationByte_LowLeadingByte: {
+ ForceContinuationByte(c);
+ // Preserve the zero character, which is coded on two bytes in modified
+ // UTF-8. In all other cases ensure that we are not incorrectly encoding
+ // an ASCII character on two bytes by setting the eighth least
+ // significant bit of the encoded value (second least significant bit of
+ // the leading byte).
+ auto previous_c = static_cast<uint8_t>(str.back());
+ if (previous_c != kTwoByteZeroLeadingByte ||
+ c != kTwoByteZeroContinuationByte) {
+ str.back() = static_cast<char>(previous_c | (1u << 1u));
+ }
+ state = Utf8GenerationState::LeadingByte_Generic;
+ ++length;
+ break;
+ }
+ case Utf8GenerationState::ContinuationByte_Generic: {
+ ForceContinuationByte(c);
+ state = Utf8GenerationState::LeadingByte_Generic;
+ ++length;
+ break;
+ }
+ case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: {
+ ForceContinuationByte(c);
+ // Ensure that the current code point could not have been coded on two
+ // bytes. As two bytes encode up to 11 bits and three bytes encode up
+ // to 16 bits, we thus have to make it such that the five highest bits
+ // are not all zero. Four of these bits are the non-header bits of the
+ // leader byte. Thus, set the highest non-header bit in this byte (fifth
+ // highest in the encoded value).
+ c |= 1u << 5u;
+ state = Utf8GenerationState::SecondContinuationByte_Generic;
+ break;
+ }
+ case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: {
+ ForceContinuationByte(c);
+ if (c & (1u << 5u)) {
+ // Start with a high surrogate (0xD800-0xDBFF). c contains the second
+ // byte and the first two bits of the third byte. The first two bits
+ // of this second byte are fixed to 10 (in 0x8-0xB).
+ c |= 1u << 5u;
+ c &= ~(1u << 4u);
+ // The high surrogate must be followed by a low surrogate.
+ state = Utf8GenerationState::SecondContinuationByte_HighSurrogate;
+ } else {
+ state = Utf8GenerationState::SecondContinuationByte_Generic;
+ }
+ break;
+ }
+ case Utf8GenerationState::FirstContinuationByte_Generic: {
+ ForceContinuationByte(c);
+ state = Utf8GenerationState::SecondContinuationByte_Generic;
+ break;
+ }
+ case Utf8GenerationState::SecondContinuationByte_HighSurrogate: {
+ ForceContinuationByte(c);
+ state = Utf8GenerationState::LeadingByte_LowSurrogate;
+ ++length;
+ break;
+ }
+ case Utf8GenerationState::SecondContinuationByte_LowSurrogate:
+ case Utf8GenerationState::SecondContinuationByte_Generic: {
+ ForceContinuationByte(c);
+ state = Utf8GenerationState::LeadingByte_Generic;
+ ++length;
+ break;
+ }
+ case Utf8GenerationState::LeadingByte_LowSurrogate: {
+ // We have to emit a low surrogate leading byte, which is a fixed value.
+ // We still consume a byte from the input to make fuzzer changes more
+ // stable and preserve valid surrogate pairs picked up from e.g. the
+ // table of recent compares.
+ c = kSurrogateLeadingByte;
+ state = Utf8GenerationState::FirstContinuationByte_LowSurrogate;
+ break;
+ }
+ case Utf8GenerationState::FirstContinuationByte_LowSurrogate: {
+ ForceContinuationByte(c);
+ // Low surrogates are code points in the range 0xDC00-0xDFFF. c contains
+ // the second byte and the first two bits of the third byte. The first
+ // two bits of this second byte are fixed to 11 (in 0xC-0xF).
+ c |= (1u << 5u) | (1u << 4u);
+ // The second continuation byte of a low surrogate is not restricted,
+ // but we need to track it differently to allow for correct backtracking
+ // if it isn't completed.
+ state = Utf8GenerationState::SecondContinuationByte_LowSurrogate;
+ break;
+ }
+ }
+ str += static_cast<uint8_t>(c);
+ }
+
+ // Backtrack the current incomplete character.
+ switch (state) {
+ case Utf8GenerationState::SecondContinuationByte_LowSurrogate:
+ str.pop_back();
+ [[fallthrough]];
+ case Utf8GenerationState::FirstContinuationByte_LowSurrogate:
+ str.pop_back();
+ [[fallthrough]];
+ case Utf8GenerationState::LeadingByte_LowSurrogate:
+ str.pop_back();
+ [[fallthrough]];
+ case Utf8GenerationState::SecondContinuationByte_Generic:
+ case Utf8GenerationState::SecondContinuationByte_HighSurrogate:
+ str.pop_back();
+ [[fallthrough]];
+ case Utf8GenerationState::ContinuationByte_Generic:
+ case Utf8GenerationState::ContinuationByte_LowLeadingByte:
+ case Utf8GenerationState::FirstContinuationByte_Generic:
+ case Utf8GenerationState::FirstContinuationByte_LowLeadingByte:
+ case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte:
+ str.pop_back();
+ [[fallthrough]];
+ case Utf8GenerationState::LeadingByte_Generic:
+ case Utf8GenerationState::LeadingByte_AfterBackslash:
+ // No backtracking required.
+ break;
+ }
+
+done:
+ return std::make_pair(str, pos - data);
+}
+} // namespace
+
+namespace jazzer {
+// Exposed for testing only.
+std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data,
+ jint max_bytes, jint max_length,
+ bool ascii_only,
+ bool stop_on_backslash) {
+ if (ascii_only) {
+ if (stop_on_backslash) {
+ return ::FixUpModifiedUtf8<true, true>(data, max_bytes, max_length);
+ } else {
+ return ::FixUpModifiedUtf8<true, false>(data, max_bytes, max_length);
+ }
+ } else {
+ if (stop_on_backslash) {
+ return ::FixUpModifiedUtf8<false, true>(data, max_bytes, max_length);
+ } else {
+ return ::FixUpModifiedUtf8<false, false>(data, max_bytes, max_length);
+ }
+ }
+}
+} // namespace jazzer
+
+namespace {
+jstring ConsumeStringInternal(JNIEnv &env, jobject self, jint max_length,
+ bool ascii_only, bool stop_on_backslash) {
+ if (max_length < 0) {
+ ThrowIllegalArgumentException(env, "maxLength must not be negative");
+ return nullptr;
+ }
+
+ const auto *dataPtr =
+ reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField));
+ jint remainingBytes = env.GetIntField(self, gRemainingBytesField);
+
+ if (max_length == 0 || remainingBytes == 0) return env.NewStringUTF("");
+
+ if (remainingBytes == 1) {
+ env.SetIntField(self, gRemainingBytesField, 0);
+ return env.NewStringUTF("");
+ }
+
+ std::string str;
+ jint consumed_bytes;
+ std::tie(str, consumed_bytes) = jazzer::FixUpModifiedUtf8(
+ dataPtr, remainingBytes, max_length, ascii_only, stop_on_backslash);
+ env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + consumed_bytes));
+ env.SetIntField(self, gRemainingBytesField, remainingBytes - consumed_bytes);
+ return env.NewStringUTF(str.c_str());
+}
+
+jstring JNICALL ConsumeAsciiString(JNIEnv &env, jobject self, jint max_length) {
+ return ConsumeStringInternal(env, self, max_length, true, true);
+}
+
+jstring JNICALL ConsumeString(JNIEnv &env, jobject self, jint max_length) {
+ return ConsumeStringInternal(env, self, max_length, false, true);
+}
+
+jstring JNICALL ConsumeRemainingAsAsciiString(JNIEnv &env, jobject self) {
+ return ConsumeStringInternal(env, self, std::numeric_limits<jint>::max(),
+ true, false);
+}
+
+jstring JNICALL ConsumeRemainingAsString(JNIEnv &env, jobject self) {
+ return ConsumeStringInternal(env, self, std::numeric_limits<jint>::max(),
+ false, false);
+}
+
+std::size_t RemainingBytes(JNIEnv &env, jobject self) {
+ return env.GetIntField(self, gRemainingBytesField);
+}
+
+const JNINativeMethod kFuzzedDataMethods[]{
+ {(char *)"consumeBoolean", (char *)"()Z", (void *)&ConsumeBool},
+ {(char *)"consumeByte", (char *)"()B", (void *)&ConsumeIntegral<jbyte>},
+ {(char *)"consumeByteUnchecked", (char *)"(BB)B",
+ (void *)&ConsumeIntegralInRange<jbyte>},
+ {(char *)"consumeShort", (char *)"()S", (void *)&ConsumeIntegral<jshort>},
+ {(char *)"consumeShortUnchecked", (char *)"(SS)S",
+ (void *)&ConsumeIntegralInRange<jshort>},
+ {(char *)"consumeInt", (char *)"()I", (void *)&ConsumeIntegral<jint>},
+ {(char *)"consumeIntUnchecked", (char *)"(II)I",
+ (void *)&ConsumeIntegralInRange<jint>},
+ {(char *)"consumeLong", (char *)"()J", (void *)&ConsumeIntegral<jlong>},
+ {(char *)"consumeLongUnchecked", (char *)"(JJ)J",
+ (void *)&ConsumeIntegralInRange<jlong>},
+ {(char *)"consumeFloat", (char *)"()F", (void *)&ConsumeFloat<jfloat>},
+ {(char *)"consumeRegularFloat", (char *)"()F",
+ (void *)&ConsumeRegularFloat<jfloat>},
+ {(char *)"consumeRegularFloatUnchecked", (char *)"(FF)F",
+ (void *)&ConsumeFloatInRange<jfloat>},
+ {(char *)"consumeProbabilityFloat", (char *)"()F",
+ (void *)&ConsumeProbability<jfloat>},
+ {(char *)"consumeDouble", (char *)"()D", (void *)&ConsumeFloat<jdouble>},
+ {(char *)"consumeRegularDouble", (char *)"()D",
+ (void *)&ConsumeRegularFloat<jdouble>},
+ {(char *)"consumeRegularDoubleUnchecked", (char *)"(DD)D",
+ (void *)&ConsumeFloatInRange<jdouble>},
+ {(char *)"consumeProbabilityDouble", (char *)"()D",
+ (void *)&ConsumeProbability<jdouble>},
+ {(char *)"consumeChar", (char *)"()C", (void *)&ConsumeChar},
+ {(char *)"consumeCharUnchecked", (char *)"(CC)C",
+ (void *)&ConsumeIntegralInRange<jchar>},
+ {(char *)"consumeCharNoSurrogates", (char *)"()C",
+ (void *)&ConsumeCharNoSurrogates},
+ {(char *)"consumeAsciiString", (char *)"(I)Ljava/lang/String;",
+ (void *)&ConsumeAsciiString},
+ {(char *)"consumeRemainingAsAsciiString", (char *)"()Ljava/lang/String;",
+ (void *)&ConsumeRemainingAsAsciiString},
+ {(char *)"consumeString", (char *)"(I)Ljava/lang/String;",
+ (void *)&ConsumeString},
+ {(char *)"consumeRemainingAsString", (char *)"()Ljava/lang/String;",
+ (void *)&ConsumeRemainingAsString},
+ {(char *)"consumeBooleans", (char *)"(I)[Z",
+ (void *)&ConsumeIntegralArray<jboolean>},
+ {(char *)"consumeBytes", (char *)"(I)[B",
+ (void *)&ConsumeIntegralArray<jbyte>},
+ {(char *)"consumeShorts", (char *)"(I)[S",
+ (void *)&ConsumeIntegralArray<jshort>},
+ {(char *)"consumeInts", (char *)"(I)[I",
+ (void *)&ConsumeIntegralArray<jint>},
+ {(char *)"consumeLongs", (char *)"(I)[J",
+ (void *)&ConsumeIntegralArray<jlong>},
+ {(char *)"consumeRemainingAsBytes", (char *)"()[B",
+ (void *)&ConsumeRemainingAsArray<jbyte>},
+ {(char *)"remainingBytes", (char *)"()I", (void *)&RemainingBytes},
+};
+const jint kNumFuzzedDataMethods =
+ sizeof(kFuzzedDataMethods) / sizeof(kFuzzedDataMethods[0]);
+} // namespace
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_driver_FuzzedDataProviderImpl_nativeInit(
+ JNIEnv *env, jclass clazz) {
+ env->RegisterNatives(clazz, kFuzzedDataMethods, kNumFuzzedDataMethods);
+ gDataPtrField = env->GetFieldID(clazz, "dataPtr", "J");
+ gRemainingBytesField = env->GetFieldID(clazz, "remainingBytes", "I");
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp
new file mode 100644
index 00000000..2395cd97
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp
@@ -0,0 +1,98 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+namespace jazzer {
+std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *pos,
+ jint max_bytes, jint max_length,
+ bool ascii_only,
+ bool stop_on_backslash);
+}
+
+std::pair<std::string, jint> FixUpRemainingModifiedUtf8(
+ const std::string &str, bool ascii_only, bool stop_on_backslash) {
+ return jazzer::FixUpModifiedUtf8(
+ reinterpret_cast<const uint8_t *>(str.c_str()), str.length(),
+ std::numeric_limits<jint>::max(), ascii_only, stop_on_backslash);
+}
+
+std::pair<std::string, jint> expect(const std::string &s, jint i) {
+ return std::make_pair(s, i);
+}
+
+using namespace std::literals::string_literals;
+TEST(FixUpModifiedUtf8Test, FullUtf8_ContinueOnBackslash) {
+ EXPECT_EQ(expect("jazzer"s, 6),
+ FixUpRemainingModifiedUtf8("jazzer"s, false, false));
+ EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7),
+ FixUpRemainingModifiedUtf8("ja\0zzer"s, false, false));
+ EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, false));
+ EXPECT_EQ(expect("ja\\zzer"s, 7),
+ FixUpRemainingModifiedUtf8("ja\\zzer"s, false, false));
+ EXPECT_EQ(expect("ja\\\\zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, false));
+ EXPECT_EQ(expect("ۧ"s, 5),
+ FixUpRemainingModifiedUtf8(u8"ۧ"s, false, false));
+}
+
+TEST(FixUpModifiedUtf8Test, AsciiOnly_ContinueOnBackslash) {
+ EXPECT_EQ(expect("jazzer"s, 6),
+ FixUpRemainingModifiedUtf8("jazzer"s, true, false));
+ EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7),
+ FixUpRemainingModifiedUtf8("ja\0zzer"s, true, false));
+ EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, false));
+ EXPECT_EQ(expect("ja\\zzer"s, 7),
+ FixUpRemainingModifiedUtf8("ja\\zzer"s, true, false));
+ EXPECT_EQ(expect("ja\\\\zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, false));
+ EXPECT_EQ(expect("\x62\x02\x2C\x43\x1F"s, 5),
+ FixUpRemainingModifiedUtf8(u8"ۧ"s, true, false));
+}
+
+TEST(FixUpModifiedUtf8Test, FullUtf8_StopOnBackslash) {
+ EXPECT_EQ(expect("jazzer"s, 6),
+ FixUpRemainingModifiedUtf8("jazzer"s, false, true));
+ EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7),
+ FixUpRemainingModifiedUtf8("ja\0zzer"s, false, true));
+ EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, true));
+ EXPECT_EQ(expect("ja"s, 4),
+ FixUpRemainingModifiedUtf8("ja\\zzer"s, false, true));
+ EXPECT_EQ(expect("ja\\zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, true));
+}
+
+TEST(FixUpModifiedUtf8Test, AsciiOnly_StopOnBackslash) {
+ EXPECT_EQ(expect("jazzer"s, 6),
+ FixUpRemainingModifiedUtf8("jazzer"s, true, true));
+ EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7),
+ FixUpRemainingModifiedUtf8("ja\0zzer"s, true, true));
+ EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, true));
+ EXPECT_EQ(expect("ja"s, 4),
+ FixUpRemainingModifiedUtf8("ja\\zzer"s, true, true));
+ EXPECT_EQ(expect("ja\\zzer"s, 8),
+ FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, true));
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp b/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp
new file mode 100644
index 00000000..23a86c53
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp
@@ -0,0 +1,56 @@
+// Copyright 2022 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <dlfcn.h>
+#include <jni.h>
+
+#include <cstdlib>
+
+#if defined(_ANDROID)
+#define __jni_version__ JNI_VERSION_1_6
+#else
+#define __jni_version__ JNI_VERSION_1_8
+#endif
+
+// The jazzer_preload library, if used, forwards all calls to native libFuzzer
+// hooks such as __sanitizer_cov_trace_cmp8 to the Jazzer JNI library. In order
+// to load the hook symbols when the library is ready, it needs to be passed a
+// handle - the JVM loads libraries with RTLD_LOCAL and thus their symbols
+// wouldn't be found as part of the global lookup procedure.
+jint JNI_OnLoad(JavaVM *, void *) {
+ Dl_info info;
+
+ if (!dladdr(reinterpret_cast<const void *>(&JNI_OnLoad), &info) ||
+ !info.dli_fname) {
+ fprintf(stderr, "Failed to determine our dli_fname\n");
+ abort();
+ }
+
+ void *handle = dlopen(info.dli_fname, RTLD_NOLOAD | RTLD_LAZY);
+ if (handle == nullptr) {
+ fprintf(stderr, "Failed to dlopen self: %s\n", dlerror());
+ abort();
+ }
+
+ void *preload_init = dlsym(RTLD_DEFAULT, "jazzer_preload_init");
+ // jazzer_preload is only preloaded when Jazzer is started with --native, so
+ // not finding this method is an expected error.
+ if (preload_init) {
+ reinterpret_cast<void (*)(void *)>(preload_init)(handle);
+ }
+
+ dlclose(handle);
+
+ return __jni_version__;
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp b/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp
new file mode 100644
index 00000000..8764aaaa
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp
@@ -0,0 +1,184 @@
+// Copyright 2022 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include <cstddef>
+#include <cstdint>
+
+#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h"
+#include "sanitizer_hooks_with_pc.h"
+
+namespace {
+
+extern "C" {
+void __sanitizer_weak_hook_compare_bytes(void *caller_pc, const void *s1,
+ const void *s2, std::size_t n1,
+ std::size_t n2, int result);
+void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
+ const void *s2, size_t len2, void *result);
+}
+
+inline __attribute__((always_inline)) void *idToPc(jint id) {
+ return reinterpret_cast<void *>(static_cast<uintptr_t>(id));
+}
+} // namespace
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0(
+ JNIEnv *env, jclass cls, jbyteArray needle, jint id) {
+ jint needle_length = env->GetArrayLength(needle);
+ auto *needle_native =
+ static_cast<jbyte *>(env->GetPrimitiveArrayCritical(needle, nullptr));
+ __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native,
+ needle_length, nullptr);
+ env->ReleasePrimitiveArrayCritical(needle, needle_native, JNI_ABORT);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0(
+ jint needle_length, jbyte *needle_native, jint id) {
+ __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native,
+ needle_length, nullptr);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp(
+ JNIEnv *env, jclass cls, jbyteArray b1, jbyteArray b2, jint result,
+ jint id) {
+ jint b1_length = env->GetArrayLength(b1);
+ jint b2_length = env->GetArrayLength(b2);
+ auto *b1_native =
+ static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b1, nullptr));
+ auto *b2_native =
+ static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b2, nullptr));
+ __sanitizer_weak_hook_compare_bytes(idToPc(id), b1_native, b2_native,
+ b1_length, b2_length, result);
+ env->ReleasePrimitiveArrayCritical(b1, b1_native, JNI_ABORT);
+ env->ReleasePrimitiveArrayCritical(b2, b2_native, JNI_ABORT);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp(
+ jint b1_length, jbyte *b1, jint b2_length, jbyte *b2, jint result,
+ jint id) {
+ __sanitizer_weak_hook_compare_bytes(idToPc(id), b1, b2, b1_length, b2_length,
+ result);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong(
+ JNIEnv *env, jclass cls, jlong value1, jlong value2, jint id) {
+ __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong(
+ jlong value1, jlong value2, jint id) {
+ __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt(
+ JNIEnv *env, jclass cls, jint value1, jint value2, jint id) {
+ __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt(
+ jint value1, jint value2, jint id) {
+ __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt(
+ JNIEnv *env, jclass cls, jint value1, jint value2, jint id) {
+ __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt(
+ jint value1, jint value2, jint id) {
+ __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch(
+ JNIEnv *env, jclass cls, jlong switch_value,
+ jlongArray libfuzzer_case_values, jint id) {
+ auto *case_values = static_cast<jlong *>(
+ env->GetPrimitiveArrayCritical(libfuzzer_case_values, nullptr));
+ __sanitizer_cov_trace_switch_with_pc(
+ idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values));
+ env->ReleasePrimitiveArrayCritical(libfuzzer_case_values, case_values,
+ JNI_ABORT);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch(
+ jlong switch_value, jint libfuzzer_case_values_length, jlong *case_values,
+ jint id) {
+ __sanitizer_cov_trace_switch_with_pc(
+ idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values));
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong(
+ JNIEnv *env, jclass cls, jlong value, jint id) {
+ __sanitizer_cov_trace_div8_with_pc(idToPc(id), value);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong(
+ jlong value, jint id) {
+ __sanitizer_cov_trace_div8_with_pc(idToPc(id), value);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt(
+ JNIEnv *env, jclass cls, jint value, jint id) {
+ __sanitizer_cov_trace_div4_with_pc(idToPc(id), value);
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt(
+ jint value, jint id) {
+ __sanitizer_cov_trace_div4_with_pc(idToPc(id), value);
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep(
+ JNIEnv *env, jclass cls, jlong idx, jint id) {
+ __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx));
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep(
+ jlong idx, jint id) {
+ __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx));
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir(
+ JNIEnv *env, jclass cls, jint caller_id, jint callee_id) {
+ __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id),
+ static_cast<uintptr_t>(callee_id));
+}
+
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL
+JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir(
+ jint caller_id, jint callee_id) {
+ __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id),
+ static_cast<uintptr_t>(callee_id));
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp b/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp
new file mode 100644
index 00000000..b7a0df5d
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp
@@ -0,0 +1,131 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <mutex>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/str_split.h"
+#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h"
+
+namespace {
+bool is_using_native_libraries = false;
+std::once_flag ignore_list_flag;
+std::vector<std::pair<uintptr_t, uintptr_t>> ignore_for_interception_ranges;
+
+/**
+ * Adds the address ranges of executable segments of the library lib_name to
+ * the ignorelist for C standard library function interception (strcmp, memcmp,
+ * ...).
+ */
+void ignoreLibraryForInterception(const std::string &lib_name) {
+ std::ifstream loaded_libs("/proc/self/maps");
+ if (!loaded_libs) {
+ // This early exit is taken e.g. on macOS, where /proc does not exist.
+ return;
+ }
+ std::string line;
+ while (std::getline(loaded_libs, line)) {
+ if (!absl::StrContains(line, lib_name)) continue;
+ // clang-format off
+ // A typical line looks as follows:
+ // 7f15356c9000-7f1536367000 r-xp 0020d000 fd:01 19275673 /usr/lib/jvm/java-15-openjdk-amd64/lib/server/libjvm.so
+ // clang-format on
+ std::vector<std::string> parts =
+ absl::StrSplit(line, ' ', absl::SkipEmpty());
+ if (parts.size() != 6) {
+ std::cout << "ERROR: Invalid format for /proc/self/maps\n"
+ << line << std::endl;
+ exit(1);
+ }
+ // Skip non-executable address rang"s.
+ if (!absl::StrContains(parts[1], "x")) continue;
+ std::string range_str = parts[0];
+ std::vector<std::string> range = absl::StrSplit(range_str, "-");
+ if (range.size() != 2) {
+ std::cout
+ << "ERROR: Unexpected address range format in /proc/self/maps line: "
+ << range_str << std::endl;
+ exit(1);
+ }
+ std::size_t pos;
+ auto start = std::stoull(range[0], &pos, 16);
+ if (pos != range[0].size()) {
+ std::cout
+ << "ERROR: Unexpected address range format in /proc/self/maps line: "
+ << range_str << std::endl;
+ exit(1);
+ }
+ auto end = std::stoull(range[1], &pos, 16);
+ if (pos != range[0].size()) {
+ std::cout
+ << "ERROR: Unexpected address range format in /proc/self/maps line: "
+ << range_str << std::endl;
+ exit(1);
+ }
+ ignore_for_interception_ranges.emplace_back(start, end);
+ }
+}
+
+const std::vector<std::string> kLibrariesToIgnoreForInterception = {
+ // The launcher executable itself can be treated just like a library.
+ "jazzer", "libjazzer_preload.so",
+ "libinstrument.so", "libjava.so",
+ "libjimage.so", "libjli.so",
+ "libjvm.so", "libnet.so",
+ "libverify.so", "libzip.so",
+};
+} // namespace
+
+extern "C" [[maybe_unused]] bool __sanitizer_weak_is_relevant_pc(
+ void *caller_pc) {
+ // If the fuzz target is not using native libraries, calls to strcmp, memcmp,
+ // etc. should never be intercepted. The values reported if they were at best
+ // duplicate the values received from our bytecode instrumentation and at
+ // worst pollute the table of recent compares with string internal to the JDK.
+ if (!is_using_native_libraries) return false;
+ // If the fuzz target is using native libraries, intercept calls only if they
+ // don't originate from those address ranges that are known to belong to the
+ // JDK.
+ return std::none_of(
+ ignore_for_interception_ranges.cbegin(),
+ ignore_for_interception_ranges.cend(),
+ [caller_pc](const std::pair<uintptr_t, uintptr_t> &range) {
+ uintptr_t start;
+ uintptr_t end;
+ std::tie(start, end) = range;
+ auto address = reinterpret_cast<uintptr_t>(caller_pc);
+ return start <= address && address <= end;
+ });
+}
+
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_handleLibraryLoad(
+ JNIEnv *, jclass) {
+ std::call_once(ignore_list_flag, [] {
+ std::cout << "INFO: detected a native library load, enabling interception "
+ "for libc functions"
+ << std::endl;
+ for (const auto &lib_name : kLibrariesToIgnoreForInterception)
+ ignoreLibraryForInterception(lib_name);
+ // Enable the ignore list after it has been populated since vector is not
+ // thread-safe with respect to concurrent writes and reads.
+ is_using_native_libraries = true;
+ });
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp b/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp
new file mode 100644
index 00000000..4e21612b
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp
@@ -0,0 +1,31 @@
+// Copyright 2023 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstddef>
+#include <cstdint>
+
+#include "com_code_intelligence_jazzer_runtime_Mutator.h"
+
+extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize);
+
+[[maybe_unused]] jint
+Java_com_code_1intelligence_jazzer_runtime_Mutator_defaultMutateNative(
+ JNIEnv *env, jclass, jbyteArray jni_data, jint size) {
+ jint maxSize = env->GetArrayLength(jni_data);
+ uint8_t *data =
+ static_cast<uint8_t *>(env->GetPrimitiveArrayCritical(jni_data, nullptr));
+ jint res = LLVMFuzzerMutate(data, size, maxSize);
+ env->ReleasePrimitiveArrayCritical(jni_data, data, 0);
+ return res;
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h
new file mode 100644
index 00000000..be655adb
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2021 Code Intelligence GmbH
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+// This file declares variants of the libFuzzer compare, division, switch and
+// gep hooks that accept an additional caller_pc argument that can be used to
+// pass a custom value that is recorded as the caller's instruction pointer
+// ("program counter"). This allows synthetic program counters obtained from
+// Java coverage information to be used with libFuzzer's value profile, with
+// which it records detailed information about the result of compares and
+// associates it with particular coverage locations.
+//
+// Note: Only the lower 9 bits of the caller_pc argument are used by libFuzzer.
+#ifdef __cplusplus
+extern "C" {
+#endif
+void __sanitizer_cov_trace_cmp4_with_pc(void *caller_pc, uint32_t arg1,
+ uint32_t arg2);
+void __sanitizer_cov_trace_cmp8_with_pc(void *caller_pc, uint64_t arg1,
+ uint64_t arg2);
+
+void __sanitizer_cov_trace_switch_with_pc(void *caller_pc, uint64_t val,
+ uint64_t *cases);
+
+void __sanitizer_cov_trace_div4_with_pc(void *caller_pc, uint32_t val);
+void __sanitizer_cov_trace_div8_with_pc(void *caller_pc, uint64_t val);
+
+void __sanitizer_cov_trace_gep_with_pc(void *caller_pc, uintptr_t idx);
+
+void __sanitizer_cov_trace_pc_indir_with_pc(void *caller_pc, uintptr_t callee);
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp
new file mode 100644
index 00000000..abc5f04e
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp
@@ -0,0 +1,26 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Suppress libFuzzer warnings about missing sanitizer methods in non-sanitizer
+// builds.
+extern "C" [[maybe_unused]] int __sanitizer_acquire_crash_state() { return 1; }
+
+namespace jazzer {
+void DumpJvmStackTraces();
+}
+
+// Dump a JVM stack trace on timeouts.
+extern "C" [[maybe_unused]] void __sanitizer_print_stack_trace() {
+ jazzer::DumpJvmStackTraces();
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp b/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp
new file mode 100644
index 00000000..e284925d
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp
@@ -0,0 +1,40 @@
+// Copyright 2021 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include <atomic>
+#include <csignal>
+
+#include "com_code_intelligence_jazzer_driver_SignalHandler.h"
+
+#ifdef _WIN32
+// Windows does not have SIGUSR1, which triggers a graceful exit of libFuzzer.
+// Instead, trigger a hard exit.
+#define SIGUSR1 SIGTERM
+#endif
+
+// Handles SIGINT raised while running Java code.
+[[maybe_unused]] void
+Java_com_code_1intelligence_jazzer_driver_SignalHandler_handleInterrupt(
+ JNIEnv *, jclass) {
+ static std::atomic<bool> already_exiting{false};
+ if (!already_exiting.exchange(true)) {
+ // Let libFuzzer exit gracefully when the JVM received SIGINT.
+ raise(SIGUSR1);
+ } else {
+ // Exit libFuzzer forcefully on repeated SIGINTs.
+ raise(SIGTERM);
+ }
+}
diff --git a/src/main/native/com/code_intelligence/jazzer/jazzer_preload.c b/src/main/native/com/code_intelligence/jazzer/jazzer_preload.c
new file mode 100644
index 00000000..074c3d22
--- /dev/null
+++ b/src/main/native/com/code_intelligence/jazzer/jazzer_preload.c
@@ -0,0 +1,249 @@
+// Copyright 2022 Code Intelligence GmbH
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * Dynamically exported definitions of fuzzer hooks and libc functions that
+ * forward to the symbols provided by the jazzer_driver JNI library once it has
+ * been loaded.
+ */
+
+#define _GNU_SOURCE // for RTLD_NEXT
+#include <dlfcn.h>
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdint.h>
+#ifdef __APPLE__
+// Using dyld's interpose feature requires knowing the addresses of libc
+// functions.
+#include <string.h>
+#endif
+
+#if defined(__APPLE__) && defined(__arm64__)
+// arm64 has a fixed instruction length of 32 bits, which means that the lowest
+// two bits of the return address of a function are always zero. Since
+// libFuzzer's value profiling uses the lowest bits of the address to index into
+// a hash table, we increase their entropy by shifting away the constant bits.
+#define GET_CALLER_PC() \
+ ((void *)(((uintptr_t)__builtin_return_address(0)) >> 2))
+#else
+#define GET_CALLER_PC() __builtin_return_address(0)
+#endif
+#define LIKELY(x) __builtin_expect(!!(x), 1)
+#define UNLIKELY(x) __builtin_expect(!!(x), 0)
+
+// Unwraps (foo, bar) passed as arguments to foo, bar - this allows passing
+// multiple var args into a single macro.
+#define UNWRAP_VA_ARGS(...) __VA_ARGS__
+
+// Define a dynamic, global symbol such as __sanitizer_weak_hook_memcmp that
+// calls the local symbol of the same name in the jazzer_driver shared library
+// loaded in the JVM.
+#define DEFINE_LIBC_HOOK(name, ret, params, args) \
+ typedef void (*name##_hook_t)(void *, UNWRAP_VA_ARGS params, ret); \
+ static _Atomic name##_hook_t name##_hook; \
+ \
+ __attribute__((visibility("default"))) void __sanitizer_weak_hook_##name( \
+ void *called_pc, UNWRAP_VA_ARGS params, ret result) { \
+ name##_hook_t hook = \
+ atomic_load_explicit(&name##_hook, memory_order_relaxed); \
+ if (LIKELY(hook != NULL)) { \
+ hook(called_pc, UNWRAP_VA_ARGS args, result); \
+ } \
+ }
+
+#define INIT_LIBC_HOOK(handle, name) \
+ atomic_store(&name##_hook, dlsym(handle, "__sanitizer_weak_hook_" #name))
+
+#ifdef __linux__
+// Alternate definitions for libc functions mimicking those that libFuzzer would
+// provide if it were linked into the JVM. All these functions invoke the real
+// libc function loaded from the next library in search order (either libc
+// itself or a sanitizer's interceptor).
+//
+// Function pointers have to be loaded and stored atomically even if libc
+// functions are invoked from different threads, but we do not need any
+// synchronization guarantees - in the worst case, we will non-deterministically
+// lose a few hook invocations.
+
+#define DEFINE_LIBC_INTERCEPTOR(name, ret, params, args) \
+ DEFINE_LIBC_HOOK(name, ret, params, args) \
+ \
+ typedef ret (*name##_t)(UNWRAP_VA_ARGS params); \
+ static _Atomic name##_t name##_real; \
+ \
+ __attribute__((visibility("default"))) ret name(UNWRAP_VA_ARGS params) { \
+ name##_t name##_real_local = \
+ atomic_load_explicit(&name##_real, memory_order_relaxed); \
+ if (UNLIKELY(name##_real_local == NULL)) { \
+ name##_real_local = dlsym(RTLD_NEXT, #name); \
+ atomic_store_explicit(&name##_real, name##_real_local, \
+ memory_order_relaxed); \
+ } \
+ ret result = name##_real_local(UNWRAP_VA_ARGS args); \
+ __sanitizer_weak_hook_##name(GET_CALLER_PC(), UNWRAP_VA_ARGS args, \
+ result); \
+ return result; \
+ }
+
+#elif __APPLE__
+// macOS namespace concept makes it impossible to override symbols in shared
+// library dependencies simply by defining them. Instead, the dynamic linker's
+// interpose feature is used to request that one function, identified by its
+// address, is replaced by another at runtime.
+
+typedef struct {
+ const uintptr_t interceptor;
+ const uintptr_t func;
+} interpose_t;
+
+#define INTERPOSE(_interceptor, _func) \
+ __attribute__((used)) static interpose_t _interpose_##_func \
+ __attribute__((section("__DATA,__interpose"))) = { \
+ (uintptr_t)&_interceptor, (uintptr_t)&_func};
+
+#define DEFINE_LIBC_INTERCEPTOR(name, ret, params, args) \
+ DEFINE_LIBC_HOOK(name, ret, params, args) \
+ \
+ __attribute__((visibility("default"))) \
+ ret interposed_##name(UNWRAP_VA_ARGS params) { \
+ ret result = name(UNWRAP_VA_ARGS args); \
+ __sanitizer_weak_hook_##name(GET_CALLER_PC(), UNWRAP_VA_ARGS args, \
+ result); \
+ return result; \
+ } \
+ \
+ INTERPOSE(interposed_##name, name)
+#else
+// TODO: Use https://github.com/microsoft/Detours to add Windows support.
+#error "jazzer_preload is not supported on this OS"
+#endif
+
+DEFINE_LIBC_INTERCEPTOR(bcmp, int, (const void *s1, const void *s2, size_t n),
+ (s1, s2, n))
+DEFINE_LIBC_INTERCEPTOR(memcmp, int, (const void *s1, const void *s2, size_t n),
+ (s1, s2, n))
+DEFINE_LIBC_INTERCEPTOR(strncmp, int,
+ (const char *s1, const char *s2, size_t n), (s1, s2, n))
+DEFINE_LIBC_INTERCEPTOR(strncasecmp, int,
+ (const char *s1, const char *s2, size_t n), (s1, s2, n))
+DEFINE_LIBC_INTERCEPTOR(strcmp, int, (const char *s1, const char *s2), (s1, s2))
+DEFINE_LIBC_INTERCEPTOR(strcasecmp, int, (const char *s1, const char *s2),
+ (s1, s2))
+DEFINE_LIBC_INTERCEPTOR(strstr, char *, (const char *s1, const char *s2),
+ (s1, s2))
+DEFINE_LIBC_INTERCEPTOR(strcasestr, char *, (const char *s1, const char *s2),
+ (s1, s2))
+DEFINE_LIBC_INTERCEPTOR(memmem, void *,
+ (const void *s1, size_t n1, const void *s2, size_t n2),
+ (s1, n1, s2, n2))
+
+// Native libraries instrumented for fuzzing include references to fuzzer hooks
+// that are resolved by the dynamic linker. We need to route these to the
+// corresponding local symbols in the Jazzer driver JNI library.
+// The __sanitizer_cov_trace_* family of functions is only invoked from code
+// compiled with -fsanitize=fuzzer. We can assume that the Jazzer JNI library
+// has been loaded before any such code, which necessarily belongs to the fuzz
+// target, is executed and thus don't need NULL checks.
+#define DEFINE_TRACE_HOOK(name, params, args) \
+ typedef void (*trace_##name##_t)(void *, UNWRAP_VA_ARGS params); \
+ static _Atomic trace_##name##_t trace_##name##_with_pc; \
+ \
+ __attribute__((visibility("default"))) void __sanitizer_cov_trace_##name( \
+ UNWRAP_VA_ARGS params) { \
+ trace_##name##_t hook = \
+ atomic_load_explicit(&trace_##name##_with_pc, memory_order_relaxed); \
+ hook(GET_CALLER_PC(), UNWRAP_VA_ARGS args); \
+ }
+
+#define INIT_TRACE_HOOK(handle, name) \
+ atomic_store(&trace_##name##_with_pc, \
+ dlsym(handle, "__sanitizer_cov_trace_" #name "_with_pc"))
+
+DEFINE_TRACE_HOOK(cmp1, (uint8_t arg1, uint8_t arg2), (arg1, arg2));
+DEFINE_TRACE_HOOK(cmp2, (uint16_t arg1, uint16_t arg2), (arg1, arg2));
+DEFINE_TRACE_HOOK(cmp4, (uint32_t arg1, uint32_t arg2), (arg1, arg2));
+DEFINE_TRACE_HOOK(cmp8, (uint64_t arg1, uint64_t arg2), (arg1, arg2));
+
+DEFINE_TRACE_HOOK(const_cmp1, (uint8_t arg1, uint8_t arg2), (arg1, arg2));
+DEFINE_TRACE_HOOK(const_cmp2, (uint16_t arg1, uint16_t arg2), (arg1, arg2));
+DEFINE_TRACE_HOOK(const_cmp4, (uint32_t arg1, uint32_t arg2), (arg1, arg2));
+DEFINE_TRACE_HOOK(const_cmp8, (uint64_t arg1, uint64_t arg2), (arg1, arg2));
+
+DEFINE_TRACE_HOOK(switch, (uint64_t val, uint64_t *cases), (val, cases));
+
+DEFINE_TRACE_HOOK(div4, (uint32_t arg), (arg))
+DEFINE_TRACE_HOOK(div8, (uint64_t arg), (arg))
+
+DEFINE_TRACE_HOOK(gep, (uintptr_t arg), (arg))
+
+DEFINE_TRACE_HOOK(pc_indir, (uintptr_t arg), (arg))
+
+typedef void (*cov_8bit_counters_init_t)(uint8_t *, uint8_t *);
+static _Atomic cov_8bit_counters_init_t cov_8bit_counters_init;
+typedef void (*cov_pcs_init_t)(const uintptr_t *, const uintptr_t *);
+static _Atomic cov_pcs_init_t cov_pcs_init;
+
+__attribute__((visibility("default"))) void __sanitizer_cov_8bit_counters_init(
+ uint8_t *start, uint8_t *end) {
+ cov_8bit_counters_init_t init =
+ atomic_load_explicit(&cov_8bit_counters_init, memory_order_relaxed);
+ init(start, end);
+}
+
+__attribute__((visibility("default"))) void __sanitizer_cov_pcs_init(
+ const uintptr_t *pcs_beg, const uintptr_t *pcs_end) {
+ cov_pcs_init_t init =
+ atomic_load_explicit(&cov_pcs_init, memory_order_relaxed);
+ init(pcs_beg, pcs_end);
+}
+
+// TODO: This is never updated and thus doesn't provide any information to the
+// fuzzer.
+__attribute__((
+ visibility("default"))) _Thread_local uintptr_t __sancov_lowest_stack = 0;
+
+__attribute__((visibility("default"))) void jazzer_preload_init(void *handle) {
+ INIT_LIBC_HOOK(handle, bcmp);
+ INIT_LIBC_HOOK(handle, memcmp);
+ INIT_LIBC_HOOK(handle, strncmp);
+ INIT_LIBC_HOOK(handle, strcmp);
+ INIT_LIBC_HOOK(handle, strncasecmp);
+ INIT_LIBC_HOOK(handle, strcasecmp);
+ INIT_LIBC_HOOK(handle, strstr);
+ INIT_LIBC_HOOK(handle, strcasestr);
+ INIT_LIBC_HOOK(handle, memmem);
+
+ INIT_TRACE_HOOK(handle, cmp1);
+ INIT_TRACE_HOOK(handle, cmp2);
+ INIT_TRACE_HOOK(handle, cmp4);
+ INIT_TRACE_HOOK(handle, cmp8);
+
+ INIT_TRACE_HOOK(handle, const_cmp1);
+ INIT_TRACE_HOOK(handle, const_cmp2);
+ INIT_TRACE_HOOK(handle, const_cmp4);
+ INIT_TRACE_HOOK(handle, const_cmp8);
+
+ INIT_TRACE_HOOK(handle, switch);
+
+ INIT_TRACE_HOOK(handle, div4);
+ INIT_TRACE_HOOK(handle, div8);
+
+ INIT_TRACE_HOOK(handle, gep);
+
+ INIT_TRACE_HOOK(handle, pc_indir);
+
+ atomic_store(&cov_8bit_counters_init,
+ dlsym(handle, "__sanitizer_cov_8bit_counters_init"));
+ atomic_store(&cov_pcs_init, dlsym(handle, "__sanitizer_cov_pcs_init"));
+}