diff options
Diffstat (limited to 'src/main/native/com')
22 files changed, 2931 insertions, 0 deletions
# diff --git a/src/main/native/com/code_intelligence/jazzer/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/BUILD.bazel
# new file mode 100644
# index 00000000..689adc9a
# --- /dev/null
# +++ b/src/main/native/com/code_intelligence/jazzer/BUILD.bazel
# @@ -0,0 +1,60 @@
load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library")
load("//bazel:compat.bzl", "MULTI_PLATFORM", "SKIP_ON_WINDOWS")

# Symbols passed to the linker via --export-dynamic-symbol on Linux (see the
# linkopts of :jazzer_preload below).
#
# NOTE(review): the original list contained "__sanitizer_cov_trace_cmp4" and
# "__sanitizer_cov_trace_const_cmp4" twice each and no 2-byte variants. The
# SanitizerCoverage hook family is cmp1/cmp2/cmp4/cmp8 (and the const_
# equivalents), so the duplicates are assumed to be typos for the *2 hooks.
# Confirm against jazzer_preload.c.
DYNAMIC_SYMBOLS_TO_EXPORT = [
    "__sancov_lowest_stack",
    "__sanitizer_cov_8bit_counters_init",
    "__sanitizer_cov_pcs_init",
    "__sanitizer_cov_trace_cmp1",
    "__sanitizer_cov_trace_cmp2",
    "__sanitizer_cov_trace_cmp4",
    "__sanitizer_cov_trace_cmp8",
    "__sanitizer_cov_trace_const_cmp1",
    "__sanitizer_cov_trace_const_cmp2",
    "__sanitizer_cov_trace_const_cmp4",
    "__sanitizer_cov_trace_const_cmp8",
    "__sanitizer_cov_trace_div4",
    "__sanitizer_cov_trace_div8",
    "__sanitizer_cov_trace_gep",
    "__sanitizer_cov_trace_pc_indir",
    "__sanitizer_cov_trace_switch",
    "__sanitizer_weak_hook_memcmp",
    "__sanitizer_weak_hook_memmem",
    "__sanitizer_weak_hook_strcasecmp",
    "__sanitizer_weak_hook_strcasestr",
    "__sanitizer_weak_hook_strcmp",
    "__sanitizer_weak_hook_strncasecmp",
    "__sanitizer_weak_hook_strncmp",
    "__sanitizer_weak_hook_strstr",
    "bcmp",
    "jazzer_preload_init",
    "memcmp",
    "memmem",
    "strcasecmp",
    "strcasestr",
    "strcmp",
    "strncasecmp",
    "strncmp",
    "strstr",
]

# JNI library built from jazzer_preload.c; not built on Windows.
cc_jni_library(
    name = "jazzer_preload",
    srcs = ["jazzer_preload.c"],
    linkopts = select({
        # One --export-dynamic-symbol flag per listed symbol, plus libdl.
        "@platforms//os:linux": [
            "-Wl,--export-dynamic-symbol=" + symbol
            for symbol in DYNAMIC_SYMBOLS_TO_EXPORT
        ] + [
            "-ldl",
        ],
        "@platforms//os:macos": [
            "-ldl",
        ],
        "//conditions:default": [],
    }),
    platforms = MULTI_PLATFORM,
    target_compatible_with = SKIP_ON_WINDOWS,
    visibility = ["//src/main/java/com/code_intelligence/jazzer:__pkg__"],
    deps = ["//src/main/native/com/code_intelligence/jazzer/driver:sanitizer_hooks_with_pc"],
)
# diff --git
# a/src/main/native/com/code_intelligence/jazzer/android/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/android/BUILD.bazel
# new file mode 100644
# index 00000000..74f98cda
# --- /dev/null
# +++ b/src/main/native/com/code_intelligence/jazzer/android/BUILD.bazel
# @@ -0,0 +1,47 @@
load("//bazel:compat.bzl", "SKIP_ON_WINDOWS")
load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library")
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")

# Copies the file provided by the @android_jvmti repository into this package
# as "jvmti.encoded" so the genrule below can consume it.
copy_file(
    name = "jvmti_h_encoded",
    src = "@android_jvmti//file",
    out = "jvmti.encoded",
    is_executable = False,
    tags = ["manual"],
    target_compatible_with = SKIP_ON_WINDOWS,
)

# Base64-decodes "jvmti.encoded" into the "jvmti.h" header that the agent
# sources include.
genrule(
    name = "jvmti_h",
    srcs = [
        "jvmti.encoded",
    ],
    outs = ["jvmti.h"],
    cmd = "base64 --decode $< > $(OUTS)",
    tags = ["manual"],
    target_compatible_with = SKIP_ON_WINDOWS,
)

# Native agent library. It is tagged "manual", so wildcard target patterns do
# not build it. The generated jvmti.h is listed in srcs and found through the
# "." include path.
cc_jni_library(
    name = "android_native_agent",
    srcs = [
        "dex_file_manager.cpp",
        "dex_file_manager.h",
        "jazzer_jvmti_allocator.h",
        "native_agent.cpp",
        ":jvmti_h",
    ],
    includes = [
        ".",
    ],
    linkopts = [
        "-lz",
    ],
    tags = ["manual"],
    target_compatible_with = SKIP_ON_WINDOWS,
    visibility = ["//visibility:public"],
    deps = [
        "@com_google_absl//absl/strings",
        "@jazzer_slicer",
    ],
)
// diff --git a/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.cpp b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.cpp
// new file mode 100644
// index 00000000..b409e82b
// --- /dev/null
// +++ b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.cpp
// @@ -0,0 +1,208 @@
// Copyright 2023 Code Intelligence GmbH
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "dex_file_manager.h" + +#include <algorithm> +#include <iostream> +#include <sstream> +#include <string> +#include <vector> + +#include "jazzer_jvmti_allocator.h" +#include "jvmti.h" +#include "slicer/dex_ir.h" +#include "slicer/reader.h" +#include "slicer/writer.h" + +std::string GetName(const char* name) { + std::stringstream ss; + // Class name needs to be in the format "L<class_name>;" as it is stored in + // the types table in the DEX file for slicer to find it + ss << "L" << name << ";"; + return ss.str(); +} + +bool IsValidIndex(dex::u4 index) { return index != (unsigned)-1; } + +void DexFileManager::addDexFile(const unsigned char* bytes, int length) { + unsigned char* newArr = new unsigned char[length]; + std::copy(bytes, bytes + length, newArr); + + dexFiles.push_back(newArr); + dexFilesSize.push_back(length); +} + +unsigned char* DexFileManager::getClassBytes(const char* className, + int dexFileIndex, jvmtiEnv* jvmti, + size_t* newSize) { + dex::Reader dexReader(dexFiles[dexFileIndex], dexFilesSize[dexFileIndex]); + auto descName = GetName(className); + + auto classIndex = dexReader.FindClassIndex(descName.c_str()); + if (!IsValidIndex(classIndex)) { + *newSize = *newSize; + return nullptr; + } + + dexReader.CreateClassIr(classIndex); + auto oldIr = dexReader.GetIr(); + + dex::Writer writer(oldIr); + JazzerJvmtiAllocator allocator(jvmti); + return writer.CreateImage(&allocator, newSize); +} + +uint32_t DexFileManager::findDexFileForClass(const char* className) { + for (int i = 0; i < dexFiles.size(); i++) { + dex::Reader 
dexReader(dexFiles[i], dexFilesSize[i]); + + std::string descName = GetName(className); + dex::u4 classIndex = dexReader.FindClassIndex(descName.c_str()); + + if (IsValidIndex(classIndex)) { + return i; + } + } + + return -1; +} + +std::vector<std::string> getMethodDescriptions( + std::vector<ir::EncodedMethod*>* encMethodList) { + std::vector<std::string> methodDescs; + + for (int i = 0; i < encMethodList->size(); i++) { + std::stringstream ss; + ss << (*encMethodList)[i]->access_flags; + ss << (*encMethodList)[i]->decl->name->c_str(); + ss << (*encMethodList)[i]->decl->prototype->Signature().c_str(); + + methodDescs.push_back(ss.str()); + } + + sort(methodDescs.begin(), methodDescs.end()); + return methodDescs; +} + +std::vector<std::string> getFieldDescriptions( + std::vector<ir::EncodedField*>* encFieldList) { + std::vector<std::string> fieldDescs; + + for (int i = 0; i < encFieldList->size(); i++) { + std::stringstream ss; + ss << (*encFieldList)[i]->access_flags; + ss << (*encFieldList)[i]->decl->type->descriptor->c_str(); + ss << (*encFieldList)[i]->decl->name->c_str(); + fieldDescs.push_back(ss.str()); + } + + sort(fieldDescs.begin(), fieldDescs.end()); + return fieldDescs; +} + +bool matchFields(std::vector<ir::EncodedField*>* encodedFieldListOne, + std::vector<ir::EncodedField*>* encodedFieldListTwo) { + std::vector<std::string> fDescListOne = + getFieldDescriptions(encodedFieldListOne); + std::vector<std::string> fDescListTwo = + getFieldDescriptions(encodedFieldListTwo); + + if (fDescListOne.size() != fDescListTwo.size()) { + return false; + } + + for (int i = 0; i < fDescListOne.size(); i++) { + if (fDescListOne[i] != fDescListTwo[i]) { + return false; + } + } + + return true; +} + +bool matchMethods(std::vector<ir::EncodedMethod*>* encodedMethodListOne, + std::vector<ir::EncodedMethod*>* encodedMethodListTwo) { + std::vector<std::string> mDescListOne = + getMethodDescriptions(encodedMethodListOne); + std::vector<std::string> mDescListTwo = + 
getMethodDescriptions(encodedMethodListTwo); + + if (mDescListOne.size() != mDescListTwo.size()) { + return false; + } + + for (int i = 0; i < mDescListOne.size(); i++) { + if (mDescListOne[i] != mDescListTwo[i]) { + return false; + } + } + + return true; +} + +bool classStructureMatches(ir::Class* classOne, ir::Class* classTwo) { + return matchMethods(&(classOne->direct_methods), + &(classTwo->direct_methods)) && + matchMethods(&(classOne->virtual_methods), + &(classTwo->virtual_methods)) && + matchFields(&(classOne->static_fields), &(classTwo->static_fields)) && + matchFields(&(classOne->instance_fields), + &(classTwo->instance_fields)) && + classOne->access_flags == classTwo->access_flags; +} + +bool DexFileManager::structureMatches(dex::Reader* oldReader, + dex::Reader* newReader, + const char* className) { + std::string descName = GetName(className); + + dex::u4 oldReaderIndex = oldReader->FindClassIndex(descName.c_str()); + dex::u4 newReaderIndex = newReader->FindClassIndex(descName.c_str()); + + if (!IsValidIndex(oldReaderIndex) || !IsValidIndex(newReaderIndex)) { + return false; + } + + oldReader->CreateClassIr(oldReaderIndex); + newReader->CreateClassIr(newReaderIndex); + + std::shared_ptr<ir::DexFile> oldDexFile = oldReader->GetIr(); + std::shared_ptr<ir::DexFile> newDexFile = newReader->GetIr(); + + for (int i = 0; i < oldDexFile->classes.size(); i++) { + const char* oldClassDescriptor = + oldDexFile->classes[i]->type->descriptor->c_str(); + if (strcmp(oldClassDescriptor, descName.c_str()) != 0) { + continue; + } + + bool match = false; + for (int j = 0; j < newDexFile->classes.size(); j++) { + const char* newClassDescriptor = + newDexFile->classes[j]->type->descriptor->c_str(); + if (strcmp(oldClassDescriptor, newClassDescriptor) == 0) { + match = classStructureMatches(oldDexFile->classes[i].get(), + newDexFile->classes[j].get()); + break; + } + } + + if (!match) { + return false; + } + } + + return true; +} diff --git 
a/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.h b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.h new file mode 100644 index 00000000..2b7dd67a --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/android/dex_file_manager.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> + +#include "jvmti.h" +#include "slicer/reader.h" + +// DexFileManager will contain the contents to multiple DEX files +class DexFileManager { + public: + DexFileManager() {} + + void addDexFile(const unsigned char* bytes, int length); + unsigned char* getClassBytes(const char* className, int dexFileIndex, + jvmtiEnv* jvmti, size_t* newSize); + uint32_t findDexFileForClass(const char* className); + bool structureMatches(dex::Reader* oldReader, dex::Reader* newReader, + const char* className); + + private: + std::vector<unsigned char*> dexFiles; + std::vector<int> dexFilesSize; +}; diff --git a/src/main/native/com/code_intelligence/jazzer/android/jazzer_jvmti_allocator.h b/src/main/native/com/code_intelligence/jazzer/android/jazzer_jvmti_allocator.h new file mode 100644 index 00000000..0748c177 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/android/jazzer_jvmti_allocator.h @@ -0,0 +1,52 @@ +/* + * Copyright 2023 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <iostream> + +#include "slicer/writer.h" + +class JazzerJvmtiAllocator : public dex::Writer::Allocator { + public: + JazzerJvmtiAllocator(jvmtiEnv* jvmti_env) : jvmti_env_(jvmti_env) {} + + virtual void* Allocate(size_t size) { + unsigned char* alloc = nullptr; + jvmtiError error_num = jvmti_env_->Allocate(size, &alloc); + + if (error_num != JVMTI_ERROR_NONE) { + std::cerr << "JazzerJvmtiAllocator Allocation error. JVMTI error: " + << error_num << std::endl; + } + + return (void*)alloc; + } + + virtual void Free(void* ptr) { + if (ptr == nullptr) { + return; + } + + jvmtiError error_num = jvmti_env_->Deallocate((unsigned char*)ptr); + + if (error_num != JVMTI_ERROR_NONE) { + std::cout << "JazzerJvmtiAllocator Free error. JVMTI error: " << error_num + << std::endl; + } + } + + private: + jvmtiEnv* jvmti_env_; +}; diff --git a/src/main/native/com/code_intelligence/jazzer/android/native_agent.cpp b/src/main/native/com/code_intelligence/jazzer/android/native_agent.cpp new file mode 100644 index 00000000..9f0b2ad8 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/android/native_agent.cpp @@ -0,0 +1,313 @@ +// Copyright 2023 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <dlfcn.h> +#include <jni.h> + +#include <fstream> +#include <iostream> +#include <map> +#include <memory> +#include <sstream> +#include <string> +#include <unordered_set> +#include <vector> + +#include "absl/strings/str_split.h" +#include "dex_file_manager.h" +#include "jazzer_jvmti_allocator.h" +#include "jvmti.h" +#include "slicer/arrayview.h" +#include "slicer/dex_format.h" +#include "slicer/reader.h" +#include "slicer/writer.h" + +static std::string agentOptions; +static DexFileManager dfm; + +const std::string kAndroidAgentClass = + "com/code_intelligence/jazzer/android/DexFileManager"; + +void retransformLoadedClasses(jvmtiEnv* jvmti, JNIEnv* env) { + jint classCount = 0; + jclass* classes; + + jvmti->GetLoadedClasses(&classCount, &classes); + + std::vector<jclass> classesToRetransform; + for (int i = 0; i < classCount; i++) { + jboolean isModifiable = false; + jvmti->IsModifiableClass(classes[i], &isModifiable); + + if ((bool)isModifiable) { + classesToRetransform.push_back(classes[i]); + } + } + + jvmtiError errorNum = jvmti->RetransformClasses(classesToRetransform.size(), + &classesToRetransform[0]); + if (errorNum != JVMTI_ERROR_NONE) { + std::cerr << "Could not retransform classes. 
JVMTI error: " << errorNum + << std::endl; + exit(1); + } +} + +std::vector<std::string> getDexFiles(std::string jarPath, JNIEnv* env) { + jclass jazzerClass = env->FindClass(kAndroidAgentClass.c_str()); + if (jazzerClass == nullptr) { + std::cerr << kAndroidAgentClass << " could not be found" << std::endl; + exit(1); + } + + const char* getDexFilesFunction = "getDexFilesForJar"; + jmethodID getDexFilesForJar = + env->GetStaticMethodID(jazzerClass, getDexFilesFunction, + "(Ljava/lang/String;)[Ljava/lang/String;"); + if (getDexFilesForJar == nullptr) { + std::cerr << getDexFilesFunction << " could not be found\n"; + exit(1); + } + + jstring jJarFile = env->NewStringUTF(jarPath.data()); + jobjectArray dexFilesArray = (jobjectArray)env->CallStaticObjectMethod( + jazzerClass, getDexFilesForJar, jJarFile); + + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + exit(1); + } + + int length = env->GetArrayLength(dexFilesArray); + + std::vector<std::string> dexFilesResult; + for (int i = 0; i < length; i++) { + jstring dexFileJstring = + (jstring)env->GetObjectArrayElement(dexFilesArray, i); + const char* dexFileChars = env->GetStringUTFChars(dexFileJstring, NULL); + std::string dexFileString(dexFileChars); + + env->ReleaseStringUTFChars(dexFileJstring, dexFileChars); + dexFilesResult.push_back(dexFileString); + } + + return dexFilesResult; +} + +void initializeBootclassOverrideJar(std::string jarPath, JNIEnv* env) { + std::vector<std::string> dexFiles = getDexFiles(jarPath, env); + + std::cerr << "Adding DEX files for: " << jarPath << std::endl; + for (int i = 0; i < dexFiles.size(); i++) { + std::cerr << "DEX FILE: " << dexFiles[i] << std::endl; + } + + for (int i = 0; i < dexFiles.size(); i++) { + jclass bootHelperClass = env->FindClass(kAndroidAgentClass.c_str()); + if (bootHelperClass == nullptr) { + std::cerr << kAndroidAgentClass << " could not be found" << std::endl; + exit(1); + } + + jmethodID getBytecodeFromDex = + env->GetStaticMethodID(bootHelperClass, 
"getBytecodeFromDex", + "(Ljava/lang/String;Ljava/lang/String;)[B"); + if (getBytecodeFromDex == nullptr) { + std::cerr << "'getBytecodeFromDex' not found\n"; + exit(1); + } + + jstring jjarPath = env->NewStringUTF(jarPath.data()); + jstring jdexFile = env->NewStringUTF(dexFiles[i].data()); + + int length = 1; + std::vector<unsigned char> dexFileBytes; + + jbyteArray dexBytes = (jbyteArray)env->CallStaticObjectMethod( + bootHelperClass, getBytecodeFromDex, jjarPath, jdexFile); + + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + exit(1); + } + + jbyte* data = new jbyte; + data = env->GetByteArrayElements(dexBytes, 0); + length = env->GetArrayLength(dexBytes); + + for (int j = 0; j < length; j++) { + dexFileBytes.push_back(data[j]); + } + + env->DeleteLocalRef(dexBytes); + env->DeleteLocalRef(jjarPath); + env->DeleteLocalRef(jdexFile); + env->DeleteLocalRef(bootHelperClass); + + unsigned char* usData = reinterpret_cast<unsigned char*>(&dexFileBytes[0]); + dfm.addDexFile(usData, length); + } +} + +void JNICALL jazzerClassFileLoadHook( + jvmtiEnv* jvmti, JNIEnv* jni_env, jclass class_being_redefined, + jobject loader, const char* name, jobject protection_domain, + jint class_data_len, const unsigned char* class_data, + jint* new_class_data_len, unsigned char** new_class_data) { + // check if Jazzer class + const char* prefix = "com/code_intelligence/jazzer/"; + if (strncmp(name, prefix, 29) == 0) { + return; + } + + int indx = dfm.findDexFileForClass(name); + if (indx < 0) { + return; + } + + size_t newSize; + unsigned char* newClassDataResult = + dfm.getClassBytes(name, indx, jvmti, &newSize); + + dex::Reader oldReader(const_cast<unsigned char*>(class_data), + (size_t)class_data_len); + dex::Reader newReader(newClassDataResult, newSize); + if (dfm.structureMatches(&oldReader, &newReader, name)) { + std::cout << "REDEFINING WITH INSTRUMENTATION: " << name << std::endl; + *new_class_data = newClassDataResult; + *new_class_data_len = 
static_cast<jint>(newSize); + } +} + +bool fileExists(std::string filePath) { return std::ifstream(filePath).good(); } + +void JNICALL jazzerVMInit(jvmtiEnv* jvmti_env, JNIEnv* jni_env, + jthread thread) { + // Parse agentOptions + + std::stringstream ss(agentOptions); + std::string token; + + std::string jazzerClassesJar; + std::vector<std::string> bootpathClassesOverrides; + while (std::getline(ss, token, ',')) { + std::vector<std::string> split = + absl::StrSplit(token, absl::MaxSplits('=', 1)); + if (split.size() < 2) { + std::cerr << "ERROR: no option given for: " << token; + exit(1); + } + + if (split[0] == "injectJars") { + jazzerClassesJar = split[1]; + } else if (split[0] == "bootstrapClassOverrides") { + bootpathClassesOverrides = + absl::StrSplit(split[1], absl::MaxSplits(':', 10)); + } + } + + if (!fileExists(jazzerClassesJar)) { + std::cerr << "ERROR: Jazzer bootstrap class file not found at: " + << jazzerClassesJar << std::endl; + exit(1); + } + + jvmti_env->AddToBootstrapClassLoaderSearch(jazzerClassesJar.c_str()); + + jvmtiCapabilities jazzerJvmtiCapabilities = { + .can_tag_objects = 0, + .can_generate_field_modification_events = 0, + .can_generate_field_access_events = 0, + .can_get_bytecodes = 0, + .can_get_synthetic_attribute = 0, + .can_get_owned_monitor_info = 0, + .can_get_current_contended_monitor = 0, + .can_get_monitor_info = 0, + .can_pop_frame = 0, + .can_redefine_classes = 1, + .can_signal_thread = 0, + .can_get_source_file_name = 1, + .can_get_line_numbers = 0, + .can_get_source_debug_extension = 0, + .can_access_local_variables = 0, + .can_maintain_original_method_order = 0, + .can_generate_single_step_events = 0, + .can_generate_exception_events = 0, + .can_generate_frame_pop_events = 0, + .can_generate_breakpoint_events = 0, + .can_suspend = 0, + .can_redefine_any_class = 0, + .can_get_current_thread_cpu_time = 0, + .can_get_thread_cpu_time = 0, + .can_generate_method_entry_events = 0, + .can_generate_method_exit_events = 0, + 
.can_generate_all_class_hook_events = 0, + .can_generate_compiled_method_load_events = 0, + .can_generate_monitor_events = 0, + .can_generate_vm_object_alloc_events = 0, + .can_generate_native_method_bind_events = 0, + .can_generate_garbage_collection_events = 0, + .can_generate_object_free_events = 0, + .can_force_early_return = 0, + .can_get_owned_monitor_stack_depth_info = 0, + .can_get_constant_pool = 0, + .can_set_native_method_prefix = 0, + .can_retransform_classes = 1, + .can_retransform_any_class = 0, + .can_generate_resource_exhaustion_heap_events = 0, + .can_generate_resource_exhaustion_threads_events = 0, + }; + + jvmtiError je = jvmti_env->AddCapabilities(&jazzerJvmtiCapabilities); + if (je != JVMTI_ERROR_NONE) { + std::cerr << "JVMTI ERROR: " << je << std::endl; + exit(1); + } + + for (int i = 0; i < bootpathClassesOverrides.size(); i++) { + if (!fileExists(bootpathClassesOverrides[i])) { + std::cerr << "ERROR: Bootpath Class override jar not found at: " + << bootpathClassesOverrides[i] << std::endl; + exit(1); + } + + initializeBootclassOverrideJar(bootpathClassesOverrides[i], jni_env); + } + + retransformLoadedClasses(jvmti_env, jni_env); +} + +JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, char* options, void* reserved) { + jvmtiEnv* jvmti = nullptr; + if (vm->GetEnv((void**)&jvmti, JVMTI_VERSION_1_2) != JNI_OK) { + return 1; + } + + jvmtiEventCallbacks callbacks; + + memset(&callbacks, 0, sizeof(callbacks)); + callbacks.ClassFileLoadHook = jazzerClassFileLoadHook; + callbacks.VMInit = jazzerVMInit; + + jvmti->SetEventCallbacks(&callbacks, sizeof(jvmtiEventCallbacks)); + jvmti->SetEventNotificationMode(JVMTI_ENABLE, + JVMTI_EVENT_CLASS_FILE_LOAD_HOOK, NULL); + jvmti->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_VM_INIT, NULL); + + // Save the options string here, this is the only time it will be available + // however, we wont be able to use this to initialize until VMInit callback is + // called + agentOptions = std::string(options); + 
return 0;
}
# diff --git a/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel
# new file mode 100644
# index 00000000..27d8a1c5
# --- /dev/null
# +++ b/src/main/native/com/code_intelligence/jazzer/driver/BUILD.bazel
# @@ -0,0 +1,166 @@
load("@fmeum_rules_jni//jni:defs.bzl", "cc_jni_library")
load("//bazel:compat.bzl", "MULTI_PLATFORM", "SKIP_ON_WINDOWS")

# JNI library combining :jazzer_driver_lib with libFuzzer (without main).
# :init_jazzer_preload is added on every platform except Windows.
cc_jni_library(
    name = "jazzer_driver",
    platforms = MULTI_PLATFORM,
    visibility = [
        "//src/jmh:__subpackages__",
        "//src/main/java/com/code_intelligence/jazzer/driver:__pkg__",
        "//src/main/java/com/code_intelligence/jazzer/junit:__pkg__",
        "//src/main/java/com/code_intelligence/jazzer/runtime:__pkg__",
        "//src/test:__subpackages__",
    ],
    deps = [
        ":jazzer_driver_lib",
        "@jazzer_libfuzzer//:libfuzzer_no_main",
    ] + select({
        # Windows doesn't have a concept analogous to RTLD_GLOBAL.
        "@platforms//os:windows": [],
        "//conditions:default": [":init_jazzer_preload"],
    }),
)

# Aggregates the driver components below; also visible to the driver mocks
# test package.
cc_library(
    name = "jazzer_driver_lib",
    visibility = ["//src/test/native/com/code_intelligence/jazzer/driver/mocks:__pkg__"],
    deps = [
        ":coverage_tracker",
        ":fuzz_target_runner",
        ":jazzer_fuzzer_callbacks",
        ":libfuzzer_callbacks",
        ":mutator",
    ],
)

# JNI library built from android_tooling.cpp; not built on Windows.
cc_jni_library(
    name = "jazzer_android_tooling",
    srcs = ["android_tooling.cpp"],
    platforms = MULTI_PLATFORM,
    target_compatible_with = SKIP_ON_WINDOWS,
    visibility = ["//src/main/java/com/code_intelligence/jazzer/android:__pkg__"],
    deps = [
        "//src/main/java/com/code_intelligence/jazzer/android:android_runtime.hdrs",
    ],
)

cc_library(
    name = "coverage_tracker",
    srcs = ["coverage_tracker.cpp"],
    hdrs = ["coverage_tracker.h"],
    deps = ["//src/main/java/com/code_intelligence/jazzer/runtime:coverage_map.hdrs"],
    # Symbols are only referenced dynamically via JNI.
    alwayslink = True,
)

# Links libdl everywhere except on Windows.
cc_library(
    name = "fuzz_target_runner",
    srcs = ["fuzz_target_runner.cpp"],
    hdrs = ["fuzz_target_runner.h"],
    linkopts = select({
        "@platforms//os:windows": [],
        "//conditions:default": ["-ldl"],
    }),
    deps = [
        ":sanitizer_symbols",
        "//src/main/java/com/code_intelligence/jazzer/runtime:fuzz_target_runner_natives.hdrs",
    ],
    # With sanitizers, symbols are only referenced dynamically via JNI.
    alwayslink = True,
)

cc_library(
    name = "fuzzed_data_provider",
    srcs = ["fuzzed_data_provider.cpp"],
    visibility = [
        "//launcher:__pkg__",
    ],
    deps = [
        "//src/main/java/com/code_intelligence/jazzer/driver:fuzzed_data_provider_impl.hdrs",
    ],
    # Symbols may only be referenced dynamically via JNI.
    alwayslink = True,
)

# JNI library wrapper around :fuzzed_data_provider.
cc_jni_library(
    name = "jazzer_fuzzed_data_provider",
    platforms = MULTI_PLATFORM,
    visibility = ["//src/main/java/com/code_intelligence/jazzer/driver:__pkg__"],
    deps = [":fuzzed_data_provider"],
)

cc_library(
    name = "jazzer_fuzzer_callbacks",
    srcs = ["jazzer_fuzzer_callbacks.cpp"],
    deps = [
        ":sanitizer_hooks_with_pc",
        "//src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs",
    ],
    alwayslink = True,
)

cc_jni_library(
    name = "jazzer_signal_handler",
    srcs = ["signal_handler.cpp"],
    platforms = MULTI_PLATFORM,
    visibility = ["//src/main/java/com/code_intelligence/jazzer/driver:__pkg__"],
    deps = ["//src/main/java/com/code_intelligence/jazzer/driver:signal_handler.hdrs"],
)

cc_library(
    name = "libfuzzer_callbacks",
    srcs = ["libfuzzer_callbacks.cpp"],
    deps = [
        "//src/main/java/com/code_intelligence/jazzer/runtime:trace_data_flow_native_callbacks.hdrs",
        "@com_google_absl//absl/strings",
    ],
    # Symbols are only referenced dynamically via JNI.
    alwayslink = True,
)

cc_library(
    name = "mutator",
    srcs = ["mutator.cpp"],
    deps = ["//src/main/java/com/code_intelligence/jazzer/runtime:mutator.hdrs"],
    # Symbols are only referenced dynamically via JNI.
    alwayslink = True,
)

# Not built on Windows; see the select() in :jazzer_driver.
cc_library(
    name = "init_jazzer_preload",
    srcs = ["init_jazzer_preload.cpp"],
    linkopts = ["-ldl"],
    target_compatible_with = SKIP_ON_WINDOWS,
    deps = ["@fmeum_rules_jni//jni"],
    # Symbols are only referenced dynamically via JNI.
    alwayslink = True,
)

# Header-only target, visible to the whole repository.
cc_library(
    name = "sanitizer_hooks_with_pc",
    hdrs = ["sanitizer_hooks_with_pc.h"],
    visibility = ["//:__subpackages__"],
)

cc_library(
    name = "sanitizer_symbols",
    srcs = ["sanitizer_symbols.cpp"],
    # Symbols are referenced dynamically by libFuzzer.
    alwayslink = True,
)

# Unit test for :fuzzed_data_provider, compiled as C++17 on all platforms.
cc_test(
    name = "fuzzed_data_provider_test",
    size = "small",
    srcs = ["fuzzed_data_provider_test.cpp"],
    copts = select({
        "@platforms//os:windows": ["/std:c++17"],
        "//conditions:default": ["-std=c++17"],
    }),
    deps = [
        ":fuzzed_data_provider",
        "@fmeum_rules_jni//jni",
        "@googletest//:gtest",
        "@googletest//:gtest_main",
    ],
)
// diff --git a/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp b/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp
// new file mode 100644
// index 00000000..73444696
// --- /dev/null
// +++ b/src/main/native/com/code_intelligence/jazzer/driver/android_tooling.cpp
// @@ -0,0 +1,61 @@
// Copyright 2021 Code Intelligence GmbH
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <dlfcn.h> +#include <jni.h> + +#include <cstdlib> +#include <cstring> +#include <iostream> + +#include "com_code_intelligence_jazzer_android_AndroidRuntime.h" + +const char *RUNTIME_LIBRARY = "libandroid_runtime.so"; + +// Register native methods from the Android Runtime (ART) framework. +[[maybe_unused]] jint +Java_com_code_1intelligence_jazzer_android_AndroidRuntime_registerNatives( + JNIEnv *env, jclass clazz) { + void *handle = nullptr; + handle = dlopen(RUNTIME_LIBRARY, RTLD_LAZY); + + if (handle == nullptr) { + std::cerr + << "ERROR: Unable to locate runtime library. Check LD_LIBRARY_PATH." + << std::endl; + exit(1); + } + // reset errors + dlerror(); + + // Load the symbol from library + typedef jint (*Register_Frameworks_t)(JNIEnv *); + Register_Frameworks_t Register_Frameworks; + + Register_Frameworks = reinterpret_cast<Register_Frameworks_t>( + dlsym(handle, "registerFrameworkNatives")); + const char *dlsym_error = dlerror(); + if (dlsym_error) { + std::cerr << "ERROR: Unable to invoke registerFrameworkNatives." + << std::endl; + exit(1); + } + + if (Register_Frameworks == nullptr) { + std::cerr << "ERROR: Register_Frameworks is null." << std::endl; + exit(1); + } + + return Register_Frameworks(env); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp new file mode 100644 index 00000000..d904c2d5 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.cpp @@ -0,0 +1,122 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "coverage_tracker.h" + +#include <jni.h> +#include <stdint.h> + +#include <iostream> +#include <vector> + +#include "com_code_intelligence_jazzer_runtime_CoverageMap.h" + +extern "C" void __sanitizer_cov_8bit_counters_init(uint8_t *start, + uint8_t *end); +extern "C" void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg, + const uintptr_t *pcs_end); +extern "C" size_t __sanitizer_cov_get_observed_pcs(uintptr_t **pc_entries); + +namespace { +void AssertNoException(JNIEnv &env) { + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + std::cerr << "ERROR: Java exception occurred in CoverageTracker JNI code" + << std::endl; + _Exit(1); + } +} +} // namespace + +namespace jazzer { + +uint8_t *CoverageTracker::counters_ = nullptr; +PCTableEntry *CoverageTracker::pc_entries_ = nullptr; + +void CoverageTracker::Initialize(JNIEnv &env, jlong counters) { + if (counters_ != nullptr) { + std::cerr << "ERROR: CoverageTracker::Initialize must not be called more " + "than once" + << std::endl; + _Exit(1); + } + counters_ = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(counters)); +} + +void CoverageTracker::RegisterNewCounters(JNIEnv &env, jint old_num_counters, + jint new_num_counters) { + if (counters_ == nullptr) { + std::cerr + << "ERROR: CoverageTracker::Initialize should have been called first" + << std::endl; + _Exit(1); + } + if (new_num_counters < old_num_counters) { + std::cerr + << "ERROR: new_num_counters must not be smaller than old_num_counters" + << std::endl; + _Exit(1); + } + if (new_num_counters == old_num_counters) { + 
return; + } + std::size_t diff_num_counters = new_num_counters - old_num_counters; + // libFuzzer requires an array containing the instruction addresses associated + // with the coverage counters registered above. This is required to report how + // many edges have been covered. However, libFuzzer only checks these + // addresses when the corresponding flag is set to 1. Therefore, it is safe to + // set the all PC entries to any value as long as the corresponding flag is + // set to zero. We set the value of each PC to the index of the corresponding + // edge ID. This facilitates finding the edge ID of each covered PC reported + // by libFuzzer. + pc_entries_ = new PCTableEntry[diff_num_counters]; + for (std::size_t i = 0; i < diff_num_counters; ++i) { + pc_entries_[i] = {i, 0}; + } + __sanitizer_cov_8bit_counters_init(counters_ + old_num_counters, + counters_ + new_num_counters); + __sanitizer_cov_pcs_init((uintptr_t *)(pc_entries_), + (uintptr_t *)(pc_entries_ + diff_num_counters)); +} +} // namespace jazzer + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_initialize( + JNIEnv *env, jclass, jlong counters) { + ::jazzer::CoverageTracker::Initialize(*env, counters); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_registerNewCounters( + JNIEnv *env, jclass, jint old_num_counters, jint new_num_counters) { + ::jazzer::CoverageTracker::RegisterNewCounters(*env, old_num_counters, + new_num_counters); +} + +[[maybe_unused]] jintArray +Java_com_code_1intelligence_jazzer_runtime_CoverageMap_getEverCoveredIds( + JNIEnv *env, jclass) { + uintptr_t *covered_pcs; + jint num_covered_pcs = __sanitizer_cov_get_observed_pcs(&covered_pcs); + std::vector<jint> covered_edge_ids(covered_pcs, + covered_pcs + num_covered_pcs); + delete[] covered_pcs; + + jintArray covered_edge_ids_jni = env->NewIntArray(num_covered_pcs); + AssertNoException(*env); + env->SetIntArrayRegion(covered_edge_ids_jni, 0, num_covered_pcs, + 
covered_edge_ids.data()); + AssertNoException(*env); + return covered_edge_ids_jni; +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h new file mode 100644 index 00000000..234536dc --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/coverage_tracker.h @@ -0,0 +1,43 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <jni.h> +#include <stdint.h> + +#include <string> + +namespace jazzer { + +// The members of this struct are only accessed by libFuzzer. +struct __attribute__((packed)) PCTableEntry { + [[maybe_unused]] uintptr_t PC, PCFlags; +}; + +// CoverageTracker registers an array of 8-bit coverage counters with +// libFuzzer. The array is populated from Java using Unsafe. 
+class CoverageTracker { + private: + static uint8_t *counters_; + static PCTableEntry *pc_entries_; + + public: + static void Initialize(JNIEnv &env, jlong counters); + static void RegisterNewCounters(JNIEnv &env, jint old_num_counters, + jint new_num_counters); +}; +} // namespace jazzer diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp new file mode 100644 index 00000000..02e9ae14 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.cpp @@ -0,0 +1,240 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * A native wrapper around the FuzzTargetRunner Java class that executes it as a + * libFuzzer fuzz target. 
+ */ + +#include "fuzz_target_runner.h" + +#ifndef _WIN32 +#include <dlfcn.h> +#endif +#include <jni.h> +#include <stdint.h> + +#include <iostream> +#include <limits> +#include <string> +#include <vector> + +#include "com_code_intelligence_jazzer_runtime_FuzzTargetRunnerNatives.h" + +extern "C" int LLVMFuzzerRunDriver(int *argc, char ***argv, + int (*UserCb)(const uint8_t *Data, + size_t Size)); +extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); + +namespace { +jclass gRunner; +jmethodID gRunOneId; +jmethodID gMutateOneId; +jmethodID gCrossOverId; +JavaVM *gJavaVm; +JNIEnv *gEnv; +jboolean gUseExperimentalMutator; + +// A libFuzzer-registered callback that outputs the crashing input, but does +// not include a stack trace. +void (*gLibfuzzerPrintCrashingInput)() = nullptr; + +int testOneInput(const uint8_t *data, const std::size_t size) { + JNIEnv &env = *gEnv; + jint jsize = + std::min(size, static_cast<size_t>(std::numeric_limits<jint>::max())); + int res = env.CallStaticIntMethod(gRunner, gRunOneId, data, jsize); + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + _Exit(1); + } + return res; +} +} // namespace + +extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, + size_t MaxSize, unsigned int Seed) { + if (gUseExperimentalMutator) { + JNIEnv &env = *gEnv; + jint jsize = + std::min(Size, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jmaxSize = std::min( + MaxSize, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jseed = static_cast<jint>(Seed); + jint newSize = env.CallStaticLongMethod(gRunner, gMutateOneId, Data, jsize, + jmaxSize, jseed); + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + _Exit(1); + } + return static_cast<uint32_t>(newSize); + } else { + return LLVMFuzzerMutate(Data, Size, MaxSize); + } +} + +extern "C" size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, + uint8_t *Out, size_t MaxOutSize, + 
unsigned int Seed) { + if (gUseExperimentalMutator) { + JNIEnv &env = *gEnv; + jint jsize1 = + std::min(Size1, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jsize2 = + std::min(Size2, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jMaxOutSize = std::min( + MaxOutSize, static_cast<size_t>(std::numeric_limits<jint>::max())); + jint jseed = static_cast<jint>(Seed); + + jint newSize = + env.CallStaticLongMethod(gRunner, gCrossOverId, Data1, jsize1, Data2, + jsize2, Out, jMaxOutSize, jseed); + if (env.ExceptionCheck()) { + env.ExceptionDescribe(); + _Exit(1); + } + return static_cast<uint32_t>(newSize); + } else { + // No custom cross over supported. + return 0; + } +} + +namespace jazzer { +void DumpJvmStackTraces() { + JNIEnv *env = nullptr; + if (gJavaVm->AttachCurrentThread(reinterpret_cast<void **>(&env), nullptr) != + JNI_OK) { + return; + } + jmethodID dumpStack = + env->GetStaticMethodID(gRunner, "dumpAllStackTraces", "()V"); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + return; + } + env->CallStaticVoidMethod(gRunner, dumpStack); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + return; + } + // Do not detach as we may be the main thread (but the JVM exits anyway). 
+} +} // namespace jazzer + +[[maybe_unused]] jint +Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_startLibFuzzer( + JNIEnv *env, jclass, jobjectArray args, jclass runner, + jboolean useExperimentalMutator) { + gUseExperimentalMutator = useExperimentalMutator; + gEnv = env; + env->GetJavaVM(&gJavaVm); + gRunner = reinterpret_cast<jclass>(env->NewGlobalRef(runner)); + gRunOneId = env->GetStaticMethodID(runner, "runOne", "(JI)I"); + gMutateOneId = env->GetStaticMethodID(runner, "mutateOne", "(JIII)I"); + gCrossOverId = env->GetStaticMethodID(runner, "crossOver", "(JIJIJII)I"); + if (gRunOneId == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + + int argc = env->GetArrayLength(args); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + std::vector<std::string> argv_strings; + std::vector<const char *> argv_c; + for (jsize i = 0; i < argc; i++) { + auto arg_jni = + reinterpret_cast<jbyteArray>(env->GetObjectArrayElement(args, i)); + if (arg_jni == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + jbyte *arg_c = env->GetByteArrayElements(arg_jni, nullptr); + if (arg_c == nullptr) { + env->ExceptionDescribe(); + _Exit(1); + } + std::size_t arg_size = env->GetArrayLength(arg_jni); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + argv_strings.emplace_back(reinterpret_cast<const char *>(arg_c), arg_size); + env->ReleaseByteArrayElements(arg_jni, arg_c, JNI_ABORT); + if (env->ExceptionCheck()) { + env->ExceptionDescribe(); + _Exit(1); + } + } + for (jsize i = 0; i < argc; i++) { + argv_c.emplace_back(argv_strings[i].c_str()); + } + // Null-terminate argv. 
+ argv_c.emplace_back(nullptr); + + const char **argv = argv_c.data(); + return LLVMFuzzerRunDriver(&argc, const_cast<char ***>(&argv), testOneInput); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_printCrashingInput( + JNIEnv *, jclass) { + if (gLibfuzzerPrintCrashingInput == nullptr) { + std::cerr << "<not available>" << std::endl; + } else { + gLibfuzzerPrintCrashingInput(); + } +} + +namespace fuzzer { +// Defined in: +// https://github.com/llvm/llvm-project/blob/27cc31b64c0491725aa88a6822f0f2a2c18914d7/compiler-rt/lib/fuzzer/FuzzerLoop.cpp#L43 +// Used here: +// https://github.com/llvm/llvm-project/blob/27cc31b64c0491725aa88a6822f0f2a2c18914d7/compiler-rt/lib/fuzzer/FuzzerLoop.cpp#L244 +extern bool RunningUserCallback; +} // namespace fuzzer + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_FuzzTargetRunnerNatives_temporarilyDisableLibfuzzerExitHook( + JNIEnv *, jclass) { + ::fuzzer::RunningUserCallback = false; +} + +// We apply a patch to libFuzzer to make it call this function instead of +// __sanitizer_set_death_callback to pass us the death callback. +extern "C" [[maybe_unused]] void __jazzer_set_death_callback( + void (*callback)()) { + gLibfuzzerPrintCrashingInput = callback; +#ifndef _WIN32 + void *sanitizer_set_death_callback = + dlsym(RTLD_DEFAULT, "__sanitizer_set_death_callback"); + if (sanitizer_set_death_callback != nullptr) { + (reinterpret_cast<void (*)(void (*)())>(sanitizer_set_death_callback))( + []() { + ::jazzer::DumpJvmStackTraces(); + gLibfuzzerPrintCrashingInput(); + // Ideally, we would be able to perform a graceful shutdown of the + // JVM. However, doing this directly results in a nested bug report by + // ASan or UBSan, likely because something about the stack/thread + // context in which they generate reports is incompatible with the JVM + // shutdown process. use_sigaltstack=0 does not help though, so this + // might be on us. 
+ }); + } +#endif +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h new file mode 100644 index 00000000..e64eb8f2 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzz_target_runner.h @@ -0,0 +1,26 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace jazzer { +/* + * Print the stack traces of all active JVM threads. + * + * This function can be called from any thread. + */ +void DumpJvmStackTraces(); +} // namespace jazzer diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp new file mode 100644 index 00000000..7ea9c344 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider.cpp @@ -0,0 +1,692 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Modified from +// https://raw.githubusercontent.com/google/atheris/034284dc4bb1ad4f4ab6ba5d34fb4dca7c633660/fuzzed_data_provider.cc +// +// Original license and copyright notices: +// +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Modified from +// https://github.com/llvm/llvm-project/blob/70de7e0d9a95b7fcd7c105b06bd90fdf4e01f563/compiler-rt/include/fuzzer/FuzzedDataProvider.h +// +// Original license and copyright notices: +// +//===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#include <algorithm> +#include <cstdint> +#include <limits> +#include <string> +#include <tuple> +#include <type_traits> + +#include "com_code_intelligence_jazzer_driver_FuzzedDataProviderImpl.h" + +namespace { + +jfieldID gDataPtrField = nullptr; +jfieldID gRemainingBytesField = nullptr; + +void ThrowIllegalArgumentException(JNIEnv &env, const std::string &message) { + jclass illegal_argument_exception = + env.FindClass("java/lang/IllegalArgumentException"); + env.ThrowNew(illegal_argument_exception, message.c_str()); +} + +template <typename T> +struct JniArrayType {}; + +#define JNI_ARRAY_TYPE(lower_case, sentence_case) \ + template <> \ + struct JniArrayType<j##lower_case> { \ + typedef j##lower_case type; \ + typedef j##lower_case##Array array_type; \ + static constexpr array_type (JNIEnv::*kNewArrayFunc)(jsize) = \ + &JNIEnv::New##sentence_case##Array; \ + static constexpr void (JNIEnv::*kSetArrayRegionFunc)( \ + array_type array, jsize start, jsize len, \ + const type *buf) = &JNIEnv::Set##sentence_case##ArrayRegion; \ + }; + +JNI_ARRAY_TYPE(boolean, Boolean); +JNI_ARRAY_TYPE(byte, Byte); +JNI_ARRAY_TYPE(short, Short); +JNI_ARRAY_TYPE(int, Int); +JNI_ARRAY_TYPE(long, Long); + +template <typename T> +typename JniArrayType<T>::array_type JNICALL +ConsumeIntegralArray(JNIEnv &env, jobject self, jint max_length) { + if (max_length < 0) { + ThrowIllegalArgumentException(env, "maxLength must not be negative"); + return nullptr; + } + // Arrays of integral types are considered data and thus consumed from the + // beginning of the buffer. 
+ const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + jint max_num_bytes = + std::min(static_cast<jint>(sizeof(T)) * max_length, remainingBytes); + jsize actual_length = max_num_bytes / sizeof(T); + jint actual_num_bytes = sizeof(T) * actual_length; + auto array = (env.*(JniArrayType<T>::kNewArrayFunc))(actual_length); + (env.*(JniArrayType<T>::kSetArrayRegionFunc))( + array, 0, actual_length, reinterpret_cast<const T *>(dataPtr)); + + env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + actual_num_bytes)); + env.SetIntField(self, gRemainingBytesField, + remainingBytes - actual_num_bytes); + + return array; +} + +template <typename T> +jbyteArray JNICALL ConsumeRemainingAsArray(JNIEnv &env, jobject self) { + return ConsumeIntegralArray<T>(env, self, std::numeric_limits<jint>::max()); +} + +template <typename T> +T JNICALL ConsumeIntegralInRange(JNIEnv &env, jobject self, T min, T max) { + uint64_t range = static_cast<uint64_t>(max) - min; + uint64_t result = 0; + jint offset = 0; + + const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + while (offset < 8 * sizeof(T) && (range >> offset) > 0 && + remainingBytes != 0) { + --remainingBytes; + result = (result << 8u) | dataPtr[remainingBytes]; + offset += 8; + } + + env.SetIntField(self, gRemainingBytesField, remainingBytes); + // dataPtr hasn't been modified, so we don't need to update gDataPtrField. + + if (range != std::numeric_limits<T>::max()) + // We accept modulo bias in favor of reading a dynamic number of bytes as + // this would make it harder for the fuzzer to mutate towards values from + // the table of recent compares. 
+ result = result % (range + 1); + + return static_cast<T>(min + result); +} + +template <typename T> +T JNICALL ConsumeIntegral(JNIEnv &env, jobject self) { + // First generate an unsigned value and then (safely) cast it to a signed + // integral type. By doing this rather than calling ConsumeIntegralInRange + // with bounds [signed_min, signed_max], we ensure that there is a direct + // correspondence between the consumed raw bytes and the result (e.g., 0 + // corresponds to 0 and not to signed_min). This should help mutating + // towards entries of the table of recent compares. + using UnsignedT = typename std::make_unsigned<T>::type; + static_assert( + std::numeric_limits<UnsignedT>::is_modulo, + "Unsigned to signed conversion requires modulo-based overflow handling"); + return static_cast<T>(ConsumeIntegralInRange<UnsignedT>( + env, self, 0, std::numeric_limits<UnsignedT>::max())); +} + +bool JNICALL ConsumeBool(JNIEnv &env, jobject self) { + return ConsumeIntegral<uint8_t>(env, self) & 1u; +} + +jchar ConsumeCharInternal(JNIEnv &env, jobject self, bool filter_surrogates) { + auto raw_codepoint = ConsumeIntegral<jchar>(env, self); + if (filter_surrogates && raw_codepoint >= 0xd800 && raw_codepoint < 0xe000) + raw_codepoint -= 0xd800; + return raw_codepoint; +} + +jchar JNICALL ConsumeChar(JNIEnv &env, jobject self) { + return ConsumeCharInternal(env, self, false); +} + +jchar JNICALL ConsumeCharNoSurrogates(JNIEnv &env, jobject self) { + return ConsumeCharInternal(env, self, true); +} + +template <typename T> +T JNICALL ConsumeProbability(JNIEnv &env, jobject self) { + using IntegralType = + typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t, + uint64_t>::type; + T result = static_cast<T>(ConsumeIntegral<IntegralType>(env, self)); + result /= static_cast<T>(std::numeric_limits<IntegralType>::max()); + return result; +} + +template <typename T> +T JNICALL ConsumeFloatInRange(JNIEnv &env, jobject self, T min, T max) { + T range; + T result = 
min; + + // Deal with overflow, in the event min and max are very far apart + if (min < 0 && max > 0 && min + std::numeric_limits<T>::max() < max) { + range = (max / 2) - (min / 2); + if (ConsumeBool(env, self)) { + result += range; + } + } else { + range = max - min; + } + + T probability = ConsumeProbability<T>(env, self); + return result + range * probability; +} + +template <typename T> +T JNICALL ConsumeRegularFloat(JNIEnv &env, jobject self) { + return ConsumeFloatInRange(env, self, std::numeric_limits<T>::lowest(), + std::numeric_limits<T>::max()); +} + +template <typename T> +T JNICALL ConsumeFloat(JNIEnv &env, jobject self) { + if (env.GetIntField(self, gRemainingBytesField) == 0) return 0.0; + + auto type_val = ConsumeIntegral<uint8_t>(env, self); + + if (type_val <= 10) { + // Consume the same amount of bytes as for a regular float/double + ConsumeRegularFloat<T>(env, self); + + switch (type_val) { + case 0: + return 0.0; + case 1: + return -0.0; + case 2: + return std::numeric_limits<T>::infinity(); + case 3: + return -std::numeric_limits<T>::infinity(); + case 4: + return std::numeric_limits<T>::quiet_NaN(); + case 5: + return std::numeric_limits<T>::denorm_min(); + case 6: + return -std::numeric_limits<T>::denorm_min(); + case 7: + return std::numeric_limits<T>::min(); + case 8: + return -std::numeric_limits<T>::min(); + case 9: + return std::numeric_limits<T>::max(); + case 10: + return -std::numeric_limits<T>::max(); + default: + abort(); + } + } + + T regular = ConsumeRegularFloat<T>(env, self); + return regular; +} + +// Polyfill for C++20 std::countl_one, which counts the number of leading ones +// in an unsigned integer. +inline __attribute__((always_inline)) uint8_t countl_one(uint8_t byte) { + // The result of __builtin_clz is undefined for 0. + if (byte == 0xFF) return 8; + return __builtin_clz(static_cast<uint8_t>(~byte)) - 24; +} + +// Forces a byte to be a valid UTF-8 continuation byte. 
inline __attribute__((always_inline)) void ForceContinuationByte(
    uint8_t &byte) {
  // Set bit 7 and clear bit 6, yielding a byte of the form 0b10XXXXXX.
  byte = (byte | (1u << 7u)) & ~(1u << 6u);
}

// Leading and continuation byte of the two-byte encoding of the zero
// character.
constexpr uint8_t kTwoByteZeroLeadingByte = 0b11000000;
constexpr uint8_t kTwoByteZeroContinuationByte = 0b10000000;
// Three-byte leading byte whose four payload bits are all zero.
constexpr uint8_t kThreeByteLowLeadingByte = 0b11100000;
// Three-byte leading byte that starts a surrogate.
constexpr uint8_t kSurrogateLeadingByte = 0b11101101;

// States of the fix-up state machine in FixUpModifiedUtf8. Each state names
// the kind of byte that is expected next.
enum class Utf8GenerationState {
  LeadingByte_Generic,
  LeadingByte_AfterBackslash,
  ContinuationByte_Generic,
  ContinuationByte_LowLeadingByte,
  FirstContinuationByte_LowLeadingByte,
  FirstContinuationByte_SurrogateLeadingByte,
  FirstContinuationByte_Generic,
  SecondContinuationByte_Generic,
  LeadingByte_LowSurrogate,
  FirstContinuationByte_LowSurrogate,
  SecondContinuationByte_HighSurrogate,
  SecondContinuationByte_LowSurrogate,
};

// Consumes up to `max_bytes` arbitrary bytes pointed to by `data` and returns
// a valid "modified UTF-8" string of length at most `max_length` that
// resembles the input bytes as closely as possible as well as the number of
// consumed bytes. If `stop_on_backslash` is true, then the string will end on
// the first single consumed '\'. If `ascii_only` is true, every consumed byte
// is clamped to the 7-bit ASCII range first.
//
// "Modified UTF-8" is the string encoding used by the JNI. It is the same as
// the legacy encoding CESU-8, but with `\0` coded on two bytes. In these
// encodings, code points requiring 4 bytes in modern UTF-8 are represented as
// two surrogates, each of which is coded on 3 bytes.
//
// This function has been designed with the following goals in mind:
// 1. The generated string should be biased towards containing ASCII characters
//    as these are often the ones that affect control flow directly.
// 2. Correctly encoded data (e.g. taken from the table of recent compares)
//    should be emitted unchanged.
// 3. The raw fuzzer input should be preserved as far as possible, but the
//    output must always be correctly encoded.
//
// The JVM accepts string in two encodings: UTF-16 and modified UTF-8.
// Generating UTF-16 would make it harder to fulfill the first design goal and
// would potentially hinder compatibility with corpora using the much more
// widely used UTF-8 encoding, which is reasonably similar to modified UTF-8. As
// a result, this function uses modified UTF-8.
//
// See Algorithm 1 of https://arxiv.org/pdf/2010.03090.pdf for more details on
// the individual cases involved in determining the validity of a UTF-8 string.
template <bool ascii_only, bool stop_on_backslash>
std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data,
                                               jint max_bytes,
                                               jint max_length) {
  std::string str;
  // Every character in modified UTF-8 is coded on at most six bytes. Every
  // consumed byte is transformed into at most one code unit, except for the
  // case of a zero byte which requires two bytes.
  if (ascii_only) {
    str.reserve(std::min(2 * static_cast<std::size_t>(max_length),
                         2 * static_cast<std::size_t>(max_bytes)));
  } else {
    str.reserve(std::min(6 * static_cast<std::size_t>(max_length),
                         2 * static_cast<std::size_t>(max_bytes)));
  }

  Utf8GenerationState state = Utf8GenerationState::LeadingByte_Generic;
  const uint8_t *pos = data;
  const auto data_end = data + max_bytes;
  // `length` counts completed characters; `pos` advances by one input byte per
  // iteration.
  for (jint length = 0; length < max_length && pos != data_end; ++pos) {
    uint8_t c = *pos;
    if (ascii_only) {
      // Clamp to 7-bit ASCII range.
      c &= 0x7Fu;
    }
    // Fix up c or previously read bytes according to the value of c and the
    // current state. In the end, add the fixed up code unit c to the string.
    // Exception: The zero character has to be coded on two bytes and is the
    // only case in which an iteration of the loop adds two code units.
    switch (state) {
      case Utf8GenerationState::LeadingByte_Generic: {
        switch (ascii_only ? 0 : countl_one(c)) {
          case 0: {
            // valid - 1-byte code point (ASCII)
            // The zero character has to be coded on two bytes in modified
            // UTF-8.
            if (c == 0) {
              str += static_cast<char>(kTwoByteZeroLeadingByte);
              c = kTwoByteZeroContinuationByte;
            } else if (stop_on_backslash && c == '\\') {
              state = Utf8GenerationState::LeadingByte_AfterBackslash;
              // The slash either signals the end of the string or is skipped,
              // so don't append anything.
              continue;
            }
            // Remain in state LeadingByte.
            ++length;
            break;
          }
          case 1: {
            // invalid - continuation byte at leader byte position
            // Fix it up to be of the form 0b110XXXXX and fall through to the
            // case of a 2-byte sequence.
            c |= 1u << 6u;
            c &= ~(1u << 5u);
            [[fallthrough]];
          }
          case 2: {
            // (most likely) valid - start of a 2-byte sequence
            // ASCII characters must be coded on a single byte, so we must
            // ensure that the lower two bits combined with the six non-header
            // bits of the following byte do not form a 7-bit ASCII value. This
            // could only be the case if at most the lowest bit is set.
            if ((c & 0b00011110u) == 0) {
              state = Utf8GenerationState::ContinuationByte_LowLeadingByte;
            } else {
              state = Utf8GenerationState::ContinuationByte_Generic;
            }
            break;
          }
          // The default case falls through to the case of three leading ones
          // coming right after.
          default: {
            // invalid - at least four leading ones
            // In the case of exactly four leading ones, this would be valid
            // UTF-8, but is not valid in the JVM's modified UTF-8 encoding.
            // Fix it up by clearing the fourth leading one and falling through
            // to the 3-byte case.
            c &= ~(1u << 4u);
            [[fallthrough]];
          }
          case 3: {
            // valid - start of a 3-byte sequence
            if (c == kThreeByteLowLeadingByte) {
              state = Utf8GenerationState::FirstContinuationByte_LowLeadingByte;
            } else if (c == kSurrogateLeadingByte) {
              state = Utf8GenerationState::
                  FirstContinuationByte_SurrogateLeadingByte;
            } else {
              state = Utf8GenerationState::FirstContinuationByte_Generic;
            }
            break;
          }
        }
        break;
      }
      case Utf8GenerationState::LeadingByte_AfterBackslash: {
        if (c != '\\') {
          // Mark the current byte as consumed.
          ++pos;
          // Jumping to `done` skips the backtracking switch below; in this
          // state no partial character has been appended.
          goto done;
        }
        // A double backslash is consumed as a single one. As we skipped the
        // first one, emit the second one as usual.
        state = Utf8GenerationState::LeadingByte_Generic;
        ++length;
        break;
      }
      case Utf8GenerationState::ContinuationByte_LowLeadingByte: {
        ForceContinuationByte(c);
        // Preserve the zero character, which is coded on two bytes in modified
        // UTF-8. In all other cases ensure that we are not incorrectly encoding
        // an ASCII character on two bytes by setting the eighth least
        // significant bit of the encoded value (second least significant bit of
        // the leading byte).
        auto previous_c = static_cast<uint8_t>(str.back());
        if (previous_c != kTwoByteZeroLeadingByte ||
            c != kTwoByteZeroContinuationByte) {
          str.back() = static_cast<char>(previous_c | (1u << 1u));
        }
        state = Utf8GenerationState::LeadingByte_Generic;
        ++length;
        break;
      }
      case Utf8GenerationState::ContinuationByte_Generic: {
        ForceContinuationByte(c);
        state = Utf8GenerationState::LeadingByte_Generic;
        ++length;
        break;
      }
      case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: {
        ForceContinuationByte(c);
        // Ensure that the current code point could not have been coded on two
        // bytes. As two bytes encode up to 11 bits and three bytes encode up
        // to 16 bits, we thus have to make it such that the five highest bits
        // are not all zero. Four of these bits are the non-header bits of the
        // leader byte. Thus, set the highest non-header bit in this byte (fifth
        // highest in the encoded value).
        c |= 1u << 5u;
        state = Utf8GenerationState::SecondContinuationByte_Generic;
        break;
      }
      case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: {
        ForceContinuationByte(c);
        if (c & (1u << 5u)) {
          // Start with a high surrogate (0xD800-0xDBFF). c contains the second
          // byte and the first two bits of the third byte. The first two bits
          // of this second byte are fixed to 10 (in 0x8-0xB).
          c |= 1u << 5u;
          c &= ~(1u << 4u);
          // The high surrogate must be followed by a low surrogate.
          state = Utf8GenerationState::SecondContinuationByte_HighSurrogate;
        } else {
          state = Utf8GenerationState::SecondContinuationByte_Generic;
        }
        break;
      }
      case Utf8GenerationState::FirstContinuationByte_Generic: {
        ForceContinuationByte(c);
        state = Utf8GenerationState::SecondContinuationByte_Generic;
        break;
      }
      case Utf8GenerationState::SecondContinuationByte_HighSurrogate: {
        ForceContinuationByte(c);
        state = Utf8GenerationState::LeadingByte_LowSurrogate;
        ++length;
        break;
      }
      case Utf8GenerationState::SecondContinuationByte_LowSurrogate:
      case Utf8GenerationState::SecondContinuationByte_Generic: {
        ForceContinuationByte(c);
        state = Utf8GenerationState::LeadingByte_Generic;
        ++length;
        break;
      }
      case Utf8GenerationState::LeadingByte_LowSurrogate: {
        // We have to emit a low surrogate leading byte, which is a fixed value.
        // We still consume a byte from the input to make fuzzer changes more
        // stable and preserve valid surrogate pairs picked up from e.g. the
        // table of recent compares.
        c = kSurrogateLeadingByte;
        state = Utf8GenerationState::FirstContinuationByte_LowSurrogate;
        break;
      }
      case Utf8GenerationState::FirstContinuationByte_LowSurrogate: {
        ForceContinuationByte(c);
        // Low surrogates are code points in the range 0xDC00-0xDFFF. c contains
        // the second byte and the first two bits of the third byte. The first
        // two bits of this second byte are fixed to 11 (in 0xC-0xF).
        c |= (1u << 5u) | (1u << 4u);
        // The second continuation byte of a low surrogate is not restricted,
        // but we need to track it differently to allow for correct backtracking
        // if it isn't completed.
        state = Utf8GenerationState::SecondContinuationByte_LowSurrogate;
        break;
      }
    }
    str += static_cast<uint8_t>(c);
  }

  // Backtrack the current incomplete character. The cases fall through so that
  // each state removes one byte more than the state listed after it.
  switch (state) {
    case Utf8GenerationState::SecondContinuationByte_LowSurrogate:
      str.pop_back();
      [[fallthrough]];
    case Utf8GenerationState::FirstContinuationByte_LowSurrogate:
      str.pop_back();
      [[fallthrough]];
    case Utf8GenerationState::LeadingByte_LowSurrogate:
      str.pop_back();
      [[fallthrough]];
    case Utf8GenerationState::SecondContinuationByte_Generic:
    case Utf8GenerationState::SecondContinuationByte_HighSurrogate:
      str.pop_back();
      [[fallthrough]];
    case Utf8GenerationState::ContinuationByte_Generic:
    case Utf8GenerationState::ContinuationByte_LowLeadingByte:
    case Utf8GenerationState::FirstContinuationByte_Generic:
    case Utf8GenerationState::FirstContinuationByte_LowLeadingByte:
    case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte:
      str.pop_back();
      [[fallthrough]];
    case Utf8GenerationState::LeadingByte_Generic:
    case Utf8GenerationState::LeadingByte_AfterBackslash:
      // No backtracking required.
      break;
  }

done:
  // The second element is the number of input bytes that `pos` has advanced
  // past.
  return std::make_pair(str, pos - data);
}
}  // namespace

namespace jazzer {
// Exposed for testing only.
+std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *data, + jint max_bytes, jint max_length, + bool ascii_only, + bool stop_on_backslash) { + if (ascii_only) { + if (stop_on_backslash) { + return ::FixUpModifiedUtf8<true, true>(data, max_bytes, max_length); + } else { + return ::FixUpModifiedUtf8<true, false>(data, max_bytes, max_length); + } + } else { + if (stop_on_backslash) { + return ::FixUpModifiedUtf8<false, true>(data, max_bytes, max_length); + } else { + return ::FixUpModifiedUtf8<false, false>(data, max_bytes, max_length); + } + } +} +} // namespace jazzer + +namespace { +jstring ConsumeStringInternal(JNIEnv &env, jobject self, jint max_length, + bool ascii_only, bool stop_on_backslash) { + if (max_length < 0) { + ThrowIllegalArgumentException(env, "maxLength must not be negative"); + return nullptr; + } + + const auto *dataPtr = + reinterpret_cast<const uint8_t *>(env.GetLongField(self, gDataPtrField)); + jint remainingBytes = env.GetIntField(self, gRemainingBytesField); + + if (max_length == 0 || remainingBytes == 0) return env.NewStringUTF(""); + + if (remainingBytes == 1) { + env.SetIntField(self, gRemainingBytesField, 0); + return env.NewStringUTF(""); + } + + std::string str; + jint consumed_bytes; + std::tie(str, consumed_bytes) = jazzer::FixUpModifiedUtf8( + dataPtr, remainingBytes, max_length, ascii_only, stop_on_backslash); + env.SetLongField(self, gDataPtrField, (jlong)(dataPtr + consumed_bytes)); + env.SetIntField(self, gRemainingBytesField, remainingBytes - consumed_bytes); + return env.NewStringUTF(str.c_str()); +} + +jstring JNICALL ConsumeAsciiString(JNIEnv &env, jobject self, jint max_length) { + return ConsumeStringInternal(env, self, max_length, true, true); +} + +jstring JNICALL ConsumeString(JNIEnv &env, jobject self, jint max_length) { + return ConsumeStringInternal(env, self, max_length, false, true); +} + +jstring JNICALL ConsumeRemainingAsAsciiString(JNIEnv &env, jobject self) { + return ConsumeStringInternal(env, self, 
std::numeric_limits<jint>::max(), + true, false); +} + +jstring JNICALL ConsumeRemainingAsString(JNIEnv &env, jobject self) { + return ConsumeStringInternal(env, self, std::numeric_limits<jint>::max(), + false, false); +} + +std::size_t RemainingBytes(JNIEnv &env, jobject self) { + return env.GetIntField(self, gRemainingBytesField); +} + +const JNINativeMethod kFuzzedDataMethods[]{ + {(char *)"consumeBoolean", (char *)"()Z", (void *)&ConsumeBool}, + {(char *)"consumeByte", (char *)"()B", (void *)&ConsumeIntegral<jbyte>}, + {(char *)"consumeByteUnchecked", (char *)"(BB)B", + (void *)&ConsumeIntegralInRange<jbyte>}, + {(char *)"consumeShort", (char *)"()S", (void *)&ConsumeIntegral<jshort>}, + {(char *)"consumeShortUnchecked", (char *)"(SS)S", + (void *)&ConsumeIntegralInRange<jshort>}, + {(char *)"consumeInt", (char *)"()I", (void *)&ConsumeIntegral<jint>}, + {(char *)"consumeIntUnchecked", (char *)"(II)I", + (void *)&ConsumeIntegralInRange<jint>}, + {(char *)"consumeLong", (char *)"()J", (void *)&ConsumeIntegral<jlong>}, + {(char *)"consumeLongUnchecked", (char *)"(JJ)J", + (void *)&ConsumeIntegralInRange<jlong>}, + {(char *)"consumeFloat", (char *)"()F", (void *)&ConsumeFloat<jfloat>}, + {(char *)"consumeRegularFloat", (char *)"()F", + (void *)&ConsumeRegularFloat<jfloat>}, + {(char *)"consumeRegularFloatUnchecked", (char *)"(FF)F", + (void *)&ConsumeFloatInRange<jfloat>}, + {(char *)"consumeProbabilityFloat", (char *)"()F", + (void *)&ConsumeProbability<jfloat>}, + {(char *)"consumeDouble", (char *)"()D", (void *)&ConsumeFloat<jdouble>}, + {(char *)"consumeRegularDouble", (char *)"()D", + (void *)&ConsumeRegularFloat<jdouble>}, + {(char *)"consumeRegularDoubleUnchecked", (char *)"(DD)D", + (void *)&ConsumeFloatInRange<jdouble>}, + {(char *)"consumeProbabilityDouble", (char *)"()D", + (void *)&ConsumeProbability<jdouble>}, + {(char *)"consumeChar", (char *)"()C", (void *)&ConsumeChar}, + {(char *)"consumeCharUnchecked", (char *)"(CC)C", + (void 
*)&ConsumeIntegralInRange<jchar>}, + {(char *)"consumeCharNoSurrogates", (char *)"()C", + (void *)&ConsumeCharNoSurrogates}, + {(char *)"consumeAsciiString", (char *)"(I)Ljava/lang/String;", + (void *)&ConsumeAsciiString}, + {(char *)"consumeRemainingAsAsciiString", (char *)"()Ljava/lang/String;", + (void *)&ConsumeRemainingAsAsciiString}, + {(char *)"consumeString", (char *)"(I)Ljava/lang/String;", + (void *)&ConsumeString}, + {(char *)"consumeRemainingAsString", (char *)"()Ljava/lang/String;", + (void *)&ConsumeRemainingAsString}, + {(char *)"consumeBooleans", (char *)"(I)[Z", + (void *)&ConsumeIntegralArray<jboolean>}, + {(char *)"consumeBytes", (char *)"(I)[B", + (void *)&ConsumeIntegralArray<jbyte>}, + {(char *)"consumeShorts", (char *)"(I)[S", + (void *)&ConsumeIntegralArray<jshort>}, + {(char *)"consumeInts", (char *)"(I)[I", + (void *)&ConsumeIntegralArray<jint>}, + {(char *)"consumeLongs", (char *)"(I)[J", + (void *)&ConsumeIntegralArray<jlong>}, + {(char *)"consumeRemainingAsBytes", (char *)"()[B", + (void *)&ConsumeRemainingAsArray<jbyte>}, + {(char *)"remainingBytes", (char *)"()I", (void *)&RemainingBytes}, +}; +const jint kNumFuzzedDataMethods = + sizeof(kFuzzedDataMethods) / sizeof(kFuzzedDataMethods[0]); +} // namespace + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_driver_FuzzedDataProviderImpl_nativeInit( + JNIEnv *env, jclass clazz) { + env->RegisterNatives(clazz, kFuzzedDataMethods, kNumFuzzedDataMethods); + gDataPtrField = env->GetFieldID(clazz, "dataPtr", "J"); + gRemainingBytesField = env->GetFieldID(clazz, "remainingBytes", "I"); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp new file mode 100644 index 00000000..2395cd97 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/fuzzed_data_provider_test.cpp @@ -0,0 +1,98 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed 
under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <cstddef> +#include <cstdint> +#include <random> +#include <string> +#include <vector> + +#include "gtest/gtest.h" + +namespace jazzer { +std::pair<std::string, jint> FixUpModifiedUtf8(const uint8_t *pos, + jint max_bytes, jint max_length, + bool ascii_only, + bool stop_on_backslash); +} + +std::pair<std::string, jint> FixUpRemainingModifiedUtf8( + const std::string &str, bool ascii_only, bool stop_on_backslash) { + return jazzer::FixUpModifiedUtf8( + reinterpret_cast<const uint8_t *>(str.c_str()), str.length(), + std::numeric_limits<jint>::max(), ascii_only, stop_on_backslash); +} + +std::pair<std::string, jint> expect(const std::string &s, jint i) { + return std::make_pair(s, i); +} + +using namespace std::literals::string_literals; +TEST(FixUpModifiedUtf8Test, FullUtf8_ContinueOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, false, false)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, false, false)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, false)); + EXPECT_EQ(expect("ja\\zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\\zzer"s, false, false)); + EXPECT_EQ(expect("ja\\\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, false)); + // "€ß" is E2 82 AC C3 9F in UTF-8: five bytes, all of which are consumed. + EXPECT_EQ(expect("€ß"s, 5), + FixUpRemainingModifiedUtf8(u8"€ß"s, false, false)); +} + +TEST(FixUpModifiedUtf8Test, 
AsciiOnly_ContinueOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, true, false)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, true, false)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, false)); + EXPECT_EQ(expect("ja\\zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\\zzer"s, true, false)); + EXPECT_EQ(expect("ja\\\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, false)); + // The expected bytes are the UTF-8 encoding of "€ß" (E2 82 AC C3 9F) with + // the high bit of every byte cleared. + EXPECT_EQ(expect("\x62\x02\x2C\x43\x1F"s, 5), + FixUpRemainingModifiedUtf8(u8"€ß"s, true, false)); +} + +TEST(FixUpModifiedUtf8Test, FullUtf8_StopOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, false, true)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, false, true)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, true)); + EXPECT_EQ(expect("ja"s, 4), + FixUpRemainingModifiedUtf8("ja\\zzer"s, false, true)); + EXPECT_EQ(expect("ja\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, true)); +} + +TEST(FixUpModifiedUtf8Test, AsciiOnly_StopOnBackslash) { + EXPECT_EQ(expect("jazzer"s, 6), + FixUpRemainingModifiedUtf8("jazzer"s, true, true)); + EXPECT_EQ(expect("ja\xC0\x80zzer"s, 7), + FixUpRemainingModifiedUtf8("ja\0zzer"s, true, true)); + EXPECT_EQ(expect("ja\xC0\x80\xC0\x80zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, true)); + EXPECT_EQ(expect("ja"s, 4), + FixUpRemainingModifiedUtf8("ja\\zzer"s, true, true)); + EXPECT_EQ(expect("ja\\zzer"s, 8), + FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, true)); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp b/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp new file mode 100644 index 00000000..23a86c53 --- /dev/null +++ 
b/src/main/native/com/code_intelligence/jazzer/driver/init_jazzer_preload.cpp @@ -0,0 +1,56 @@ +// Copyright 2022 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <dlfcn.h> +#include <jni.h> + +#include <cstdlib> + +#if defined(_ANDROID) +#define __jni_version__ JNI_VERSION_1_6 +#else +#define __jni_version__ JNI_VERSION_1_8 +#endif + +// The jazzer_preload library, if used, forwards all calls to native libFuzzer +// hooks such as __sanitizer_cov_trace_cmp8 to the Jazzer JNI library. In order +// to load the hook symbols when the library is ready, it needs to be passed a +// handle - the JVM loads libraries with RTLD_LOCAL and thus their symbols +// wouldn't be found as part of the global lookup procedure. +jint JNI_OnLoad(JavaVM *, void *) { + Dl_info info; + + if (!dladdr(reinterpret_cast<const void *>(&JNI_OnLoad), &info) || + !info.dli_fname) { + fprintf(stderr, "Failed to determine our dli_fname\n"); + abort(); + } + + void *handle = dlopen(info.dli_fname, RTLD_NOLOAD | RTLD_LAZY); + if (handle == nullptr) { + fprintf(stderr, "Failed to dlopen self: %s\n", dlerror()); + abort(); + } + + void *preload_init = dlsym(RTLD_DEFAULT, "jazzer_preload_init"); + // jazzer_preload is only preloaded when Jazzer is started with --native, so + // not finding this method is an expected error. 
+ if (preload_init) { + reinterpret_cast<void (*)(void *)>(preload_init)(handle); + } + + dlclose(handle); + + return __jni_version__; +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp b/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp new file mode 100644 index 00000000..8764aaaa --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/jazzer_fuzzer_callbacks.cpp @@ -0,0 +1,184 @@ +// Copyright 2022 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <jni.h> + +#include <cstddef> +#include <cstdint> + +#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h" +#include "sanitizer_hooks_with_pc.h" + +namespace { + +extern "C" { +void __sanitizer_weak_hook_compare_bytes(void *caller_pc, const void *s1, + const void *s2, std::size_t n1, + std::size_t n2, int result); +void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, + const void *s2, size_t len2, void *result); +} + +inline __attribute__((always_inline)) void *idToPc(jint id) { + return reinterpret_cast<void *>(static_cast<uintptr_t>(id)); +} +} // namespace + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0( + JNIEnv *env, jclass cls, jbyteArray needle, jint id) { + jint needle_length = env->GetArrayLength(needle); + auto *needle_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(needle, nullptr)); + __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native, + needle_length, nullptr); + env->ReleasePrimitiveArrayCritical(needle, needle_native, JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceStrstr0( + jint needle_length, jbyte *needle_native, jint id) { + __sanitizer_weak_hook_memmem(idToPc(id), nullptr, 0, needle_native, + needle_length, nullptr); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp( + JNIEnv *env, jclass cls, jbyteArray b1, jbyteArray b2, jint result, + jint id) { + jint b1_length = env->GetArrayLength(b1); + jint b2_length = env->GetArrayLength(b2); + auto *b1_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b1, nullptr)); + auto *b2_native = + static_cast<jbyte *>(env->GetPrimitiveArrayCritical(b2, nullptr)); + __sanitizer_weak_hook_compare_bytes(idToPc(id), b1_native, b2_native, + b1_length, b2_length, result); + 
env->ReleasePrimitiveArrayCritical(b1, b1_native, JNI_ABORT); + env->ReleasePrimitiveArrayCritical(b2, b2_native, JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceMemcmp( + jint b1_length, jbyte *b1, jint b2_length, jbyte *b2, jint result, + jint id) { + __sanitizer_weak_hook_compare_bytes(idToPc(id), b1, b2, b1_length, b2_length, + result); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong( + JNIEnv *env, jclass cls, jlong value1, jlong value2, jint id) { + __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpLong( + jlong value1, jlong value2, jint id) { + __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt( + JNIEnv *env, jclass cls, jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceCmpInt( + jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt( + JNIEnv *env, jclass cls, jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceConstCmpInt( + jint value1, jint value2, jint id) { + __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); +} + +[[maybe_unused]] void 
+Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch( + JNIEnv *env, jclass cls, jlong switch_value, + jlongArray libfuzzer_case_values, jint id) { + auto *case_values = static_cast<jlong *>( + env->GetPrimitiveArrayCritical(libfuzzer_case_values, nullptr)); + __sanitizer_cov_trace_switch_with_pc( + idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values)); + env->ReleasePrimitiveArrayCritical(libfuzzer_case_values, case_values, + JNI_ABORT); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceSwitch( + jlong switch_value, jint libfuzzer_case_values_length, jlong *case_values, + jint id) { + __sanitizer_cov_trace_switch_with_pc( + idToPc(id), switch_value, reinterpret_cast<uint64_t *>(case_values)); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong( + JNIEnv *env, jclass cls, jlong value, jint id) { + __sanitizer_cov_trace_div8_with_pc(idToPc(id), value); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivLong( + jlong value, jint id) { + __sanitizer_cov_trace_div8_with_pc(idToPc(id), value); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt( + JNIEnv *env, jclass cls, jint value, jint id) { + __sanitizer_cov_trace_div4_with_pc(idToPc(id), value); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceDivInt( + jint value, jint id) { + __sanitizer_cov_trace_div4_with_pc(idToPc(id), value); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep( + JNIEnv *env, jclass cls, jlong idx, jint id) { + __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx)); +} + 
+extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_traceGep( + jlong idx, jint id) { + __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast<uintptr_t>(idx)); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir( + JNIEnv *env, jclass cls, jint caller_id, jint callee_id) { + __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id), + static_cast<uintptr_t>(callee_id)); +} + +extern "C" [[maybe_unused]] JNIEXPORT void JNICALL +JavaCritical_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_tracePcIndir( + jint caller_id, jint callee_id) { + __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id), + static_cast<uintptr_t>(callee_id)); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp b/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp new file mode 100644 index 00000000..b7a0df5d --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/libfuzzer_callbacks.cpp @@ -0,0 +1,131 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <jni.h> + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <mutex> +#include <utility> +#include <vector> + +#include "absl/strings/str_split.h" +#include "com_code_intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks.h" + +namespace { +bool is_using_native_libraries = false; +std::once_flag ignore_list_flag; +std::vector<std::pair<uintptr_t, uintptr_t>> ignore_for_interception_ranges; + +/** + * Adds the address ranges of executable segments of the library lib_name to + * the ignorelist for C standard library function interception (strcmp, memcmp, + * ...). + */ +void ignoreLibraryForInterception(const std::string &lib_name) { + std::ifstream loaded_libs("/proc/self/maps"); + if (!loaded_libs) { + // This early exit is taken e.g. on macOS, where /proc does not exist. + return; + } + std::string line; + while (std::getline(loaded_libs, line)) { + if (!absl::StrContains(line, lib_name)) continue; + // clang-format off + // A typical line looks as follows: + // 7f15356c9000-7f1536367000 r-xp 0020d000 fd:01 19275673 /usr/lib/jvm/java-15-openjdk-amd64/lib/server/libjvm.so + // clang-format on + std::vector<std::string> parts = + absl::StrSplit(line, ' ', absl::SkipEmpty()); + if (parts.size() != 6) { + std::cout << "ERROR: Invalid format for /proc/self/maps\n" + << line << std::endl; + exit(1); + } + // Skip non-executable address rang"s. 
+ if (!absl::StrContains(parts[1], "x")) continue; + std::string range_str = parts[0]; + std::vector<std::string> range = absl::StrSplit(range_str, "-"); + if (range.size() != 2) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + std::size_t pos; + auto start = std::stoull(range[0], &pos, 16); + if (pos != range[0].size()) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + auto end = std::stoull(range[1], &pos, 16); + if (pos != range[0].size()) { + std::cout + << "ERROR: Unexpected address range format in /proc/self/maps line: " + << range_str << std::endl; + exit(1); + } + ignore_for_interception_ranges.emplace_back(start, end); + } +} + +const std::vector<std::string> kLibrariesToIgnoreForInterception = { + // The launcher executable itself can be treated just like a library. + "jazzer", "libjazzer_preload.so", + "libinstrument.so", "libjava.so", + "libjimage.so", "libjli.so", + "libjvm.so", "libnet.so", + "libverify.so", "libzip.so", +}; +} // namespace + +extern "C" [[maybe_unused]] bool __sanitizer_weak_is_relevant_pc( + void *caller_pc) { + // If the fuzz target is not using native libraries, calls to strcmp, memcmp, + // etc. should never be intercepted. The values reported if they were at best + // duplicate the values received from our bytecode instrumentation and at + // worst pollute the table of recent compares with string internal to the JDK. + if (!is_using_native_libraries) return false; + // If the fuzz target is using native libraries, intercept calls only if they + // don't originate from those address ranges that are known to belong to the + // JDK. 
+ return std::none_of( + ignore_for_interception_ranges.cbegin(), + ignore_for_interception_ranges.cend(), + [caller_pc](const std::pair<uintptr_t, uintptr_t> &range) { + uintptr_t start; + uintptr_t end; + std::tie(start, end) = range; + auto address = reinterpret_cast<uintptr_t>(caller_pc); + return start <= address && address <= end; + }); +} + +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_runtime_TraceDataFlowNativeCallbacks_handleLibraryLoad( + JNIEnv *, jclass) { + std::call_once(ignore_list_flag, [] { + std::cout << "INFO: detected a native library load, enabling interception " + "for libc functions" + << std::endl; + for (const auto &lib_name : kLibrariesToIgnoreForInterception) + ignoreLibraryForInterception(lib_name); + // Enable the ignore list after it has been populated since vector is not + // thread-safe with respect to concurrent writes and reads. + is_using_native_libraries = true; + }); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp b/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp new file mode 100644 index 00000000..4e21612b --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/mutator.cpp @@ -0,0 +1,31 @@ +// Copyright 2023 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <cstddef> +#include <cstdint> + +#include "com_code_intelligence_jazzer_runtime_Mutator.h" + +extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); + +[[maybe_unused]] jint +Java_com_code_1intelligence_jazzer_runtime_Mutator_defaultMutateNative( + JNIEnv *env, jclass, jbyteArray jni_data, jint size) { + jint maxSize = env->GetArrayLength(jni_data); + uint8_t *data = + static_cast<uint8_t *>(env->GetPrimitiveArrayCritical(jni_data, nullptr)); + jint res = LLVMFuzzerMutate(data, size, maxSize); + env->ReleasePrimitiveArrayCritical(jni_data, data, 0); + return res; +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h new file mode 100644 index 00000000..be655adb --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_hooks_with_pc.h @@ -0,0 +1,49 @@ +/* + * Copyright 2021 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include <cstdint> + +// This file declares variants of the libFuzzer compare, division, switch and +// gep hooks that accept an additional caller_pc argument that can be used to +// pass a custom value that is recorded as the caller's instruction pointer +// ("program counter"). 
This allows synthetic program counters obtained from +// Java coverage information to be used with libFuzzer's value profile, with +// which it records detailed information about the result of compares and +// associates it with particular coverage locations. +// +// Note: Only the lower 9 bits of the caller_pc argument are used by libFuzzer. +#ifdef __cplusplus +extern "C" { +#endif +void __sanitizer_cov_trace_cmp4_with_pc(void *caller_pc, uint32_t arg1, + uint32_t arg2); +void __sanitizer_cov_trace_cmp8_with_pc(void *caller_pc, uint64_t arg1, + uint64_t arg2); + +void __sanitizer_cov_trace_switch_with_pc(void *caller_pc, uint64_t val, + uint64_t *cases); + +void __sanitizer_cov_trace_div4_with_pc(void *caller_pc, uint32_t val); +void __sanitizer_cov_trace_div8_with_pc(void *caller_pc, uint64_t val); + +void __sanitizer_cov_trace_gep_with_pc(void *caller_pc, uintptr_t idx); + +void __sanitizer_cov_trace_pc_indir_with_pc(void *caller_pc, uintptr_t callee); +#ifdef __cplusplus +} +#endif diff --git a/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp new file mode 100644 index 00000000..abc5f04e --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/sanitizer_symbols.cpp @@ -0,0 +1,26 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Suppress libFuzzer warnings about missing sanitizer methods in non-sanitizer +// builds. +extern "C" [[maybe_unused]] int __sanitizer_acquire_crash_state() { return 1; } + +namespace jazzer { +void DumpJvmStackTraces(); +} + +// Dump a JVM stack trace on timeouts. +extern "C" [[maybe_unused]] void __sanitizer_print_stack_trace() { + jazzer::DumpJvmStackTraces(); +} diff --git a/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp b/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp new file mode 100644 index 00000000..e284925d --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/driver/signal_handler.cpp @@ -0,0 +1,40 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <jni.h> + +#include <atomic> +#include <csignal> + +#include "com_code_intelligence_jazzer_driver_SignalHandler.h" + +#ifdef _WIN32 +// Windows does not have SIGUSR1, which triggers a graceful exit of libFuzzer. +// Instead, trigger a hard exit. +#define SIGUSR1 SIGTERM +#endif + +// Handles SIGINT raised while running Java code. +[[maybe_unused]] void +Java_com_code_1intelligence_jazzer_driver_SignalHandler_handleInterrupt( + JNIEnv *, jclass) { + static std::atomic<bool> already_exiting{false}; + if (!already_exiting.exchange(true)) { + // Let libFuzzer exit gracefully when the JVM received SIGINT. + raise(SIGUSR1); + } else { + // Exit libFuzzer forcefully on repeated SIGINTs. 
+ raise(SIGTERM); + } +} diff --git a/src/main/native/com/code_intelligence/jazzer/jazzer_preload.c b/src/main/native/com/code_intelligence/jazzer/jazzer_preload.c new file mode 100644 index 00000000..074c3d22 --- /dev/null +++ b/src/main/native/com/code_intelligence/jazzer/jazzer_preload.c @@ -0,0 +1,249 @@ +// Copyright 2022 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Dynamically exported definitions of fuzzer hooks and libc functions that + * forward to the symbols provided by the jazzer_driver JNI library once it has + * been loaded. + */ + +#define _GNU_SOURCE // for RTLD_NEXT +#include <dlfcn.h> +#include <stdatomic.h> +#include <stddef.h> +#include <stdint.h> +#ifdef __APPLE__ +// Using dyld's interpose feature requires knowing the addresses of libc +// functions. +#include <string.h> +#endif + +#if defined(__APPLE__) && defined(__arm64__) +// arm64 has a fixed instruction length of 32 bits, which means that the lowest +// two bits of the return address of a function are always zero. Since +// libFuzzer's value profiling uses the lowest bits of the address to index into +// a hash table, we increase their entropy by shifting away the constant bits. 
+#define GET_CALLER_PC() \ + ((void *)(((uintptr_t)__builtin_return_address(0)) >> 2)) +#else +#define GET_CALLER_PC() __builtin_return_address(0) +#endif +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) + +// Unwraps (foo, bar) passed as arguments to foo, bar - this allows passing +// multiple var args into a single macro. +#define UNWRAP_VA_ARGS(...) __VA_ARGS__ + +// Define a dynamic, global symbol such as __sanitizer_weak_hook_memcmp that +// calls the local symbol of the same name in the jazzer_driver shared library +// loaded in the JVM. +#define DEFINE_LIBC_HOOK(name, ret, params, args) \ + typedef void (*name##_hook_t)(void *, UNWRAP_VA_ARGS params, ret); \ + static _Atomic name##_hook_t name##_hook; \ + \ + __attribute__((visibility("default"))) void __sanitizer_weak_hook_##name( \ + void *called_pc, UNWRAP_VA_ARGS params, ret result) { \ + name##_hook_t hook = \ + atomic_load_explicit(&name##_hook, memory_order_relaxed); \ + if (LIKELY(hook != NULL)) { \ + hook(called_pc, UNWRAP_VA_ARGS args, result); \ + } \ + } + +#define INIT_LIBC_HOOK(handle, name) \ + atomic_store(&name##_hook, dlsym(handle, "__sanitizer_weak_hook_" #name)) + +#ifdef __linux__ +// Alternate definitions for libc functions mimicking those that libFuzzer would +// provide if it were linked into the JVM. All these functions invoke the real +// libc function loaded from the next library in search order (either libc +// itself or a sanitizer's interceptor). +// +// Function pointers have to be loaded and stored atomically even if libc +// functions are invoked from different threads, but we do not need any +// synchronization guarantees - in the worst case, we will non-deterministically +// lose a few hook invocations. 
+ +#define DEFINE_LIBC_INTERCEPTOR(name, ret, params, args) \ + DEFINE_LIBC_HOOK(name, ret, params, args) \ + \ + typedef ret (*name##_t)(UNWRAP_VA_ARGS params); \ + static _Atomic name##_t name##_real; \ + \ + __attribute__((visibility("default"))) ret name(UNWRAP_VA_ARGS params) { \ + name##_t name##_real_local = \ + atomic_load_explicit(&name##_real, memory_order_relaxed); \ + if (UNLIKELY(name##_real_local == NULL)) { \ + name##_real_local = dlsym(RTLD_NEXT, #name); \ + atomic_store_explicit(&name##_real, name##_real_local, \ + memory_order_relaxed); \ + } \ + ret result = name##_real_local(UNWRAP_VA_ARGS args); \ + __sanitizer_weak_hook_##name(GET_CALLER_PC(), UNWRAP_VA_ARGS args, \ + result); \ + return result; \ + } + +#elif __APPLE__ +// macOS namespace concept makes it impossible to override symbols in shared +// library dependencies simply by defining them. Instead, the dynamic linker's +// interpose feature is used to request that one function, identified by its +// address, is replaced by another at runtime. + +typedef struct { + const uintptr_t interceptor; + const uintptr_t func; +} interpose_t; + +#define INTERPOSE(_interceptor, _func) \ + __attribute__((used)) static interpose_t _interpose_##_func \ + __attribute__((section("__DATA,__interpose"))) = { \ + (uintptr_t)&_interceptor, (uintptr_t)&_func}; + +#define DEFINE_LIBC_INTERCEPTOR(name, ret, params, args) \ + DEFINE_LIBC_HOOK(name, ret, params, args) \ + \ + __attribute__((visibility("default"))) \ + ret interposed_##name(UNWRAP_VA_ARGS params) { \ + ret result = name(UNWRAP_VA_ARGS args); \ + __sanitizer_weak_hook_##name(GET_CALLER_PC(), UNWRAP_VA_ARGS args, \ + result); \ + return result; \ + } \ + \ + INTERPOSE(interposed_##name, name) +#else +// TODO: Use https://github.com/microsoft/Detours to add Windows support. 
+#error "jazzer_preload is not supported on this OS" +#endif + +DEFINE_LIBC_INTERCEPTOR(bcmp, int, (const void *s1, const void *s2, size_t n), + (s1, s2, n)) +DEFINE_LIBC_INTERCEPTOR(memcmp, int, (const void *s1, const void *s2, size_t n), + (s1, s2, n)) +DEFINE_LIBC_INTERCEPTOR(strncmp, int, + (const char *s1, const char *s2, size_t n), (s1, s2, n)) +DEFINE_LIBC_INTERCEPTOR(strncasecmp, int, + (const char *s1, const char *s2, size_t n), (s1, s2, n)) +DEFINE_LIBC_INTERCEPTOR(strcmp, int, (const char *s1, const char *s2), (s1, s2)) +DEFINE_LIBC_INTERCEPTOR(strcasecmp, int, (const char *s1, const char *s2), + (s1, s2)) +DEFINE_LIBC_INTERCEPTOR(strstr, char *, (const char *s1, const char *s2), + (s1, s2)) +DEFINE_LIBC_INTERCEPTOR(strcasestr, char *, (const char *s1, const char *s2), + (s1, s2)) +DEFINE_LIBC_INTERCEPTOR(memmem, void *, + (const void *s1, size_t n1, const void *s2, size_t n2), + (s1, n1, s2, n2)) + +// Native libraries instrumented for fuzzing include references to fuzzer hooks +// that are resolved by the dynamic linker. We need to route these to the +// corresponding local symbols in the Jazzer driver JNI library. +// The __sanitizer_cov_trace_* family of functions is only invoked from code +// compiled with -fsanitize=fuzzer. We can assume that the Jazzer JNI library +// has been loaded before any such code, which necessarily belongs to the fuzz +// target, is executed and thus don't need NULL checks. 
+#define DEFINE_TRACE_HOOK(name, params, args) \ + typedef void (*trace_##name##_t)(void *, UNWRAP_VA_ARGS params); \ + static _Atomic trace_##name##_t trace_##name##_with_pc; \ + \ + __attribute__((visibility("default"))) void __sanitizer_cov_trace_##name( \ + UNWRAP_VA_ARGS params) { \ + trace_##name##_t hook = \ + atomic_load_explicit(&trace_##name##_with_pc, memory_order_relaxed); \ + hook(GET_CALLER_PC(), UNWRAP_VA_ARGS args); \ + } + +#define INIT_TRACE_HOOK(handle, name) \ + atomic_store(&trace_##name##_with_pc, \ + dlsym(handle, "__sanitizer_cov_trace_" #name "_with_pc")) + +DEFINE_TRACE_HOOK(cmp1, (uint8_t arg1, uint8_t arg2), (arg1, arg2)); +DEFINE_TRACE_HOOK(cmp2, (uint16_t arg1, uint16_t arg2), (arg1, arg2)); +DEFINE_TRACE_HOOK(cmp4, (uint32_t arg1, uint32_t arg2), (arg1, arg2)); +DEFINE_TRACE_HOOK(cmp8, (uint64_t arg1, uint64_t arg2), (arg1, arg2)); + +DEFINE_TRACE_HOOK(const_cmp1, (uint8_t arg1, uint8_t arg2), (arg1, arg2)); +DEFINE_TRACE_HOOK(const_cmp2, (uint16_t arg1, uint16_t arg2), (arg1, arg2)); +DEFINE_TRACE_HOOK(const_cmp4, (uint32_t arg1, uint32_t arg2), (arg1, arg2)); +DEFINE_TRACE_HOOK(const_cmp8, (uint64_t arg1, uint64_t arg2), (arg1, arg2)); + +DEFINE_TRACE_HOOK(switch, (uint64_t val, uint64_t *cases), (val, cases)); + +DEFINE_TRACE_HOOK(div4, (uint32_t arg), (arg)) +DEFINE_TRACE_HOOK(div8, (uint64_t arg), (arg)) + +DEFINE_TRACE_HOOK(gep, (uintptr_t arg), (arg)) + +DEFINE_TRACE_HOOK(pc_indir, (uintptr_t arg), (arg)) + +typedef void (*cov_8bit_counters_init_t)(uint8_t *, uint8_t *); +static _Atomic cov_8bit_counters_init_t cov_8bit_counters_init; +typedef void (*cov_pcs_init_t)(const uintptr_t *, const uintptr_t *); +static _Atomic cov_pcs_init_t cov_pcs_init; + +__attribute__((visibility("default"))) void __sanitizer_cov_8bit_counters_init( + uint8_t *start, uint8_t *end) { + cov_8bit_counters_init_t init = + atomic_load_explicit(&cov_8bit_counters_init, memory_order_relaxed); + init(start, end); +} + 
+__attribute__((visibility("default"))) void __sanitizer_cov_pcs_init( + const uintptr_t *pcs_beg, const uintptr_t *pcs_end) { + cov_pcs_init_t init = + atomic_load_explicit(&cov_pcs_init, memory_order_relaxed); + init(pcs_beg, pcs_end); +} + +// TODO: This is never updated and thus doesn't provide any information to the +// fuzzer. +__attribute__(( + visibility("default"))) _Thread_local uintptr_t __sancov_lowest_stack = 0; + +__attribute__((visibility("default"))) void jazzer_preload_init(void *handle) { + INIT_LIBC_HOOK(handle, bcmp); + INIT_LIBC_HOOK(handle, memcmp); + INIT_LIBC_HOOK(handle, strncmp); + INIT_LIBC_HOOK(handle, strcmp); + INIT_LIBC_HOOK(handle, strncasecmp); + INIT_LIBC_HOOK(handle, strcasecmp); + INIT_LIBC_HOOK(handle, strstr); + INIT_LIBC_HOOK(handle, strcasestr); + INIT_LIBC_HOOK(handle, memmem); + + INIT_TRACE_HOOK(handle, cmp1); + INIT_TRACE_HOOK(handle, cmp2); + INIT_TRACE_HOOK(handle, cmp4); + INIT_TRACE_HOOK(handle, cmp8); + + INIT_TRACE_HOOK(handle, const_cmp1); + INIT_TRACE_HOOK(handle, const_cmp2); + INIT_TRACE_HOOK(handle, const_cmp4); + INIT_TRACE_HOOK(handle, const_cmp8); + + INIT_TRACE_HOOK(handle, switch); + + INIT_TRACE_HOOK(handle, div4); + INIT_TRACE_HOOK(handle, div8); + + INIT_TRACE_HOOK(handle, gep); + + INIT_TRACE_HOOK(handle, pc_indir); + + atomic_store(&cov_8bit_counters_init, + dlsym(handle, "__sanitizer_cov_8bit_counters_init")); + atomic_store(&cov_pcs_init, dlsym(handle, "__sanitizer_cov_pcs_init")); +} |