// Copyright 2021 Code Intelligence GmbH // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "libfuzzer_callbacks.h" #include #include #include #include #include #include #include "absl/strings/match.h" #include "absl/strings/str_format.h" #include "absl/strings/str_split.h" #include "gflags/gflags.h" #include "glog/logging.h" #include "sanitizer_hooks_with_pc.h" DEFINE_bool( fake_pcs, false, "Supply synthetic Java program counters to libFuzzer trace hooks to " "make value profiling more effective. Enabled by default if " "-use_value_profile=1 is specified."); namespace { const char kLibfuzzerTraceDataFlowHooksClass[] = "com/code_intelligence/jazzer/runtime/" "TraceDataFlowNativeCallbacks"; extern "C" { void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, std::size_t n, int result); void __sanitizer_weak_hook_compare_bytes(void *caller_pc, const void *s1, const void *s2, std::size_t n1, std::size_t n2, int result); void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2, int result); void __sanitizer_weak_hook_strstr(void *caller_pc, const char *s1, const char *s2, const char *result); void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2); void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2); void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases); void __sanitizer_cov_trace_div4(uint32_t val); void __sanitizer_cov_trace_div8(uint64_t val); void __sanitizer_cov_trace_gep(uintptr_t idx); } inline __attribute__((always_inline)) void *idToPc(jint id) { return reinterpret_cast(static_cast(id)); } void JNICALL libfuzzerStringCompareCallback(JNIEnv &env, jclass cls, jstring s1, jstring s2, jint result, jint id) { const char *s1_native = env.GetStringUTFChars(s1, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); std::size_t n1 = env.GetStringUTFLength(s1); if (env.ExceptionCheck()) env.ExceptionDescribe(); const char *s2_native = env.GetStringUTFChars(s2, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); std::size_t n2 = env.GetStringUTFLength(s2); if (env.ExceptionCheck()) env.ExceptionDescribe(); __sanitizer_weak_hook_compare_bytes(idToPc(id), s1_native, s2_native, n1, n2, result); env.ReleaseStringUTFChars(s1, s1_native); if (env.ExceptionCheck()) env.ExceptionDescribe(); env.ReleaseStringUTFChars(s2, s2_native); if (env.ExceptionCheck()) env.ExceptionDescribe(); } void JNICALL libfuzzerStringContainCallback(JNIEnv &env, jclass cls, jstring s1, jstring s2, jint id) { const char *s1_native = env.GetStringUTFChars(s1, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); const char *s2_native = env.GetStringUTFChars(s2, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); // libFuzzer currently ignores the result, which allows us to simply pass a // valid but arbitrary pointer here instead of performing an actual strstr // operation. __sanitizer_weak_hook_strstr(idToPc(id), s1_native, s2_native, s1_native); env.ReleaseStringUTFChars(s1, s1_native); if (env.ExceptionCheck()) env.ExceptionDescribe(); env.ReleaseStringUTFChars(s2, s2_native); if (env.ExceptionCheck()) env.ExceptionDescribe(); } void JNICALL libfuzzerByteCompareCallback(JNIEnv &env, jclass cls, jbyteArray b1, jbyteArray b2, jint result, jint id) { jbyte *b1_native = env.GetByteArrayElements(b1, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); jbyte *b2_native = env.GetByteArrayElements(b2, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); jint b1_length = env.GetArrayLength(b1); if (env.ExceptionCheck()) env.ExceptionDescribe(); jint b2_length = env.GetArrayLength(b2); if (env.ExceptionCheck()) env.ExceptionDescribe(); __sanitizer_weak_hook_compare_bytes(idToPc(id), b1_native, b2_native, b1_length, b2_length, result); env.ReleaseByteArrayElements(b1, b1_native, JNI_ABORT); if (env.ExceptionCheck()) env.ExceptionDescribe(); env.ReleaseByteArrayElements(b2, b2_native, JNI_ABORT); if (env.ExceptionCheck()) env.ExceptionDescribe(); } void JNICALL libfuzzerLongCompareCallback(JNIEnv &env, jclass cls, jlong value1, jlong value2, jint id) { __sanitizer_cov_trace_cmp8(value1, value2); } void JNICALL libfuzzerLongCompareCallbackWithPc(JNIEnv &env, jclass cls, jlong value1, jlong value2, jint id) { __sanitizer_cov_trace_cmp8_with_pc(idToPc(id), value1, value2); } void JNICALL libfuzzerIntCompareCallback(JNIEnv &env, jclass cls, jint value1, jint value2, jint id) { __sanitizer_cov_trace_cmp4(value1, value2); } void JNICALL libfuzzerIntCompareCallbackWithPc(JNIEnv &env, jclass cls, jint value1, jint value2, jint id) { __sanitizer_cov_trace_cmp4_with_pc(idToPc(id), value1, value2); } void JNICALL libfuzzerSwitchCaseCallback(JNIEnv &env, jclass cls, jlong switch_value, jlongArray libfuzzer_case_values, jint id) { jlong *case_values = env.GetLongArrayElements(libfuzzer_case_values, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); __sanitizer_cov_trace_switch(switch_value, reinterpret_cast(case_values)); env.ReleaseLongArrayElements(libfuzzer_case_values, case_values, JNI_ABORT); if (env.ExceptionCheck()) env.ExceptionDescribe(); } void JNICALL libfuzzerSwitchCaseCallbackWithPc(JNIEnv &env, jclass cls, jlong switch_value, jlongArray libfuzzer_case_values, jint id) { jlong *case_values = env.GetLongArrayElements(libfuzzer_case_values, nullptr); if (env.ExceptionCheck()) env.ExceptionDescribe(); __sanitizer_cov_trace_switch_with_pc( idToPc(id), switch_value, reinterpret_cast(case_values)); env.ReleaseLongArrayElements(libfuzzer_case_values, case_values, JNI_ABORT); if (env.ExceptionCheck()) env.ExceptionDescribe(); } void JNICALL libfuzzerLongDivCallback(JNIEnv &env, jclass cls, jlong value, jint id) { __sanitizer_cov_trace_div8(value); } void JNICALL libfuzzerLongDivCallbackWithPc(JNIEnv &env, jclass cls, jlong value, jint id) { __sanitizer_cov_trace_div8_with_pc(idToPc(id), value); } void JNICALL libfuzzerIntDivCallback(JNIEnv &env, jclass cls, jint value, jint id) { __sanitizer_cov_trace_div4(value); } void JNICALL libfuzzerIntDivCallbackWithPc(JNIEnv &env, jclass cls, jint value, jint id) { __sanitizer_cov_trace_div4_with_pc(idToPc(id), value); } void JNICALL libfuzzerGepCallback(JNIEnv &env, jclass cls, jlong idx, jint id) { __sanitizer_cov_trace_gep(static_cast(idx)); } void JNICALL libfuzzerGepCallbackWithPc(JNIEnv &env, jclass cls, jlong idx, jint id) { __sanitizer_cov_trace_gep_with_pc(idToPc(id), static_cast(idx)); } void JNICALL libfuzzerPcIndirCallback(JNIEnv &env, jclass cls, jint caller_id, jint callee_id) { __sanitizer_cov_trace_pc_indir_with_pc(idToPc(caller_id), static_cast(callee_id)); } bool is_using_native_libraries = false; std::once_flag ignore_list_flag; std::vector> ignore_for_interception_ranges; extern "C" [[maybe_unused]] bool __sanitizer_weak_is_relevant_pc( void *caller_pc) { // If the fuzz target is not using native libraries, calls to strcmp, memcmp, // etc. should never be intercepted. The values reported if they were at best // duplicate the values received from our bytecode instrumentation and at // worst pollute the table of recent compares with string internal to the JDK. if (!is_using_native_libraries) return false; // If the fuzz target is using native libraries, intercept calls only if they // don't originate from those address ranges that are known to belong to the // JDK. return std::none_of(ignore_for_interception_ranges.cbegin(), ignore_for_interception_ranges.cend(), [caller_pc](const auto &range) { uintptr_t start; uintptr_t end; std::tie(start, end) = range; auto address = reinterpret_cast(caller_pc); return start <= address && address <= end; }); } /** * Adds the address ranges of executable segmentes of the library lib_name to * the ignorelist for C standard library function interception (strcmp, memcmp, * ...). */ void ignoreLibraryForInterception(const std::string &lib_name) { const auto num_address_ranges = ignore_for_interception_ranges.size(); std::ifstream loaded_libs("/proc/self/maps"); if (!loaded_libs) { // This early exit is taken e.g. on macOS, where /proc does not exist. return; } std::string line; while (std::getline(loaded_libs, line)) { if (!absl::StrContains(line, lib_name)) continue; // clang-format off // A typical line looks as follows: // 7f15356c9000-7f1536367000 r-xp 0020d000 fd:01 19275673 /usr/lib/jvm/java-15-openjdk-amd64/lib/server/libjvm.so // clang-format on std::vector parts = absl::StrSplit(line, ' ', absl::SkipEmpty()); if (parts.size() != 6) { std::cout << "ERROR: Invalid format for /proc/self/maps\n" << line << std::endl; exit(1); } // Skip non-executable address rang"s. if (!absl::StrContains(parts[1], "x")) continue; std::string_view range_str = parts[0]; std::vector range = absl::StrSplit(range_str, "-"); if (range.size() != 2) { std::cout << "ERROR: Unexpected address range format in /proc/self/maps line: " << range_str << std::endl; exit(1); } std::size_t pos; auto start = std::stoull(range[0], &pos, 16); if (pos != range[0].size()) { std::cout << "ERROR: Unexpected address range format in /proc/self/maps line: " << range_str << std::endl; exit(1); } auto end = std::stoull(range[1], &pos, 16); if (pos != range[0].size()) { std::cout << "ERROR: Unexpected address range format in /proc/self/maps line: " << range_str << std::endl; exit(1); } ignore_for_interception_ranges.emplace_back(start, end); } const auto num_code_segments = ignore_for_interception_ranges.size() - num_address_ranges; LOG(INFO) << "added " << num_code_segments << " code segment of native library " << lib_name << " to interceptor ignorelist"; } const std::vector kLibrariesToIgnoreForInterception = { // The driver executable itself can be treated just like a library. "jazzer_driver", "libinstrument.so", "libjava.so", "libjimage.so", "libjli.so", "libjvm.so", "libnet.so", "libverify.so", "libzip.so", }; void JNICALL handleLibraryLoad(JNIEnv &env, jclass cls) { std::call_once(ignore_list_flag, [] { LOG(INFO) << "detected a native library load, enabling interception for libc " "functions"; for (const auto &lib_name : kLibrariesToIgnoreForInterception) ignoreLibraryForInterception(lib_name); // Enable the ignore list after it has been populated since vector is not // thread-safe with respect to concurrent writes and reads. is_using_native_libraries = true; }); } void registerCallback(JNIEnv &env, const char *java_hooks_class_name, const JNINativeMethod *methods, int num_methods) { auto java_hooks_class = env.FindClass(java_hooks_class_name); if (java_hooks_class == nullptr) { env.ExceptionDescribe(); throw std::runtime_error( absl::StrFormat("could not find class %s", java_hooks_class_name)); } LOG(INFO) << "registering hooks for class " << java_hooks_class_name; env.RegisterNatives(java_hooks_class, methods, num_methods); if (env.ExceptionCheck()) { env.ExceptionDescribe(); throw std::runtime_error("could not register native callbacks"); } } } // namespace namespace jazzer { bool registerFuzzerCallbacks(JNIEnv &env) { if (FLAGS_fake_pcs) { LOG(INFO) << "using callback variants with fake pcs"; CalibrateTrampoline(); } { JNINativeMethod string_methods[]{ {(char *)"traceMemcmp", (char *)"([B[BII)V", (void *)&libfuzzerByteCompareCallback}, {(char *)"traceStrcmp", (char *)"(Ljava/lang/String;Ljava/lang/String;II)V", (void *)&libfuzzerStringCompareCallback}, {(char *)"traceStrstr", (char *)"(Ljava/lang/String;Ljava/lang/String;I)V", (void *)&libfuzzerStringContainCallback}}; registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, string_methods, sizeof(string_methods) / sizeof(string_methods[0])); } { JNINativeMethod cmp_methods[]{ {(char *)"traceCmpLong", (char *)"(JJI)V", (void *)(FLAGS_fake_pcs ? &libfuzzerLongCompareCallbackWithPc : &libfuzzerLongCompareCallback)}, {(char *)"traceCmpInt", (char *)"(III)V", (void *)(FLAGS_fake_pcs ? &libfuzzerIntCompareCallbackWithPc : &libfuzzerIntCompareCallback)}, // libFuzzer internally treats const comparisons the same as // non-constant cmps. {(char *)"traceConstCmpInt", (char *)"(III)V", (void *)(FLAGS_fake_pcs ? &libfuzzerIntCompareCallbackWithPc : &libfuzzerIntCompareCallback)}, {(char *)"traceSwitch", (char *)"(J[JI)V", (void *)(FLAGS_fake_pcs ? &libfuzzerSwitchCaseCallbackWithPc : &libfuzzerSwitchCaseCallback)}}; registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, cmp_methods, sizeof(cmp_methods) / sizeof(cmp_methods[0])); } { JNINativeMethod div_methods[]{ {(char *)"traceDivLong", (char *)"(JI)V", (void *)(FLAGS_fake_pcs ? &libfuzzerLongDivCallbackWithPc : &libfuzzerLongDivCallback)}, {(char *)"traceDivInt", (char *)"(II)V", (void *)(FLAGS_fake_pcs ? &libfuzzerIntDivCallbackWithPc : &libfuzzerIntDivCallback)}}; registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, div_methods, sizeof(div_methods) / sizeof(div_methods[0])); } { JNINativeMethod gep_methods[]{ {(char *)"traceGep", (char *)"(JI)V", (void *)(FLAGS_fake_pcs ? &libfuzzerGepCallbackWithPc : &libfuzzerGepCallback)}}; registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, gep_methods, sizeof(gep_methods) / sizeof(gep_methods[0])); } { JNINativeMethod indir_methods[]{{(char *)"tracePcIndir", (char *)"(II)V", (void *)(&libfuzzerPcIndirCallback)}}; registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, indir_methods, sizeof(indir_methods) / sizeof(indir_methods[0])); } { JNINativeMethod native_methods[]{{(char *)"handleLibraryLoad", (char *)"()V", (void *)(&handleLibraryLoad)}}; registerCallback(env, kLibfuzzerTraceDataFlowHooksClass, native_methods, sizeof(native_methods) / sizeof(native_methods[0])); } return env.ExceptionCheck(); } } // namespace jazzer