diff options
Diffstat (limited to 'src/main/java/com/code_intelligence/jazzer/agent')
6 files changed, 784 insertions, 0 deletions
diff --git a/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt b/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt new file mode 100644 index 00000000..9bcd744f --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/agent/Agent.kt @@ -0,0 +1,172 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@file:JvmName("Agent") + +package com.code_intelligence.jazzer.agent + +import com.code_intelligence.jazzer.driver.Opt +import com.code_intelligence.jazzer.instrumentor.CoverageRecorder +import com.code_intelligence.jazzer.instrumentor.Hooks +import com.code_intelligence.jazzer.instrumentor.InstrumentationType +import com.code_intelligence.jazzer.sanitizers.Constants +import com.code_intelligence.jazzer.utils.ClassNameGlobber +import com.code_intelligence.jazzer.utils.Log +import com.code_intelligence.jazzer.utils.ManifestUtils +import java.lang.instrument.Instrumentation +import java.nio.file.Paths +import kotlin.io.path.exists +import kotlin.io.path.isDirectory + +fun install(instrumentation: Instrumentation) { + installInternal(instrumentation) +} + +fun installInternal( + instrumentation: Instrumentation, + userHookNames: List<String> = findManifestCustomHookNames() + Opt.customHooks, + disabledHookNames: List<String> = Opt.disabledHooks, + instrumentationIncludes: List<String> = Opt.instrumentationIncludes.get(), + instrumentationExcludes: List<String> = Opt.instrumentationExcludes.get(), + 
customHookIncludes: List<String> = Opt.customHookIncludes.get(), + customHookExcludes: List<String> = Opt.customHookExcludes.get(), + trace: List<String> = Opt.trace, + idSyncFile: String? = Opt.idSyncFile, + dumpClassesDir: String = Opt.dumpClassesDir, + additionalClassesExcludes: List<String> = Opt.additionalClassesExcludes, +) { + val allCustomHookNames = (Constants.SANITIZER_HOOK_NAMES + userHookNames).toSet() + check(allCustomHookNames.isNotEmpty()) { "No hooks registered; expected at least the built-in hooks" } + val customHookNames = allCustomHookNames - disabledHookNames.toSet() + val disabledCustomHooksToPrint = allCustomHookNames - customHookNames.toSet() + if (disabledCustomHooksToPrint.isNotEmpty()) { + Log.info("Not using the following disabled hooks: ${disabledCustomHooksToPrint.joinToString(", ")}") + } + + val classNameGlobber = ClassNameGlobber(instrumentationIncludes, instrumentationExcludes + customHookNames) + CoverageRecorder.classNameGlobber = classNameGlobber + val customHookClassNameGlobber = ClassNameGlobber(customHookIncludes, customHookExcludes + customHookNames) + // FIXME: Setting trace to the empty string explicitly results in all rather than no trace types + // being applied - this is unintuitive. + val instrumentationTypes = (trace.takeIf { it.isNotEmpty() } ?: listOf("all")).flatMap { + when (it) { + "cmp" -> setOf(InstrumentationType.CMP) + "cov" -> setOf(InstrumentationType.COV) + "div" -> setOf(InstrumentationType.DIV) + "gep" -> setOf(InstrumentationType.GEP) + "indir" -> setOf(InstrumentationType.INDIR) + "native" -> setOf(InstrumentationType.NATIVE) + // Disable GEP instrumentation by default as it appears to negatively affect fuzzing + // performance. Our current GEP instrumentation only reports constant indices, but even + // when we instead reported non-constant indices, they tended to completely fill up the + // table of recent compares and value profile map. 
+ "all" -> InstrumentationType.values().toSet() - InstrumentationType.GEP + else -> { + // Route the warning through Log, like the other diagnostics in this function, instead of + // printing it to stdout with println. + Log.warn("Skipping unknown instrumentation type $it") + emptySet() + } + } + }.toSet() + + val idSyncFilePath = idSyncFile?.takeUnless { it.isEmpty() }?.let { + Paths.get(it).also { path -> + Log.info("Synchronizing coverage IDs in ${path.toAbsolutePath()}") + } + } + val dumpClassesDirPath = dumpClassesDir.takeUnless { it.isEmpty() }?.let { + Paths.get(it).toAbsolutePath().also { path -> + if (path.exists() && path.isDirectory()) { + Log.info("Dumping instrumented classes into $path") + } else { + Log.error("Cannot dump instrumented classes into $path; does not exist or not a directory") + } + } + } + val includedHookNames = instrumentationTypes + .mapNotNull { type -> + when (type) { + InstrumentationType.CMP -> "com.code_intelligence.jazzer.runtime.TraceCmpHooks" + InstrumentationType.DIV -> "com.code_intelligence.jazzer.runtime.TraceDivHooks" + InstrumentationType.INDIR -> "com.code_intelligence.jazzer.runtime.TraceIndirHooks" + InstrumentationType.NATIVE -> "com.code_intelligence.jazzer.runtime.NativeLibHooks" + else -> null + } + } + val coverageIdSynchronizer = if (idSyncFilePath != null) { + FileSyncCoverageIdStrategy(idSyncFilePath) + } else { + MemSyncCoverageIdStrategy() + } + + // If we don't append the JARs containing the custom hooks to the bootstrap class loader, + // third-party hooks not contained in the agent JAR will not be able to instrument Java standard + // library classes. These classes are loaded by the bootstrap / system class loader and would + // not be considered when resolving references to hook methods, leading to NoClassDefFoundError + // being thrown. 
+ Hooks.appendHooksToBootstrapClassLoaderSearch(instrumentation, customHookNames.toSet()) + val (includedHooks, customHooks) = Hooks.loadHooks(additionalClassesExcludes, includedHookNames.toSet(), customHookNames.toSet()) + + val runtimeInstrumentor = RuntimeInstrumentor( + instrumentation, + classNameGlobber, + customHookClassNameGlobber, + instrumentationTypes, + includedHooks.hooks, + customHooks.hooks, + customHooks.additionalHookClassNameGlobber, + coverageIdSynchronizer, + dumpClassesDirPath, + ) + + // These classes are e.g. dependencies of the RuntimeInstrumentor or hooks and thus were loaded + // before the instrumentor was ready. Since we haven't enabled it yet, they can safely be + // "retransformed": They haven't been transformed yet. + val classesToRetransform = instrumentation.allLoadedClasses + .filter { + classNameGlobber.includes(it.name) || + customHookClassNameGlobber.includes(it.name) || + customHooks.additionalHookClassNameGlobber.includes(it.name) + } + .filter { + instrumentation.isModifiableClass(it) + } + .toTypedArray() + + instrumentation.addTransformer(runtimeInstrumentor, true) + + if (classesToRetransform.isNotEmpty()) { + if (instrumentation.isRetransformClassesSupported) { + retransformClassesWithRetry(instrumentation, classesToRetransform) + } + } +} + +private fun retransformClassesWithRetry(instrumentation: Instrumentation, classesToRetransform: Array<Class<*>>) { + try { + instrumentation.retransformClasses(*classesToRetransform) + } catch (e: Throwable) { + if (classesToRetransform.size == 1) { + Log.warn("Error retransforming class ${classesToRetransform[0].name }", e) + } else { + // The docs state that no transformation was performed if an exception is thrown. + // Try again in a binary search fashion, until the not transformable classes have been isolated and reported. 
+ retransformClassesWithRetry(instrumentation, classesToRetransform.copyOfRange(0, classesToRetransform.size / 2)) + retransformClassesWithRetry(instrumentation, classesToRetransform.copyOfRange(classesToRetransform.size / 2, classesToRetransform.size)) + } + } +} + +private fun findManifestCustomHookNames() = ManifestUtils.combineManifestValues(ManifestUtils.HOOK_CLASSES) + .flatMap { it.split(':') } + .filter { it.isNotBlank() } diff --git a/src/main/java/com/code_intelligence/jazzer/agent/AgentInstaller.java b/src/main/java/com/code_intelligence/jazzer/agent/AgentInstaller.java new file mode 100644 index 00000000..5dd041ac --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/agent/AgentInstaller.java @@ -0,0 +1,58 @@ +// Copyright 2022 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.code_intelligence.jazzer.agent; + +import static com.code_intelligence.jazzer.agent.AgentUtils.extractBootstrapJar; +import static com.code_intelligence.jazzer.runtime.Constants.IS_ANDROID; + +import java.lang.instrument.Instrumentation; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.concurrent.atomic.AtomicBoolean; +import net.bytebuddy.agent.ByteBuddyAgent; + +public class AgentInstaller { + private static final AtomicBoolean hasBeenInstalled = new AtomicBoolean(); + + /** + * Appends the parts of Jazzer that have to be visible to all classes, including those in the Java + * standard library, to the bootstrap class loader path. Additionally, if enableAgent is true, + * also enables the Jazzer agent that instruments classes for fuzzing. + */ + public static void install(boolean enableAgent) { + // Only install the agent once. + if (!hasBeenInstalled.compareAndSet(false, true)) { + return; + } + + if (IS_ANDROID) { + return; + } + + Instrumentation instrumentation = ByteBuddyAgent.install(); + instrumentation.appendToBootstrapClassLoaderSearch(extractBootstrapJar()); + if (!enableAgent) { + return; + } + try { + Class<?> agent = Class.forName("com.code_intelligence.jazzer.agent.Agent"); + Method install = agent.getMethod("install", Instrumentation.class); + install.invoke(null, instrumentation); + } catch (ClassNotFoundException | InvocationTargetException | NoSuchMethodException + | IllegalAccessException e) { + throw new IllegalStateException("Failed to run Agent.install", e); + } + } +} diff --git a/src/main/java/com/code_intelligence/jazzer/agent/AgentUtils.java b/src/main/java/com/code_intelligence/jazzer/agent/AgentUtils.java new file mode 100644 index 00000000..e654252a --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/agent/AgentUtils.java @@ -0,0 +1,45 @@ +/* + * Copyright 2023 Code Intelligence GmbH + * + * Licensed under the Apache License, Version 2.0 (the "License"); 
+ * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.code_intelligence.jazzer.agent; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.StandardCopyOption; +import java.util.jar.JarFile; + +final class AgentUtils { + private static final String BOOTSTRAP_JAR = + "/com/code_intelligence/jazzer/runtime/jazzer_bootstrap.jar"; + + public static JarFile extractBootstrapJar() { + try (InputStream bootstrapJarStream = AgentUtils.class.getResourceAsStream(BOOTSTRAP_JAR)) { + if (bootstrapJarStream == null) { + throw new IllegalStateException("Failed to find Jazzer agent bootstrap jar in resources"); + } + File bootstrapJar = Files.createTempFile("jazzer-agent-", ".jar").toFile(); + bootstrapJar.deleteOnExit(); + Files.copy(bootstrapJarStream, bootstrapJar.toPath(), StandardCopyOption.REPLACE_EXISTING); + return new JarFile(bootstrapJar); + } catch (IOException e) { + throw new IllegalStateException("Failed to extract Jazzer agent bootstrap jar", e); + } + } + + private AgentUtils() {} +} diff --git a/src/main/java/com/code_intelligence/jazzer/agent/BUILD.bazel b/src/main/java/com/code_intelligence/jazzer/agent/BUILD.bazel new file mode 100644 index 00000000..89acbda3 --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/agent/BUILD.bazel @@ -0,0 +1,43 @@ +load("@io_bazel_rules_kotlin//kotlin:jvm.bzl", "kt_jvm_library") +load("//bazel:kotlin.bzl", "ktlint") + +java_library( + name = "agent_installer", + srcs = 
["AgentInstaller.java"], + resources = select({ + "@platforms//os:android": [ + "//src/main/java/com/code_intelligence/jazzer/android:jazzer_bootstrap_android", + ], + "//conditions:default": [ + "//src/main/java/com/code_intelligence/jazzer/runtime:jazzer_bootstrap", + ], + }), + visibility = ["//visibility:public"], + deps = [ + ":agent_lib", + "//src/main/java/com/code_intelligence/jazzer/driver:opt", + "//src/main/java/com/code_intelligence/jazzer/runtime:constants", + "@net_bytebuddy_byte_buddy_agent//jar", + ], +) + +kt_jvm_library( + name = "agent_lib", + srcs = [ + "Agent.kt", + "AgentUtils.java", + "CoverageIdStrategy.kt", + "RuntimeInstrumentor.kt", + ], + deps = [ + "//sanitizers/src/main/java/com/code_intelligence/jazzer/sanitizers:constants", + "//src/main/java/com/code_intelligence/jazzer/driver:opt", + "//src/main/java/com/code_intelligence/jazzer/instrumentor", + "//src/main/java/com/code_intelligence/jazzer/utils:class_name_globber", + "//src/main/java/com/code_intelligence/jazzer/utils:log", + "//src/main/java/com/code_intelligence/jazzer/utils:manifest_utils", + "@com_github_classgraph_classgraph//:classgraph", + ], +) + +ktlint() diff --git a/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt b/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt new file mode 100644 index 00000000..75d76003 --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/agent/CoverageIdStrategy.kt @@ -0,0 +1,223 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package com.code_intelligence.jazzer.agent + +import com.code_intelligence.jazzer.utils.Log +import java.nio.ByteBuffer +import java.nio.channels.FileChannel +import java.nio.channels.FileLock +import java.nio.file.Path +import java.nio.file.StandardOpenOption +import java.util.UUID + +/** + * Indicates a fatal failure to generate synchronized coverage IDs. + */ +class CoverageIdException(cause: Throwable? = null) : + RuntimeException("Failed to synchronize coverage IDs", cause) + +/** + * [CoverageIdStrategy] provides an abstraction to switch between context specific coverage ID generation. + * + * Coverage (i.e., edge) IDs differ from other kinds of IDs, such as those generated for call sites or cmp + * instructions, in that they should be consecutive, collision-free, and lie in a known, small range. + * This precludes us from generating them simply as hashes of class names. + */ +interface CoverageIdStrategy { + + /** + * [withIdForClass] provides the initial coverage ID of the given [className] as parameter to the + * [block] to execute. [block] has to return the number of additionally used IDs. + */ + @Throws(CoverageIdException::class) + fun withIdForClass(className: String, block: (Int) -> Int) +} + +/** + * A memory synced strategy for coverage ID generation. + * + * This strategy uses a synchronized block to guard access to a global edge ID counter. + * Even though concurrent fuzzing is not fully supported this strategy enables consistent coverage + * IDs in case of concurrent class loading. + * + * It only prevents races within one VM instance. 
+ */ +class MemSyncCoverageIdStrategy : CoverageIdStrategy { + private var nextEdgeId = 0 + + @Synchronized + override fun withIdForClass(className: String, block: (Int) -> Int) { + nextEdgeId += block(nextEdgeId) + } +} + +/** + * A strategy for coverage ID generation that synchronizes the IDs assigned to a class with other processes via the + * specified [idSyncFile]. + * This class takes care of synchronizing the access to the file between multiple processes as long as the general + * contract of [CoverageIdStrategy] is followed. + */ +class FileSyncCoverageIdStrategy(private val idSyncFile: Path) : CoverageIdStrategy { + private val uuid: UUID = UUID.randomUUID() + private var idFileLock: FileLock? = null + + private var cachedFirstId: Int? = null + private var cachedClassName: String? = null + private var cachedIdCount: Int? = null + + /** + * This method is synchronized to prevent concurrent access to the internal file lock which would result in + * [java.nio.channels.OverlappingFileLockException]. Furthermore, every coverage ID obtained by [obtainFirstId] + * is always committed back again to the sync file by [commitIdCount]. + */ + @Synchronized + override fun withIdForClass(className: String, block: (Int) -> Int) { + var actualNumEdgeIds = 0 + try { + val firstId = obtainFirstId(className) + actualNumEdgeIds = block(firstId) + } finally { + commitIdCount(actualNumEdgeIds) + } + } + + /** + * Obtains a coverage ID for [className] such that all cooperating agent processes will obtain the same ID. + * There are two cases to consider: + * - This agent process is the first to encounter [className], i.e., it does not find a record for that class in + * [idSyncFile]. In this case, a lock on the file is held until the class has been instrumented and a record with + * the required number of coverage IDs has been added. + * - Another agent process has already encountered [className], i.e., there is a record for that class in [idSyncFile]. 
+ * In this case, the lock on the file is released immediately and the extracted first coverage ID is returned to + * the caller. The caller is still expected to call [commitIdCount] so that desynchronization can be detected. + */ + private fun obtainFirstId(className: String): Int { + try { + check(idFileLock == null) { "Already holding a lock on the ID file" } + val localIdFile = FileChannel.open( + idSyncFile, + StandardOpenOption.WRITE, + StandardOpenOption.READ, + ) + // Wait until we have obtained the lock on the sync file. We hold the lock from this point until we have + // finished reading and writing (if necessary) to the file. + val localIdFileLock = localIdFile.lock() + check(localIdFileLock.isValid && !localIdFileLock.isShared) + // Parse the sync file, which consists of lines of the form + // <class name>:<first ID>:<num IDs>:<uuid> + // where <uuid> is the random UUID of the strategy instance that wrote the line. Only the first three + // fields are used below; the fourth is merely required to be present. + val idInfo = localIdFileLock.channel().readFully() + .lineSequence() + .filterNot { it.isBlank() } + .map { line -> + val parts = line.split(':') + check(parts.size == 4) { + "Expected ID file line to be of the form '<class name>:<first ID>:<num IDs>:<uuid>', got '$line'" + } + val lineClassName = parts[0] + val lineFirstId = parts[1].toInt() + check(lineFirstId >= 0) { "Negative first ID in line: $line" } + val lineIdCount = parts[2].toInt() + check(lineIdCount >= 0) { "Negative ID count in line: $line" } + Triple(lineClassName, lineFirstId, lineIdCount) + }.toList() + cachedClassName = className + val idInfoForClass = idInfo.filter { it.first == className } + return when (idInfoForClass.size) { + 0 -> { + // We are the first to encounter this class and thus need to hold the lock until the class has been + // instrumented and we know the required number of coverage IDs. + idFileLock = localIdFileLock + // Compute the next free ID as the maximum over the sums of first ID and ID count, starting at 0 if + // this is the first ID to be assigned. 
In fact, since this is the only way new lines are added to + // the file, the maximum is always attained by the last line. + val nextFreeId = idInfo.asSequence().map { it.second + it.third }.lastOrNull() ?: 0 + cachedFirstId = nextFreeId + nextFreeId + } + 1 -> { + // This class has already been instrumented elsewhere, so we just return the first ID and ID count + // reported from there and release the lock right away. The caller is still expected to call + // commitIdCount. + localIdFile.close() + cachedIdCount = idInfoForClass.single().third + idInfoForClass.single().second + } + else -> { + localIdFile.close() + Log.println(idInfo.joinToString("\n") { "${it.first}:${it.second}:${it.third}" }) + throw IllegalStateException("Multiple entries for $className in ID file") + } + } + } catch (e: Exception) { + throw CoverageIdException(e) + } + } + + /** + * Records the number of coverage IDs used to instrument the class specified in a previous call to [obtainFirstId]. + * If instrumenting the class should fail, this function must still be called. In this case, [idCount] is set to 0. + */ + private fun commitIdCount(idCount: Int) { + val localIdFileLock = idFileLock + try { + check(cachedClassName != null) + if (localIdFileLock == null) { + // We released the lock already in obtainFirstId since the class had already been instrumented + // elsewhere. As we know the expected number of IDs for the current class in this case, check for + // deviations. + check(cachedIdCount != null) + check(idCount == cachedIdCount) { + "$cachedClassName has $idCount edges, but $cachedIdCount edges reserved in ID file" + } + } else { + // We are the first to instrument this class and should record the number of IDs in the sync file. 
+ check(cachedFirstId != null) + localIdFileLock.channel().append("$cachedClassName:$cachedFirstId:$idCount:$uuid\n") + localIdFileLock.channel().force(true) + } + idFileLock = null + cachedFirstId = null + cachedIdCount = null + cachedClassName = null + } catch (e: Exception) { + throw CoverageIdException(e) + } finally { + localIdFileLock?.channel()?.close() + } + } +} + +/** + * Reads the [FileChannel] from its current position to the end as a UTF-8 string. + * + * Only the bytes actually read are decoded: if end-of-file is reached before the buffer (which is sized to the whole + * file) is full, the unused tail of the buffer is not turned into trailing NUL characters. + * + * @throws IllegalStateException if the file is larger than [Int.MAX_VALUE] bytes or a read returns zero bytes + */ +fun FileChannel.readFully(): String { + check(size() <= Int.MAX_VALUE) + val buffer = ByteBuffer.allocate(size().toInt()) + while (buffer.hasRemaining()) { + when (read(buffer)) { + 0 -> throw IllegalStateException("No bytes read") + -1 -> break + } + } + // position() is the number of bytes read into the buffer so far; decode only those. + return String(buffer.array(), 0, buffer.position()) +} + +/** + * Appends [string], encoded as UTF-8, to the end of the [FileChannel]. + * + * A single write call on a channel is allowed to write only part of the buffer, so writing is repeated until the + * buffer has been drained. + */ +fun FileChannel.append(string: String) { + position(size()) + val buffer = ByteBuffer.wrap(string.toByteArray()) + while (buffer.hasRemaining()) { + write(buffer) + } +} diff --git a/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt b/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt new file mode 100644 index 00000000..57410f30 --- /dev/null +++ b/src/main/java/com/code_intelligence/jazzer/agent/RuntimeInstrumentor.kt @@ -0,0 +1,243 @@ +// Copyright 2021 Code Intelligence GmbH +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package com.code_intelligence.jazzer.agent + +import com.code_intelligence.jazzer.driver.Opt +import com.code_intelligence.jazzer.instrumentor.ClassInstrumentor +import com.code_intelligence.jazzer.instrumentor.CoverageRecorder +import com.code_intelligence.jazzer.instrumentor.Hook +import com.code_intelligence.jazzer.instrumentor.InstrumentationType +import com.code_intelligence.jazzer.utils.ClassNameGlobber +import com.code_intelligence.jazzer.utils.Log +import io.github.classgraph.ClassGraph +import java.io.File +import java.lang.instrument.ClassFileTransformer +import java.lang.instrument.Instrumentation +import java.nio.file.Path +import java.security.ProtectionDomain +import kotlin.math.roundToInt +import kotlin.system.exitProcess +import kotlin.time.measureTimedValue + +class RuntimeInstrumentor( + private val instrumentation: Instrumentation, + private val classesToFullyInstrument: ClassNameGlobber, + private val classesToHookInstrument: ClassNameGlobber, + private val instrumentationTypes: Set<InstrumentationType>, + private val includedHooks: List<Hook>, + private val customHooks: List<Hook>, + // Dedicated name globber for additional classes to hook stated in hook annotations is needed due to + // existing include and exclude pattern of classesToHookInstrument. All classes are included in hook + // instrumentation except the ones from default excludes, like JDK and Kotlin classes. But additional + // classes to hook, based on annotations, are allowed to reference normally ignored ones, like JDK + // and Kotlin internals. + // FIXME: Adding an additional class to hook will apply _all_ hooks to it and not only the one it's + // defined in. At some point we might want to track the list of classes per custom hook rather than globally. 
+ private val additionalClassesToHookInstrument: ClassNameGlobber, + private val coverageIdSynchronizer: CoverageIdStrategy, + private val dumpClassesDir: Path?, +) : ClassFileTransformer { + + @kotlin.time.ExperimentalTime + override fun transform( + loader: ClassLoader?, + internalClassName: String, + classBeingRedefined: Class<*>?, + protectionDomain: ProtectionDomain?, + classfileBuffer: ByteArray, + ): ByteArray? { + var pathPrefix = "" + if (!Opt.instrumentOnly.isEmpty() && protectionDomain != null) { + // Both the code source of a protection domain and the location of a code source may be null. This code + // runs before the try block below, so a NullPointerException here would escape transform unlogged; + // treat a missing location as "no prefix" instead. + var outputPathPrefix = protectionDomain.codeSource?.location?.file.orEmpty() + if (outputPathPrefix.isNotEmpty()) { + if (outputPathPrefix.contains(File.separator)) { + outputPathPrefix = outputPathPrefix.substring(outputPathPrefix.lastIndexOf(File.separator) + 1, outputPathPrefix.length) + } + + if (outputPathPrefix.endsWith(".jar")) { + outputPathPrefix = outputPathPrefix.substring(0, outputPathPrefix.lastIndexOf(".jar")) + } + + if (outputPathPrefix.isNotEmpty()) { + pathPrefix = outputPathPrefix + File.separator + } + } + } + + return try { + // Bail out early if we would instrument ourselves. This prevents ClassCircularityErrors as we might need to + // load additional Jazzer classes until we reach the full exclusion logic. + if (internalClassName.startsWith("com/code_intelligence/jazzer/")) { + return null + } + // Workaround for a JDK bug (http://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8299798): + // When retransforming a class in the Java standard library, the provided classfileBuffer does not contain + // any StackMapTable attributes. Our transformations require stack map frames to calculate the number of + // local variables and stack slots as well as when adding control flow. + // + // We work around this by reloading the class file contents if we are retransforming (classBeingRedefined + // is also non-null in this situation) and the class is provided by the bootstrap loader. 
+ // + // Alternatives considered: + // Using ClassWriter.COMPUTE_FRAMES as an escape hatch isn't possible in the context of an agent as the + // computation may itself need to load classes, which leads to circular loads and incompatible class + // redefinitions. + transformInternal(internalClassName, classfileBuffer.takeUnless { loader == null && classBeingRedefined != null }) + } catch (t: Throwable) { + // Throwables raised from transform are silently dropped, making it extremely hard to detect instrumentation + // failures. The docs advise to use a top-level try-catch. + // https://docs.oracle.com/javase/9/docs/api/java/lang/instrument/ClassFileTransformer.html + if (dumpClassesDir != null) { + dumpToClassFile(internalClassName, classfileBuffer, basenameSuffix = ".failed", pathPrefix = pathPrefix) + } + Log.warn("Failed to instrument $internalClassName:", t) + throw t + }.also { instrumentedByteCode -> + // Only dump classes that were instrumented. + if (instrumentedByteCode != null && dumpClassesDir != null) { + dumpToClassFile(internalClassName, instrumentedByteCode, pathPrefix = pathPrefix) + dumpToClassFile(internalClassName, classfileBuffer, basenameSuffix = ".original", pathPrefix = pathPrefix) + } + } + } + + private fun dumpToClassFile(internalClassName: String, bytecode: ByteArray, basenameSuffix: String = "", pathPrefix: String = "") { + val relativePath = "$pathPrefix$internalClassName$basenameSuffix.class" + val absolutePath = dumpClassesDir!!.resolve(relativePath) + val dumpFile = absolutePath.toFile() + dumpFile.parentFile.mkdirs() + dumpFile.writeBytes(bytecode) + } + + @kotlin.time.ExperimentalTime + override fun transform( + module: Module?, + loader: ClassLoader?, + internalClassName: String, + classBeingRedefined: Class<*>?, + protectionDomain: ProtectionDomain?, + classfileBuffer: ByteArray, + ): ByteArray? 
{ + try { + if (module != null && !module.canRead(RuntimeInstrumentor::class.java.module)) { + // Make all other modules read our (unnamed) module, which allows them to access the classes needed by the + // instrumentations, e.g. CoverageMap. If a module can't be modified, it should not be instrumented as the + // injected bytecode might throw NoClassDefFoundError. + // https://mail.openjdk.java.net/pipermail/jigsaw-dev/2021-May/014663.html + if (!instrumentation.isModifiableModule(module)) { + val prettyClassName = internalClassName.replace('/', '.') + Log.warn("Failed to instrument $prettyClassName in unmodifiable module ${module.name}, skipping") + return null + } + instrumentation.redefineModule( + module, + setOf(RuntimeInstrumentor::class.java.module), // extraReads + emptyMap(), + emptyMap(), + emptySet(), + emptyMap(), + ) + } + } catch (t: Throwable) { + // Throwables raised from transform are silently dropped, making it extremely hard to detect instrumentation + // failures. The docs advise to use a top-level try-catch. + // https://docs.oracle.com/javase/9/docs/api/java/lang/instrument/ClassFileTransformer.html + if (dumpClassesDir != null) { + dumpToClassFile(internalClassName, classfileBuffer, basenameSuffix = ".failed") + } + Log.warn("Failed to instrument $internalClassName:", t) + throw t + } + return transform(loader, internalClassName, classBeingRedefined, protectionDomain, classfileBuffer) + } + + @kotlin.time.ExperimentalTime + fun transformInternal(internalClassName: String, maybeClassfileBuffer: ByteArray?): ByteArray? { + val (fullInstrumentation, printInfo) = when { + classesToFullyInstrument.includes(internalClassName) -> Pair(true, true) + classesToHookInstrument.includes(internalClassName) -> Pair(false, true) + // The classes to hook specified by hooks are more of an implementation detail of the hook. 
The list is + // always the same unless the set of hooks changes and doesn't help the user judge whether their classes are + // being instrumented, so we don't print info for them. + additionalClassesToHookInstrument.includes(internalClassName) -> Pair(false, false) + else -> return null + } + val className = internalClassName.replace('/', '.') + val classfileBuffer = maybeClassfileBuffer ?: ClassGraph() + .enableSystemJarsAndModules() + .ignoreClassVisibility() + .acceptClasses(className) + .scan() + .use { + it.getClassInfo(className)?.resource?.load() ?: run { + Log.warn("Failed to load bytecode of class $className") + return null + } + } + val (instrumentedBytecode, duration) = measureTimedValue { + try { + instrument(internalClassName, classfileBuffer, fullInstrumentation) + } catch (e: CoverageIdException) { + Log.error("Coverage IDs are out of sync") + e.printStackTrace() + exitProcess(1) + } + } + val durationInMs = duration.inWholeMilliseconds + val sizeIncrease = ((100.0 * (instrumentedBytecode.size - classfileBuffer.size)) / classfileBuffer.size).roundToInt() + if (printInfo) { + if (fullInstrumentation) { + Log.info("Instrumented $className (took $durationInMs ms, size +$sizeIncrease%)") + } else { + Log.info("Instrumented $className with custom hooks only (took $durationInMs ms, size +$sizeIncrease%)") + } + } + return instrumentedBytecode + } + + private fun instrument(internalClassName: String, bytecode: ByteArray, fullInstrumentation: Boolean): ByteArray { + val classWithHooksEnabledField = if (Opt.conditionalHooks) { + // Let the hook instrumentation emit additional logic that checks the value of the + // hooksEnabled field on this class and skips the hook if it is false. 
+ "com/code_intelligence/jazzer/runtime/JazzerInternal" + } else { + null + } + return ClassInstrumentor(internalClassName, bytecode).run { + if (fullInstrumentation) { + // Coverage instrumentation must be performed before any other code updates + // or there will be additional coverage points injected if any calls are inserted + // and JaCoCo will produce a broken coverage report. + coverageIdSynchronizer.withIdForClass(internalClassName) { firstId -> + coverage(firstId).also { actualNumEdgeIds -> + CoverageRecorder.recordInstrumentedClass( + internalClassName, + bytecode, + firstId, + actualNumEdgeIds, + ) + } + } + // Hook instrumentation must be performed after data flow tracing as the injected + // bytecode would trigger the GEP callbacks for byte[]. + traceDataFlow(instrumentationTypes) + hooks(includedHooks + customHooks, classWithHooksEnabledField) + } else { + hooks(customHooks, classWithHooksEnabledField) + } + instrumentedBytecode + } + } +} |