author     Suzanne Candanedo <suzanne.candanedo@arm.com>  2023-03-20 20:31:29 +0000
committer  Guus Sliepen <gsliepen@google.com>  2023-06-06 22:45:36 +0000
commit     83b03c4f316ecc92f4b64f23c024d1f2eef8e523 (patch)
tree       6974de665b4e2693c8a1d827204c2c68d7f4bfd5 /mali_kbase
parent     200a509f48df46d43acdb683966bd367ddc8cbe1 (diff)
During an invalid GPU page fault, kbase will try to flush the GPU cache and
disable the faulting address space (AS). There is a small window between the
flush of the GPU L2 cache (after which the MMU resumes) and the disabling of
the AS, during which existing jobs on the GPU may access memory for that AS
and dirty the GPU cache. This is a problem because kctx->as_nr is marked as
KBASEP_AS_NR_INVALID, so no cache maintenance will be performed on the AS of
the faulty context when cleaning up the csg_slot and releasing the context.

This patch addresses the issue by:

1. locking the AS via a GPU command
2. flushing the cache
3. disabling the AS
4. unlocking the AS

This ensures that any jobs remaining on the GPU cannot access the memory
while the AS is locked. Once the AS is unlocked, any memory access will fail
because the AS is now disabled.

The issue only happens on CSF GPUs; to avoid regressions, the code path for
non-CSF GPUs is left undisturbed.

(cherry picked from commit 566789dffda3dfec00ecf00f9819e7a515fb2c61)

Provenance: https://code.ipdelivery.arm.com/c/GPU/mali-ddk/+/5071
Bug: 274014055
Change-Id: I2028182878b4f88505cc135a5f53ae4c7e734650
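In outline, the new CSF-only kbase_mmu_disable() performs the sequence below.
This is a condensed sketch of the code in the diff that follows, with error
handling, the GPU-reset fallback, and part of the op_param setup elided; the
wrapper name mmu_disable_flow_sketch is illustrative, all other names are
taken from the patch.

/* Condensed sketch of the new CSF kbase_mmu_disable() flow; see the full
 * diff below for mmu_sync_info, the GPU-ctrl flush-skip levels, error
 * handling, and the GPU reset on lock/unlock failure.
 */
static void mmu_disable_flow_sketch(struct kbase_context *kctx)
{
	struct kbase_device *kbdev = kctx->kbdev;
	struct kbase_mmu_hw_op_param op_param = { 0 };

	op_param.vpfn = 0;
	op_param.nr = ~0;
	op_param.op = KBASE_MMU_OP_FLUSH_MEM;
	op_param.kctx_id = kctx->id;

	/* 1. Lock the AS so jobs still running on the GPU cannot touch it. */
	kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);

	/* 2. Flush L2/LSC while the AS is locked, but only when the L2 is in
	 * a stable powered-on state; any other state implies the shader
	 * cores are off and nothing is executing.
	 */
	if (kbdev->pm.backend.l2_state == KBASE_L2_ON)
		kbase_gpu_cache_flush_and_busy_wait(kbdev,
				GPU_COMMAND_CACHE_CLN_INV_L2_LSC);

	/* 3. Disable the AS; subsequent accesses fault instead of reaching
	 * memory.
	 */
	kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);

	/* 4. Unlock the MMU so it can resume. */
	kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr],
				       &op_param);
}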
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c             87
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h          15
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c    8

3 files changed, 109 insertions(+), 1 deletion(-)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 4828cdc..34a97ae 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -2066,6 +2066,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev,
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+#if !MALI_USE_CSF
/**
* kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches
* without retaining the kbase context.
@@ -2119,6 +2120,7 @@ static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_
kbase_reset_gpu_locked(kbdev);
}
}
+#endif
void kbase_mmu_update(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut,
@@ -2140,6 +2142,88 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
kbdev->mmu_mode->disable_as(kbdev, as_nr);
}
+#if MALI_USE_CSF
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_mmu_hw_op_param op_param = { 0 };
+ int lock_err, flush_err;
+
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the hwaccess_lock
+ */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+
+ op_param.vpfn = 0;
+ op_param.nr = ~0;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
+
+#if MALI_USE_CSF
+ /* 0xF value used to prevent skipping of any levels when flushing */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
+#endif
+
+ /* lock MMU to prevent existing jobs on GPU from executing while the AS is
+ * not yet disabled
+ */
+ lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
+ kctx->id);
+
+ /* Issue the flush command only when L2 cache is in stable power on state.
+ * Any other state for L2 cache implies that shader cores are powered off,
+ * which in turn implies there is no execution happening on the GPU.
+ */
+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
+ flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ if (flush_err)
+ dev_err(kbdev->dev,
+ "Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
+ kctx->as_nr, kctx->tgid, kctx->id);
+ }
+ kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
+
+ if (!lock_err) {
+ /* unlock the MMU to allow it to resume */
+ lock_err =
+ kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
+ kctx->tgid, kctx->id);
+ }
+
+#if !MALI_USE_CSF
+ /*
+ * JM GPUs has some L1 read only caches that need to be invalidated
+ * with START_FLUSH configuration. Purge the MMU disabled kctx from
+ * the slot_rb tracking field so such invalidation is performed when
+ * a new katom is executed on the affected slots.
+ */
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+#endif
+
+ /* kbase_gpu_cache_flush_and_busy_wait() will reset the GPU on timeout. Only
+ * reset the GPU if locking or unlocking fails.
+ */
+ if (lock_err)
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu_locked(kbdev);
+}
+#else
void kbase_mmu_disable(struct kbase_context *kctx)
{
/* ASSERT that the context has a valid as_nr, which is only the case
@@ -2172,6 +2256,7 @@ void kbase_mmu_disable(struct kbase_context *kctx)
kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx);
#endif
}
+#endif
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index 438dd5e..3291143 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -133,6 +133,21 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
+ * kbase_mmu_hw_do_lock - Issue a LOCK operation to the MMU.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to struct containing information about the MMU
+ * operation to perform.
+ *
+ * Context: Acquires the hwaccess_lock, expects the caller to hold the mmu_hw_mutex
+ *
+ * Return: Zero if the operation was successful, non-zero otherwise.
+ */
+int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param);
+
+/**
* kbase_mmu_hw_do_flush - Issue a flush operation to the MMU.
*
* @kbdev: Kbase device to issue the MMU operation on.
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 1a6157a..122e9ef 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -410,6 +410,14 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
}
+int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return mmu_hw_do_lock(kbdev, as, op_param);
+}
+
int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
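For reference, kbase_mmu_hw_do_lock() pairs with
kbase_mmu_hw_do_unlock_no_addr(): the lock/unlock must bracket the cache
flush and AS disable, with the hwaccess_lock and mmu_hw_mutex held, as
kbase_mmu_disable() does above. A minimal hypothetical helper illustrating
that contract (mmu_locked_op_sketch is not part of the patch):

/* Hypothetical helper (not part of the patch) showing the contract of the
 * new lock/unlock pair. Caller context mirrors kbase_mmu_disable():
 * hwaccess_lock and mmu_hw_mutex held. Returns 0 on success.
 */
static int mmu_locked_op_sketch(struct kbase_device *kbdev, struct kbase_as *as,
				const struct kbase_mmu_hw_op_param *op_param)
{
	int err;

	lockdep_assert_held(&kbdev->hwaccess_lock);
	lockdep_assert_held(&kbdev->mmu_hw_mutex);

	err = kbase_mmu_hw_do_lock(kbdev, as, op_param);
	if (err)
		return err;

	/* ... flush caches and/or disable the AS while it is locked ... */

	return kbase_mmu_hw_do_unlock_no_addr(kbdev, as, op_param);
}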