author     Kevin DuBois <kevindubois@google.com>   2023-04-20 23:24:54 +0000
committer  Kevin DuBois <kevindubois@google.com>   2023-04-21 17:32:16 +0000
commit     f9b29365a917cd77293a1b82dbe9ea170f6c5e1b (patch)
tree       dfc302831b2140adff39d78701d6e8bd2a7b661e
parent     d4a9cc691fdde6aae0f5d40ad3d949ab76518e42 (diff)
download   gpu-f9b29365a917cd77293a1b82dbe9ea170f6c5e1b.tar.gz
Revert "GPUCORE-36682 Lock MMU while disabling AS to prevent use after free"
This reverts commit d4a9cc691fdde6aae0f5d40ad3d949ab76518e42.

Bug: 274827412
Reason for revert: stability

Change-Id: I929c4e7b11bd5b62a0c14a5b960b32127b26233a
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c            122
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h          15
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c    4
3 files changed, 66 insertions(+), 75 deletions(-)
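
For context on the change being reverted: the revert restores the earlier kbase_mmu_disable() ordering, in which the caches for the address space are flushed and invalidated first (soft-resetting the GPU if the flush does not complete) and only then is the AS disabled, instead of holding an MMU LOCK across the disable. A minimal, hypothetical C sketch of that restored ordering follows; every name in it is a stand-in, not the real driver API.

/* Hypothetical sketch of the flush-then-disable ordering restored by this
 * revert. Stand-in names only; the real code is in kbase_mmu_disable().
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct gpu_ctx {
	int as_nr;      /* address space slot assigned to the context */
	uint32_t id;    /* context id, used for log messages */
};

/* Stand-in for the MMU flush operation; returns 0 on success. */
static int flush_gpu_caches(struct gpu_ctx *ctx, uint64_t vpfn, size_t nr)
{
	printf("flush AS %d: vpfn=%llu nr=%zu\n", ctx->as_nr,
	       (unsigned long long)vpfn, nr);
	return 0;
}

/* Stand-in for mmu_mode->disable_as(). */
static void disable_address_space(struct gpu_ctx *ctx)
{
	printf("disable AS %d\n", ctx->as_nr);
}

/* Stand-in for the soft-reset path taken when the flush does not complete. */
static void soft_reset_gpu(void)
{
	printf("GPU soft-reset\n");
}

/* Mirrors the restored ordering: flush everything, then disable the AS. */
static void mmu_disable(struct gpu_ctx *ctx)
{
	/* The driver flushes the whole AS (vpfn 0, nr ~0). */
	if (flush_gpu_caches(ctx, 0, SIZE_MAX) != 0)
		soft_reset_gpu();   /* assume the GPU hung and recover */

	disable_address_space(ctx);
}

int main(void)
{
	struct gpu_ctx ctx = { .as_nr = 3, .id = 42 };
	mmu_disable(&ctx);
	return 0;
}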
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 88b0197..4828cdc 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -2066,6 +2066,60 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev,
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+/**
+ * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches
+ * without retaining the kbase context.
+ * @kctx: The KBase context.
+ * @vpfn: The virtual page frame number to start the flush on.
+ * @nr: The number of pages to flush.
+ *
+ * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
+ * other locking.
+ */
+static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ int err;
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ struct kbase_mmu_hw_op_param op_param;
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return;
+
+ /* flush L2 and unlock the VA (resumes the MMU) */
+ op_param.vpfn = vpfn;
+ op_param.nr = nr;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ /* Value used to prevent skipping of any levels when flushing */
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
+ err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
+ } else {
+ err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
+ }
+
+ if (err) {
+ /* Flush failed to complete, assume the
+ * GPU has hung and perform a reset to recover
+ */
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu_locked(kbdev);
+ }
+}
+
void kbase_mmu_update(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut,
int as_nr)
@@ -2088,14 +2142,6 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
void kbase_mmu_disable(struct kbase_context *kctx)
{
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
- struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_mmu_hw_op_param op_param = { 0 };
- int lock_err, flush_err;
-
/* ASSERT that the context has a valid as_nr, which is only the case
* when it's scheduled in.
*
@@ -2106,49 +2152,16 @@ void kbase_mmu_disable(struct kbase_context *kctx)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
- op_param.vpfn = 0;
- op_param.nr = ~0;
- op_param.op = KBASE_MMU_OP_FLUSH_MEM;
- op_param.kctx_id = kctx->id;
- op_param.mmu_sync_info = mmu_sync_info;
-
-#if MALI_USE_CSF
- /* 0xF value used to prevent skipping of any levels when flushing */
- if (mmu_flush_cache_on_gpu_ctrl(kbdev))
- op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
-#endif
-
- /* lock MMU to prevent existing jobs on GPU from executing while the AS is
- * not yet disabled
- */
- lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
- if (lock_err)
- dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
- kctx->id);
-
- /* Issue the flush command only when L2 cache is in stable power on state.
- * Any other state for L2 cache implies that shader cores are powered off,
- * which in turn implies there is no execution happening on the GPU.
+ /*
+ * The address space is being disabled, drain all knowledge of it out
+ * from the caches as pages and page tables might be freed after this.
+ *
+ * The job scheduler code will already be holding the locks and context
+ * so just do the flush.
*/
- if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
- flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
- GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
- if (flush_err)
- dev_err(kbdev->dev,
- "Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
- kctx->as_nr, kctx->tgid, kctx->id);
- }
- kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
-
- if (!lock_err) {
- /* unlock the MMU to allow it to resume */
- lock_err =
- kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
- if (lock_err)
- dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
- kctx->tgid, kctx->id);
- }
+ kbase_mmu_flush_noretain(kctx, 0, ~0);
+ kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
#if !MALI_USE_CSF
/*
* JM GPUs has some L1 read only caches that need to be invalidated
@@ -2156,15 +2169,8 @@ void kbase_mmu_disable(struct kbase_context *kctx)
* the slot_rb tracking field so such invalidation is performed when
* a new katom is executed on the affected slots.
*/
- kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+ kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx);
#endif
-
- /* kbase_gpu_cache_flush_and_busy_wait() will reset the GPU on timeout. Only
- * reset the GPU if locking or unlocking fails.
- */
- if (lock_err)
- if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
- kbase_reset_gpu_locked(kbdev);
}
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
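
The restored kbase_mmu_flush_noretain() helper does not take any locks itself; it only asserts, via lockdep, that the caller already holds kbdev->hwaccess_lock and kbdev->mmu_hw_mutex. A small hypothetical kernel-style sketch of that "assert, don't acquire" contract follows; the demo_* names are illustrative, not the driver's.

/* Hypothetical illustration of the locking contract used by
 * kbase_mmu_flush_noretain(): the caller must already hold both locks;
 * the helper only asserts them.
 */
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/lockdep.h>

struct demo_dev {
	spinlock_t hwaccess_lock;    /* spinlock guarding HW access state */
	struct mutex mmu_hw_mutex;   /* serialises MMU hardware operations */
};

/* The helper documents its requirements with lockdep assertions only. */
static void demo_flush(struct demo_dev *dev)
{
	lockdep_assert_held(&dev->hwaccess_lock);
	lockdep_assert_held(&dev->mmu_hw_mutex);
	/* ... issue the flush; no locks are taken or released here ... */
}

/* Caller takes the mutex first, then the spinlock, and calls the helper. */
static void demo_caller(struct demo_dev *dev)
{
	unsigned long flags;

	mutex_lock(&dev->mmu_hw_mutex);
	spin_lock_irqsave(&dev->hwaccess_lock, flags);

	demo_flush(dev);

	spin_unlock_irqrestore(&dev->hwaccess_lock, flags);
	mutex_unlock(&dev->mmu_hw_mutex);
}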
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index 3291143..438dd5e 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -133,21 +133,6 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param);
/**
- * kbase_mmu_hw_do_lock - Issue a LOCK operation to the MMU.
- *
- * @kbdev: Kbase device to issue the MMU operation on.
- * @as: Address space to issue the MMU operation on.
- * @op_param: Pointer to struct containing information about the MMU
- * operation to perform.
- *
- * Context: Acquires the hwaccess_lock, expects the caller to hold the mmu_hw_mutex
- *
- * Return: Zero if the operation was successful, non-zero otherwise.
- */
-int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
- const struct kbase_mmu_hw_op_param *op_param);
-
-/**
* kbase_mmu_hw_do_flush - Issue a flush operation to the MMU.
*
* @kbdev: Kbase device to issue the MMU operation on.
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 9b41894..1a6157a 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -387,7 +387,7 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a
return ret;
}
-int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
+static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
int ret;
@@ -550,7 +550,7 @@ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_a
gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2;
/* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */
- ret = kbase_mmu_hw_do_lock(kbdev, as, op_param);
+ ret = mmu_hw_do_lock(kbdev, as, op_param);
if (ret)
return ret;