From 679dfadbeee013cc0425cdba8ab3364501d96226 Mon Sep 17 00:00:00 2001 From: Kevin DuBois Date: Fri, 21 Apr 2023 19:10:01 +0000 Subject: Revert "GPUCORE-36682 Lock MMU while disabling AS to prevent use after free" This reverts commit d4a9cc691fdde6aae0f5d40ad3d949ab76518e42. Bug: 274827412 Reason for revert: stability Change-Id: Id952d2656a642b0f363d579a51843a03e7750c2c --- mali_kbase/mmu/mali_kbase_mmu.c | 122 ++++++++++++++++-------------- mali_kbase/mmu/mali_kbase_mmu_hw.h | 15 ---- mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 4 +- 3 files changed, 66 insertions(+), 75 deletions(-) (limited to 'mali_kbase') diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index b5ec66a..1b874a0 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2067,6 +2067,60 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); +/** + * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches + * without retaining the kbase context. + * @kctx: The KBase context. + * @vpfn: The virtual page frame number to start the flush on. + * @nr: The number of pages to flush. + * + * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any + * other locking. + */ +static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr) +{ + struct kbase_device *kbdev = kctx->kbdev; + int err; + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + struct kbase_mmu_hw_op_param op_param; + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); + + /* Early out if there is nothing to do */ + if (nr == 0) + return; + + /* flush L2 and unlock the VA (resumes the MMU) */ + op_param.vpfn = vpfn; + op_param.nr = nr; + op_param.op = KBASE_MMU_OP_FLUSH_MEM; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + /* Value used to prevent skipping of any levels when flushing */ + op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); + err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr], + &op_param); + } else { + err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], + &op_param); + } + + if (err) { + /* Flush failed to complete, assume the + * GPU has hung and perform a reset to recover + */ + dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); + + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); + } +} + void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, int as_nr) @@ -2089,14 +2143,6 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) void kbase_mmu_disable(struct kbase_context *kctx) { - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. 
- */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - struct kbase_device *kbdev = kctx->kbdev; - struct kbase_mmu_hw_op_param op_param = { 0 }; - int lock_err, flush_err; - /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. * @@ -2107,49 +2153,16 @@ void kbase_mmu_disable(struct kbase_context *kctx) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); - op_param.vpfn = 0; - op_param.nr = ~0; - op_param.op = KBASE_MMU_OP_FLUSH_MEM; - op_param.kctx_id = kctx->id; - op_param.mmu_sync_info = mmu_sync_info; - -#if MALI_USE_CSF - /* 0xF value used to prevent skipping of any levels when flushing */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) - op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); -#endif - - /* lock MMU to prevent existing jobs on GPU from executing while the AS is - * not yet disabled - */ - lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param); - if (lock_err) - dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, - kctx->id); - - /* Issue the flush command only when L2 cache is in stable power on state. - * Any other state for L2 cache implies that shader cores are powered off, - * which in turn implies there is no execution happening on the GPU. + /* + * The address space is being disabled, drain all knowledge of it out + * from the caches as pages and page tables might be freed after this. + * + * The job scheduler code will already be holding the locks and context + * so just do the flush. */ - if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { - flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, - GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - if (flush_err) - dev_err(kbdev->dev, - "Failed to flush GPU cache when disabling AS %d for ctx %d_%d", - kctx->as_nr, kctx->tgid, kctx->id); - } - kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); - - if (!lock_err) { - /* unlock the MMU to allow it to resume */ - lock_err = - kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param); - if (lock_err) - dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, - kctx->tgid, kctx->id); - } + kbase_mmu_flush_noretain(kctx, 0, ~0); + kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); #if !MALI_USE_CSF /* * JM GPUs has some L1 read only caches that need to be invalidated @@ -2157,15 +2170,8 @@ void kbase_mmu_disable(struct kbase_context *kctx) * the slot_rb tracking field so such invalidation is performed when * a new katom is executed on the affected slots. */ - kbase_backend_slot_kctx_purge_locked(kbdev, kctx); + kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); #endif - - /* kbase_gpu_cache_flush_and_busy_wait() will reset the GPU on timeout. Only - * reset the GPU if locking or unlocking fails. - */ - if (lock_err) - if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) - kbase_reset_gpu_locked(kbdev); } KBASE_EXPORT_TEST_API(kbase_mmu_disable); diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h index 3291143..438dd5e 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw.h +++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h @@ -132,21 +132,6 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as * */ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); -/** - * kbase_mmu_hw_do_lock - Issue a LOCK operation to the MMU. 
- * - * @kbdev: Kbase device to issue the MMU operation on. - * @as: Address space to issue the MMU operation on. - * @op_param: Pointer to struct containing information about the MMU - * operation to perform. - * - * Context: Acquires the hwaccess_lock, expects the caller to hold the mmu_hw_mutex - * - * Return: Zero if the operation was successful, non-zero otherwise. - */ -int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, - const struct kbase_mmu_hw_op_param *op_param); - /** * kbase_mmu_hw_do_flush - Issue a flush operation to the MMU. * diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 9b41894..1a6157a 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -387,7 +387,7 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a return ret; } -int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, +static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { int ret; @@ -550,7 +550,7 @@ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_a gpu_cmd = GPU_COMMAND_CACHE_CLN_INV_L2; /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */ - ret = kbase_mmu_hw_do_lock(kbdev, as, op_param); + ret = mmu_hw_do_lock(kbdev, as, op_param); if (ret) return ret; -- cgit v1.2.3
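
For review convenience, here is kbase_mmu_disable() as it reads after this revert, consolidated from the '+' and context lines in the hunks above. This is a readability sketch, not a verbatim copy of the resulting file; lines elided by the hunk context are marked as such.

void kbase_mmu_disable(struct kbase_context *kctx)
{
	/* ASSERT that the context has a valid as_nr, which is only the case
	 * when it's scheduled in.
	 * (remaining assertion lines elided by the hunk context)
	 */

	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);

	/*
	 * The address space is being disabled, drain all knowledge of it out
	 * from the caches as pages and page tables might be freed after this.
	 *
	 * The job scheduler code will already be holding the locks and context
	 * so just do the flush.
	 */
	kbase_mmu_flush_noretain(kctx, 0, ~0);

	kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
#if !MALI_USE_CSF
	/*
	 * JM GPUs has some L1 read only caches that need to be invalidated
	 * with START_FLUSH configuration. Purge the MMU disabled kctx from
	 * the slot_rb tracking field so such invalidation is performed when
	 * a new katom is executed on the affected slots.
	 */
	kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx);
#endif
}

The ordering this restores is the important part: the full flush via kbase_mmu_flush_noretain() (reinstated earlier in the patch) is issued while the address space is still enabled, and only then is disable_as() called, so no cached translations or page-table walks can reference pages that may be freed once the AS is torn down. The reverted commit instead bracketed the disable with an explicit MMU LOCK/UNLOCK pair plus a conditional L2/LSC flush; per the commit message, that sequence is being backed out for stability.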