diff options
author | Suzanne Candanedo <suzanne.candanedo@arm.com> | 2023-03-20 20:31:29 +0000 |
---|---|---|
committer | Guus Sliepen <gsliepen@google.com> | 2023-06-06 22:45:36 +0000 |
commit | 83b03c4f316ecc92f4b64f23c024d1f2eef8e523 (patch) | |
tree | 6974de665b4e2693c8a1d827204c2c68d7f4bfd5 /mali_kbase | |
parent | 200a509f48df46d43acdb683966bd367ddc8cbe1 (diff) | |
download | gpu-83b03c4f316ecc92f4b64f23c024d1f2eef8e523.tar.gz |
GPUCORE-36682 Lock MMU while disabling AS to prevent use after free
Tags: android-13.0.0_r0.117 android-13.0.0_r0.116 android-13.0.0_r0.115 android-13.0.0_r0.114 android-13.0.0_r0.113 android-13.0.0_r0.112 android-gs-felix-5.10-android13-qpr3
During an invalid GPU page fault, kbase will try to flush the GPU cache
and disable the faulting address space (AS). There is a small window
between the flushing of the GPU L2 cache (at which point the MMU
resumes) and the disabling of the AS, during which existing jobs on the
GPU may still access memory belonging to that AS, dirtying the GPU cache.
This is a problem as the kctx->as_nr is marked as KBASEP_AS_NR_INVALID
and thus no cache maintenance will be performed on the AS of the faulty
context when cleaning up the csg_slot and releasing the context.
This patch addresses that issue by:
1. locking the AS via a GPU command
2. flushing the cache
3. disabling the AS
4. unlocking the AS
This ensures that any jobs remaining on the GPU will not be able to
access the memory due to the locked AS. Once the AS is unlocked, any
memory access will fail as the AS is now disabled.
The issue only happens on CSF GPUs. To avoid any issues, the code path
for non-CSF GPUs is left undisturbed.
(cherry picked from commit 566789dffda3dfec00ecf00f9819e7a515fb2c61)
Provenance: https://code.ipdelivery.arm.com/c/GPU/mali-ddk/+/5071
Bug: 274014055
Change-Id: I2028182878b4f88505cc135a5f53ae4c7e734650
Diffstat (limited to 'mali_kbase')
-rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu.c | 87 | ||||
-rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu_hw.h | 15 | ||||
-rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 8 |
3 files changed, 109 insertions, 1 deletion
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index 4828cdc..34a97ae 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -2066,6 +2066,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); +#if !MALI_USE_CSF /** * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches * without retaining the kbase context. @@ -2119,6 +2120,7 @@ static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_ kbase_reset_gpu_locked(kbdev); } } +#endif void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, @@ -2140,6 +2142,88 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) kbdev->mmu_mode->disable_as(kbdev, as_nr); } +#if MALI_USE_CSF +void kbase_mmu_disable(struct kbase_context *kctx) +{ + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_mmu_hw_op_param op_param = { 0 }; + int lock_err, flush_err; + + /* ASSERT that the context has a valid as_nr, which is only the case + * when it's scheduled in. 
+ * + * as_nr won't change because the caller has the hwaccess_lock + */ + KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID); + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); + + op_param.vpfn = 0; + op_param.nr = ~0; + op_param.op = KBASE_MMU_OP_FLUSH_MEM; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + +#if MALI_USE_CSF + /* 0xF value used to prevent skipping of any levels when flushing */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); +#endif + + /* lock MMU to prevent existing jobs on GPU from executing while the AS is + * not yet disabled + */ + lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param); + if (lock_err) + dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, + kctx->id); + + /* Issue the flush command only when L2 cache is in stable power on state. + * Any other state for L2 cache implies that shader cores are powered off, + * which in turn implies there is no execution happening on the GPU. + */ + if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { + flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + if (flush_err) + dev_err(kbdev->dev, + "Failed to flush GPU cache when disabling AS %d for ctx %d_%d", + kctx->as_nr, kctx->tgid, kctx->id); + } + kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); + + if (!lock_err) { + /* unlock the MMU to allow it to resume */ + lock_err = + kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param); + if (lock_err) + dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, + kctx->tgid, kctx->id); + } + +#if !MALI_USE_CSF + /* + * JM GPUs has some L1 read only caches that need to be invalidated + * with START_FLUSH configuration. 
Purge the MMU disabled kctx from + * the slot_rb tracking field so such invalidation is performed when + * a new katom is executed on the affected slots. + */ + kbase_backend_slot_kctx_purge_locked(kbdev, kctx); +#endif + + /* kbase_gpu_cache_flush_and_busy_wait() will reset the GPU on timeout. Only + * reset the GPU if locking or unlocking fails. + */ + if (lock_err) + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) + kbase_reset_gpu_locked(kbdev); +} +#else void kbase_mmu_disable(struct kbase_context *kctx) { /* ASSERT that the context has a valid as_nr, which is only the case @@ -2172,6 +2256,7 @@ void kbase_mmu_disable(struct kbase_context *kctx) kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); #endif } +#endif KBASE_EXPORT_TEST_API(kbase_mmu_disable); static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h index 438dd5e..3291143 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw.h +++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h @@ -133,6 +133,21 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as * int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); /** + * kbase_mmu_hw_do_lock - Issue a LOCK operation to the MMU. + * + * @kbdev: Kbase device to issue the MMU operation on. + * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * Context: Acquires the hwaccess_lock, expects the caller to hold the mmu_hw_mutex + * + * Return: Zero if the operation was successful, non-zero otherwise. + */ +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** * kbase_mmu_hw_do_flush - Issue a flush operation to the MMU. * * @kbdev: Kbase device to issue the MMU operation on. 
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 1a6157a..122e9ef 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -410,6 +410,14 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, return ret; } +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_lock(kbdev, as, op_param); +} + int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { |