Diffstat (limited to 'mali_kbase/mmu/mali_kbase_mmu.c')
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c  3130
1 file changed, 2244 insertions(+), 886 deletions(-)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 26ddd95..d6b4eb7 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,7 @@
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
+#include <linux/migrate.h>
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_fault.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -45,10 +46,35 @@
#if !MALI_USE_CSF
#include <mali_kbase_hwaccess_jm.h>
#endif
+#include <linux/version_compat_defs.h>
#include <mali_kbase_trace_gpu_mem.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
+/* Threshold used to decide whether to flush full caches or just a physical range */
+#define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20
+#define MGM_DEFAULT_PTE_GROUP (0)
+
+/* Macro to convert updated PGDs to flags indicating the levels to skip in flush */
+#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF)
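A minimal sketch (editor's illustration, not part of the patch) of how the dirty-PGD bitmask becomes a flush-skip mask: bit N of dirty_pgds is set when the level-N PGD was written, so only untouched levels end up in the skip flags. Assuming the four Midgard levels 0 (top) to 3 (bottom):

static inline u64 example_flush_skip_mask(void)
{
	/* Illustration only: levels 2 and 3 were updated by a page table operation... */
	const u64 dirty_pgds = (1ULL << 2) | (1ULL << 3);

	/* ...so the result is 0x3, i.e. the flush may skip levels 0 and 1. */
	return pgd_level_to_skip_flush(dirty_pgds);
}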
+
+static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg, bool ignore_page_migration);
+
+/* Small wrapper function to factor out GPU-dependent context releasing */
+static void release_ctx(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+#if MALI_USE_CSF
+ CSTD_UNUSED(kbdev);
+ kbase_ctx_sched_release_ctx_lock(kctx);
+#else /* MALI_USE_CSF */
+ kbasep_js_runpool_release_ctx(kbdev, kctx);
+#endif /* MALI_USE_CSF */
+}
+
static void mmu_hw_operation_begin(struct kbase_device *kbdev)
{
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
@@ -91,7 +117,8 @@ static void mmu_hw_operation_end(struct kbase_device *kbdev)
/**
* mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done
- * through GPU_CONTROL interface
+ * through GPU_CONTROL interface.
+ *
* @kbdev: kbase device to check GPU model ID on.
*
* This function returns whether a cache flush for page table update should
@@ -109,119 +136,213 @@ static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
}
/**
- * mmu_flush_invalidate_on_gpu_ctrl() - Flush and invalidate the GPU caches
- * through GPU_CONTROL interface.
- * @kbdev: kbase device to issue the MMU operation on.
- * @as: address space to issue the MMU operation on.
- * @op_param: parameters for the operation.
+ * mmu_flush_pa_range() - Flush physical address range
*
- * This wrapper function alternates AS_COMMAND_FLUSH_PT and AS_COMMAND_FLUSH_MEM
- * to equivalent GPU_CONTROL command FLUSH_CACHES.
- * The function first issue LOCK to MMU-AS with kbase_mmu_hw_do_operation().
- * And issues cache-flush with kbase_gpu_cache_flush_and_busy_wait() function
- * then issue UNLOCK to MMU-AS with kbase_mmu_hw_do_operation().
+ * @kbdev: kbase device to issue the MMU operation on.
+ * @phys: Starting address of the physical range to start the operation on.
+ * @nr_bytes: Number of bytes to work on.
+ * @op: Type of cache flush operation to perform.
*
- * Return: Zero if the operation was successful, non-zero otherwise.
+ * Issue a cache flush physical range command.
*/
-static int
-mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev,
- struct kbase_as *as,
- struct kbase_mmu_hw_op_param *op_param)
+#if MALI_USE_CSF
+static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes,
+ enum kbase_mmu_op_type op)
{
u32 flush_op;
- int ret, ret2;
-
- if (WARN_ON(kbdev == NULL) ||
- WARN_ON(as == NULL) ||
- WARN_ON(op_param == NULL))
- return -EINVAL;
lockdep_assert_held(&kbdev->hwaccess_lock);
- lockdep_assert_held(&kbdev->mmu_hw_mutex);
/* Translate operation to command */
- if (op_param->op == KBASE_MMU_OP_FLUSH_PT) {
- flush_op = GPU_COMMAND_CACHE_CLN_INV_L2;
- } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
- flush_op = GPU_COMMAND_CACHE_CLN_INV_L2_LSC;
- } else {
- dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n",
- op_param->op);
- return -EINVAL;
+ if (op == KBASE_MMU_OP_FLUSH_PT)
+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2;
+ else if (op == KBASE_MMU_OP_FLUSH_MEM)
+ flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC;
+ else {
+ dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op);
+ return;
}
- /* 1. Issue MMU_AS_CONTROL.COMMAND.LOCK operation. */
- op_param->op = KBASE_MMU_OP_LOCK;
- ret = kbase_mmu_hw_do_operation(kbdev, as, op_param);
- if (ret)
- return ret;
+ if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op))
+ dev_err(kbdev->dev, "Flush for physical address range did not complete");
+}
+#endif
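A hedged usage sketch (illustration only, CSF builds): a caller that already holds kbdev->hwaccess_lock could clean and invalidate just the single page backing an updated PGD instead of the whole L2, which is the point of the physical-range commands above.

#if MALI_USE_CSF
/* Illustration only: flush one PGD-sized physical range; assumes hwaccess_lock is held. */
static void example_flush_one_pgd_page(struct kbase_device *kbdev, phys_addr_t pgd)
{
	mmu_flush_pa_range(kbdev, pgd, PAGE_SIZE, KBASE_MMU_OP_FLUSH_PT);
}
#endif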
- /* 2. Issue GPU_CONTROL.COMMAND.FLUSH_CACHES operation */
- ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, flush_op);
+/**
+ * mmu_invalidate() - Perform an invalidate operation on MMU caches.
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
+ *
+ * Perform an MMU invalidate operation on a particular address space
+ * by issuing an UNLOCK command.
+ */
+static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ unsigned long flags;
- /* 3. Issue MMU_AS_CONTROL.COMMAND.UNLOCK operation. */
- op_param->op = KBASE_MMU_OP_UNLOCK;
- ret2 = kbase_mmu_hw_do_operation(kbdev, as, op_param);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- return ret ?: ret2;
+ if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) {
+ as_nr = kctx ? kctx->as_nr : as_nr;
+ if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev,
+ "Invalidate after GPU page table update did not complete");
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+
+/* Perform a flush/invalidate on a particular address space
+ */
+static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ unsigned long flags;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->mmu_hw_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->pm.backend.gpu_ready && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ /* AS transaction end */
}
/**
- * kbase_mmu_flush_invalidate() - Flush and invalidate the GPU caches.
- * @kctx: The KBase context.
- * @vpfn: The virtual page frame number to start the flush on.
- * @nr: The number of pages to flush.
- * @sync: Set if the operation should be synchronous or not.
+ * mmu_flush_invalidate() - Perform a flush operation on GPU caches.
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
*
- * Issue a cache flush + invalidate to the GPU caches and invalidate the TLBs.
+ * This function performs the cache flush operation described by @op_param.
+ * The function retains a reference to the given @kctx and releases it
+ * after performing the flush operation.
*
- * If sync is not set then transactions still in flight when the flush is issued
- * may use the old page tables and the data they write will not be written out
- * to memory, this function returns after the flush has been issued but
- * before all accesses which might effect the flushed region have completed.
+ * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue
+ * a cache flush + invalidate to the L2 caches and invalidate the TLBs.
*
- * If sync is set then accesses in the flushed region will be drained
- * before data is flush and invalidated through L1, L2 and into memory,
- * after which point this function will return.
- * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
+ * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue
+ * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as
+ * invalidating the TLBs.
*/
-static void
-kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr,
- bool sync,
- enum kbase_caller_mmu_sync_info mmu_sync_info);
+static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
+ const struct kbase_mmu_hw_op_param *op_param)
+{
+ bool ctx_is_in_runpool;
+
+ /* Early out if there is nothing to do */
+ if (op_param->nr == 0)
+ return;
+
+ /* If no context is provided then MMU operation is performed on address
+ * space which does not belong to user space context. Otherwise, retain
+ * refcount to context provided and release after flush operation.
+ */
+ if (!kctx) {
+ mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param);
+ } else {
+#if !MALI_USE_CSF
+ rt_mutex_lock(&kbdev->js_data.queue_mutex);
+ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
+ rt_mutex_unlock(&kbdev->js_data.queue_mutex);
+#else
+ ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
+#endif /* !MALI_USE_CSF */
+
+ if (ctx_is_in_runpool) {
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param);
+
+ release_ctx(kbdev, kctx);
+ }
+ }
+}
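A hedged sketch (illustration only) of the call pattern the comment above describes: the caller fills a kbase_mmu_hw_op_param and lets mmu_flush_invalidate() handle refcounting the context and picking its address space; the as_nr argument only matters when no context is given.

static void example_flush_after_unmap(struct kbase_device *kbdev, struct kbase_context *kctx,
				      u64 vpfn, size_t nr, u64 dirty_pgds)
{
	const struct kbase_mmu_hw_op_param op_param = {
		.vpfn = vpfn,
		.nr = nr,
		.op = KBASE_MMU_OP_FLUSH_MEM,	/* flush L2 + LSC and invalidate TLBs */
		.kctx_id = kctx->id,
		.mmu_sync_info = CALLER_MMU_SYNC,
		.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
	};

	/* as_nr is taken from kctx when a context is provided, so pass 0 here. */
	mmu_flush_invalidate(kbdev, kctx, 0, &op_param);
}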
/**
- * kbase_mmu_flush_invalidate_no_ctx() - Flush and invalidate the GPU caches.
- * @kbdev: Device pointer.
- * @vpfn: The virtual page frame number to start the flush on.
- * @nr: The number of pages to flush.
- * @sync: Set if the operation should be synchronous or not.
- * @as_nr: GPU address space number for which flush + invalidate is required.
- * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
+ * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via
+ * the GPU_CONTROL interface
+ * @kbdev: The Kbase device.
+ * @kctx: The Kbase context.
+ * @as_nr: GPU address space number for which flush + invalidate is required.
+ * @op_param: Non-NULL pointer to struct containing information about the MMU
+ * operation to perform.
*
- * This is used for MMU tables which do not belong to a user space context.
+ * Perform a flush/invalidate on a particular address space via the GPU_CONTROL
+ * interface.
*/
-static void kbase_mmu_flush_invalidate_no_ctx(
- struct kbase_device *kbdev, u64 vpfn, size_t nr, bool sync, int as_nr,
- enum kbase_caller_mmu_sync_info mmu_sync_info);
+static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
+ int as_nr, const struct kbase_mmu_hw_op_param *op_param)
+{
+ unsigned long flags;
+
+ /* AS transaction begin */
+ mutex_lock(&kbdev->mmu_hw_mutex);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) {
+ as_nr = kctx ? kctx->as_nr : as_nr;
+ if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+}
+
+static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+ kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op);
+}
+
+static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size)
+{
+ /* Ensure that the GPU can read the pages from memory.
+ *
+ * pixel: b/200555454 requires this sync to happen even if the system
+ * is coherent.
+ */
+ dma_sync_single_for_device(kbdev->dev, handle, size,
+ DMA_TO_DEVICE);
+}
/**
* kbase_mmu_sync_pgd() - sync page directory to memory when needed.
- * @kbdev: Device pointer.
- * @handle: Address of DMA region.
- * @size: Size of the region to sync.
+ * @kbdev: Device pointer.
+ * @kctx: Context pointer.
+ * @phys: Starting physical address of the destination region.
+ * @handle: Address of DMA region.
+ * @size: Size of the region to sync.
+ * @flush_op: MMU cache flush operation to perform on the physical address
+ * range, if GPU control is available.
+ *
+ * This function is called whenever the association between a virtual address
+ * range and a physical address range changes, because a mapping is created or
+ * destroyed.
+ * One of the effects of this operation is performing an MMU cache flush
+ * operation only on the physical address range affected by this function, if
+ * GPU control is available.
*
* This should be called after each page directory update.
*/
-static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
- dma_addr_t handle, size_t size)
+static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, dma_addr_t handle, size_t size,
+ enum kbase_mmu_op_type flush_op)
{
- /* In non-coherent system, ensure the GPU can read
- * the pages from memory
- */
- if (kbdev->system_coherency == COHERENCY_NONE)
- dma_sync_single_for_device(kbdev->dev, handle, size,
- DMA_TO_DEVICE);
+
+ kbase_mmu_sync_pgd_cpu(kbdev, handle, size);
+ kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op);
}
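A hedged illustration of the typical call pattern for the helper above: after writing a single 64-bit entry into a PGD page, only the bytes that changed are synced for the CPU and, where GPU control supports it, flushed by physical range, which is the same pattern the patch uses later in the update and teardown paths.

static void example_sync_one_pte(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
				 phys_addr_t pgd, unsigned int idx)
{
	struct page *p = pfn_to_page(PFN_DOWN(pgd));

	/* Illustration only: sync just the 8 bytes of the entry at index idx. */
	kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
			   kbase_dma_addr(p) + (idx * sizeof(u64)), sizeof(u64),
			   KBASE_MMU_OP_FLUSH_PT);
}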
/*
@@ -233,35 +354,153 @@ static void kbase_mmu_sync_pgd(struct kbase_device *kbdev,
* a 4kB physical page.
*/
-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int group_id);
-
/**
* kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
* free memory of the page directories
*
- * @kbdev: Device pointer.
- * @mmut: GPU MMU page table.
- * @pgds: Physical addresses of page directories to be freed.
- * @vpfn: The virtual page frame number.
- * @level: The level of MMU page table.
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgds: Physical addresses of page directories to be freed.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table.
+ * @flush_op: The type of MMU flush operation to perform.
+ * @dirty_pgds: Flags to track every level where a PGD has been updated.
*/
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- phys_addr_t *pgds, u64 vpfn,
- int level);
+ struct kbase_mmu_table *mmut, phys_addr_t *pgds,
+ u64 vpfn, int level,
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds);
+
+static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
+{
+ atomic_sub(1, &kbdev->memdev.used_pages);
+
+ /* If MMU tables belong to a context then pages will have been accounted
+ * against it, so we must decrement the usage counts here.
+ */
+ if (mmut->kctx) {
+ kbase_process_page_usage_dec(mmut->kctx, 1);
+ atomic_sub(1, &mmut->kctx->used_pages);
+ }
+
+ kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
+}
+
+static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut,
+ struct page *p)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(p);
+ bool page_is_isolated = false;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ if (!kbase_is_page_migration_enabled())
+ return false;
+
+ spin_lock(&page_md->migrate_lock);
+ if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) {
+ WARN_ON_ONCE(!mmut->kctx);
+ if (IS_PAGE_ISOLATED(page_md->status)) {
+ page_md->status = PAGE_STATUS_SET(page_md->status,
+ FREE_PT_ISOLATED_IN_PROGRESS);
+ page_md->data.free_pt_isolated.kbdev = kbdev;
+ page_is_isolated = true;
+ } else {
+ page_md->status =
+ PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS);
+ }
+ } else if ((PAGE_STATUS_GET(page_md->status) == FREE_IN_PROGRESS) ||
+ (PAGE_STATUS_GET(page_md->status) == ALLOCATE_IN_PROGRESS)) {
+ /* Nothing to do - fall through */
+ } else {
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE);
+ }
+ spin_unlock(&page_md->migrate_lock);
+
+ if (unlikely(page_is_isolated)) {
+ /* Do the CPU cache flush and accounting here for the isolated
+ * PGD page, which is done inside kbase_mmu_free_pgd() for the
+ * PGD page that did not get isolated.
+ */
+ dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
+ }
+
+ return page_is_isolated;
+}
+
/**
* kbase_mmu_free_pgd() - Free memory of the page directory
*
* @kbdev: Device pointer.
* @mmut: GPU MMU page table.
* @pgd: Physical address of page directory to be freed.
- * @dirty: Flag to indicate whether the page may be dirty in the cache.
+ *
+ * This function is supposed to be called with mmu_lock held and after
+ * ensuring that the GPU won't be able to access the page.
*/
-static void kbase_mmu_free_pgd(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, phys_addr_t pgd,
- bool dirty);
+static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t pgd)
+{
+ struct page *p;
+ bool page_is_isolated = false;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ p = pfn_to_page(PFN_DOWN(pgd));
+ page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
+
+ if (likely(!page_is_isolated)) {
+ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
+ }
+}
+
+/**
+ * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ *
+ * This function will call kbase_mmu_free_pgd() on each page directory page
+ * present in the list of free PGDs inside @mmut.
+ *
+ * The function is supposed to be called after the GPU cache and MMU TLB have
+ * been invalidated post the teardown loop.
+ *
+ * The mmu_lock shall be held prior to calling the function.
+ */
+static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
+{
+ size_t i;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
+ kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i]));
+
+ mmut->scratch_mem.free_pgds.head_index = 0;
+}
+
+static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
+ return;
+
+ mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p;
+}
+
+static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ mmut->scratch_mem.free_pgds.head_index = 0;
+}
+
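A hedged sketch (illustration only) of the ordering the free-PGDs helpers above are designed for: reset the list, collect unlinked PGD pages during the table walk, perform the GPU cache flush and TLB invalidation, and only then free the collected pages so the GPU can never observe a recycled PGD.

static void example_teardown_ordering(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
				      struct page *unlinked_pgd_page)
{
	lockdep_assert_held(&mmut->mmu_lock);

	kbase_mmu_reset_free_pgds_list(mmut);

	/* ...table walk invalidates entries and unlinks this PGD page... */
	kbase_mmu_add_to_free_pgds_list(mmut, unlinked_pgd_page);

	/* ...GPU cache flush + MMU TLB invalidate would happen here... */

	kbase_mmu_free_pgds_list(kbdev, mmut);
}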
/**
* reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
* a region on a GPU page fault
@@ -289,7 +528,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
if (!multiple) {
dev_warn(
kbdev->dev,
- "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+ "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW",
((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
return minimum_extra;
}
@@ -345,13 +584,14 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
struct kbase_as *faulting_as,
u64 start_pfn, size_t nr,
- u32 kctx_id)
+ u32 kctx_id, u64 dirty_pgds)
{
/* Calls to this function are inherently synchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
struct kbase_mmu_hw_op_param op_param;
+ int ret = 0;
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -359,27 +599,31 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
KBASE_MMU_FAULT_TYPE_PAGE);
/* flush L2 and unlock the VA (resumes the MMU) */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = start_pfn,
- .nr = nr,
- .op = KBASE_MMU_OP_FLUSH_PT,
- .kctx_id = kctx_id,
- .mmu_sync_info = mmu_sync_info,
- };
+ op_param.vpfn = start_pfn;
+ op_param.nr = nr;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ op_param.kctx_id = kctx_id;
+ op_param.mmu_sync_info = mmu_sync_info;
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
unsigned long irq_flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, &op_param);
+ op_param.flush_skip_levels =
+ pgd_level_to_skip_flush(dirty_pgds);
+ ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
} else {
mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
+ ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
mmu_hw_operation_end(kbdev);
}
mutex_unlock(&kbdev->mmu_hw_mutex);
+ if (ret)
+ dev_err(kbdev->dev,
+ "Flush for GPU page fault due to write access did not complete");
+
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
}
@@ -412,8 +656,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
struct tagged_addr *fault_phys_addr;
struct kbase_fault *fault;
u64 fault_pfn, pfn_offset;
- int ret;
int as_no;
+ u64 dirty_pgds = 0;
as_no = faulting_as->number;
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
@@ -472,12 +716,11 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
}
/* Now make this faulting page writable to GPU. */
- ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
- fault_phys_addr,
- 1, region->flags, region->gpu_alloc->group_id);
+ kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1,
+ region->flags, region->gpu_alloc->group_id, &dirty_pgds);
kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
- kctx->id);
+ kctx->id, dirty_pgds);
kbase_gpu_vm_unlock(kctx);
}
@@ -492,7 +735,7 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
break;
- case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+ case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE:
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Execute Permission fault", fault);
break;
@@ -508,31 +751,68 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
}
#endif
-#define MAX_POOL_LEVEL 2
+/**
+ * estimate_pool_space_required - Determine how much a pool should be grown by to support a future
+ * allocation
+ * @pool: The memory pool to check, including its linked pools
+ * @pages_required: Number of 4KiB pages required for the pool to support a future allocation
+ *
+ * The value returned accounts for the size of @pool and the size of each memory pool linked to
+ * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to
+ * allocate from.
+ *
+ * Note: this is only an estimate, because even during the calculation the memory pool(s) involved
+ * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an
+ * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller
+ * should keep attempting an allocation and then re-growing with a new value queried from this
+ * function until the allocation succeeds.
+ *
+ * Return: an estimate of the number of extra 4KiB pages in @pool that are required to satisfy an
+ * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the
+ * allocation.
+ */
+static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required)
+{
+ size_t pages_still_required;
+
+ for (pages_still_required = pages_required; pool != NULL && pages_still_required;
+ pool = pool->next_pool) {
+ size_t pool_size_4k;
+
+ kbase_mem_pool_lock(pool);
+
+ pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
+ if (pool_size_4k >= pages_still_required)
+ pages_still_required = 0;
+ else
+ pages_still_required -= pool_size_4k;
+
+ kbase_mem_pool_unlock(pool);
+ }
+ return pages_still_required;
+}
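The kerneldoc above prescribes a query-grow-retry pattern; a hedged sketch of what such a caller could look like (illustration only: the real consumer is the page fault path further down, and which pool gets grown here is an assumption).

static int example_grow_until_enough(struct kbase_mem_pool *root_pool, size_t pages_required,
				     struct task_struct *task)
{
	size_t still_required;

	/* The estimate can go stale as soon as the pool locks are dropped,
	 * so re-query and grow until the pools look large enough.
	 */
	while ((still_required = estimate_pool_space_required(root_pool, pages_required)) != 0) {
		int err = kbase_mem_pool_grow(root_pool, still_required, task);

		if (err < 0)
			return err;
	}

	return 0;
}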
/**
* page_fault_try_alloc - Try to allocate memory from a context pool
* @kctx: Context pointer
* @region: Region to grow
- * @new_pages: Number of 4 kB pages to allocate
- * @pages_to_grow: Pointer to variable to store number of outstanding pages on
- * failure. This can be either 4 kB or 2 MB pages, depending on
- * the number of pages requested.
- * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true
- * for 2 MB, false for 4 kB.
+ * @new_pages: Number of 4 KiB pages to allocate
+ * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be
+ * either 4 KiB or 2 MiB pages, depending on the number of pages requested.
+ * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for
+ * 4 KiB.
* @prealloc_sas: Pointer to kbase_sub_alloc structures
*
- * This function will try to allocate as many pages as possible from the context
- * pool, then if required will try to allocate the remaining pages from the
- * device pool.
+ * This function will try to allocate as many pages as possible from the context pool, then if
+ * required will try to allocate the remaining pages from the device pool.
*
- * This function will not allocate any new memory beyond that is already
- * present in the context or device pools. This is because it is intended to be
- * called with the vm_lock held, which could cause recursive locking if the
- * allocation caused the out-of-memory killer to run.
+ * This function will not allocate any new memory beyond what is already present in the context or
+ * device pools. This is because it is intended to be called whilst the thread has acquired the
+ * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is
+ * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close().
*
- * If 2 MB pages are enabled and new_pages is >= 2 MB then pages_to_grow will be
- * a count of 2 MB pages, otherwise it will be a count of 4 kB pages.
+ * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB
+ * pages, otherwise it will be a count of 4 KiB pages.
*
* Return: true if successful, false on failure
*/
@@ -541,13 +821,15 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
int *pages_to_grow, bool *grow_2mb_pool,
struct kbase_sub_alloc **prealloc_sas)
{
- struct tagged_addr *gpu_pages[MAX_POOL_LEVEL] = {NULL};
- struct tagged_addr *cpu_pages[MAX_POOL_LEVEL] = {NULL};
- size_t pages_alloced[MAX_POOL_LEVEL] = {0};
+ size_t total_gpu_pages_alloced = 0;
+ size_t total_cpu_pages_alloced = 0;
struct kbase_mem_pool *pool, *root_pool;
- int pool_level = 0;
bool alloc_failed = false;
size_t pages_still_required;
+ size_t total_mempools_free_4k = 0;
+
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->mem_partials_lock);
if (WARN_ON(region->gpu_alloc->group_id >=
MEMORY_GROUP_MANAGER_NR_GROUPS)) {
@@ -556,42 +838,21 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return false;
}
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (new_pages >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) {
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
*grow_2mb_pool = true;
} else {
-#endif
root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
*grow_2mb_pool = false;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (region->gpu_alloc != region->cpu_alloc)
new_pages *= 2;
- pages_still_required = new_pages;
-
/* Determine how many pages are in the pools before trying to allocate.
* Don't attempt to allocate & free if the allocation can't succeed.
*/
- for (pool = root_pool; pool != NULL; pool = pool->next_pool) {
- size_t pool_size_4k;
-
- kbase_mem_pool_lock(pool);
-
- pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
- if (pool_size_4k >= pages_still_required)
- pages_still_required = 0;
- else
- pages_still_required -= pool_size_4k;
-
- kbase_mem_pool_unlock(pool);
-
- if (!pages_still_required)
- break;
- }
+ pages_still_required = estimate_pool_space_required(root_pool, new_pages);
if (pages_still_required) {
/* Insufficient pages in pools. Don't try to allocate - just
@@ -602,11 +863,11 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return false;
}
- /* Since we've dropped the pool locks, the amount of memory in the pools
- * may change between the above check and the actual allocation.
+ /* Since we're not holding any of the mempool locks, the amount of memory in the pools may
+ * change between the above estimate and the actual allocation.
*/
- pool = root_pool;
- for (pool_level = 0; pool_level < MAX_POOL_LEVEL; pool_level++) {
+ pages_still_required = new_pages;
+ for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) {
size_t pool_size_4k;
size_t pages_to_alloc_4k;
size_t pages_to_alloc_4k_per_alloc;
@@ -615,94 +876,92 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
/* Allocate as much as possible from this pool*/
pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
- pages_to_alloc_4k = MIN(new_pages, pool_size_4k);
+ total_mempools_free_4k += pool_size_4k;
+ pages_to_alloc_4k = MIN(pages_still_required, pool_size_4k);
if (region->gpu_alloc == region->cpu_alloc)
pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
else
pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
- pages_alloced[pool_level] = pages_to_alloc_4k;
if (pages_to_alloc_4k) {
- gpu_pages[pool_level] =
- kbase_alloc_phy_pages_helper_locked(
- region->gpu_alloc, pool,
- pages_to_alloc_4k_per_alloc,
- &prealloc_sas[0]);
+ struct tagged_addr *gpu_pages =
+ kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool,
+ pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[0]);
- if (!gpu_pages[pool_level]) {
+ if (!gpu_pages)
alloc_failed = true;
- } else if (region->gpu_alloc != region->cpu_alloc) {
- cpu_pages[pool_level] =
- kbase_alloc_phy_pages_helper_locked(
- region->cpu_alloc, pool,
- pages_to_alloc_4k_per_alloc,
- &prealloc_sas[1]);
-
- if (!cpu_pages[pool_level])
+ else
+ total_gpu_pages_alloced += pages_to_alloc_4k_per_alloc;
+
+ if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) {
+ struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked(
+ region->cpu_alloc, pool, pages_to_alloc_4k_per_alloc,
+ &prealloc_sas[1]);
+
+ if (!cpu_pages)
alloc_failed = true;
+ else
+ total_cpu_pages_alloced += pages_to_alloc_4k_per_alloc;
}
}
kbase_mem_pool_unlock(pool);
if (alloc_failed) {
- WARN_ON(!new_pages);
- WARN_ON(pages_to_alloc_4k >= new_pages);
- WARN_ON(pages_to_alloc_4k_per_alloc >= new_pages);
+ WARN_ON(!pages_still_required);
+ WARN_ON(pages_to_alloc_4k >= pages_still_required);
+ WARN_ON(pages_to_alloc_4k_per_alloc >= pages_still_required);
break;
}
- new_pages -= pages_to_alloc_4k;
-
- if (!new_pages)
- break;
-
- pool = pool->next_pool;
- if (!pool)
- break;
+ pages_still_required -= pages_to_alloc_4k;
}
- if (new_pages) {
- /* Allocation was unsuccessful */
- int max_pool_level = pool_level;
-
- pool = root_pool;
-
- /* Free memory allocated so far */
- for (pool_level = 0; pool_level <= max_pool_level;
- pool_level++) {
- kbase_mem_pool_lock(pool);
+ if (pages_still_required) {
+ /* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation,
+ * so must in any case use kbase_free_phy_pages_helper() rather than
+ * kbase_free_phy_pages_helper_locked()
+ */
+ if (total_gpu_pages_alloced > 0)
+ kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced);
+ if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0)
+ kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced);
- if (region->gpu_alloc != region->cpu_alloc) {
- if (pages_alloced[pool_level] &&
- cpu_pages[pool_level])
- kbase_free_phy_pages_helper_locked(
- region->cpu_alloc,
- pool, cpu_pages[pool_level],
- pages_alloced[pool_level]);
+ if (alloc_failed) {
+ /* Note that in allocating from the above memory pools, we always ensure
+ * never to request more than is available in each pool with the pool's
+ * lock held. Hence failing to allocate in such situations would be unusual
+ * and we should cancel the growth instead (as re-growing the memory pool
+ * might not fix the situation)
+ */
+ dev_warn(
+ kctx->kbdev->dev,
+ "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available",
+ new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced,
+ total_mempools_free_4k);
+ *pages_to_grow = 0;
+ } else {
+ /* Tell the caller to try to grow the memory pool
+ *
+ * Freeing pages above may have spilled or returned them to the OS, so we
+ * have to take into account how many are still in the pool before giving a
+ * new estimate for growth required of the pool. We can just re-estimate a
+ * new value.
+ */
+ pages_still_required = estimate_pool_space_required(root_pool, new_pages);
+ if (pages_still_required) {
+ *pages_to_grow = pages_still_required;
+ } else {
+ /* It's possible another thread could've grown the pool to be just
+ * big enough after we rolled back the allocation. Request at least
+ * one more page to ensure the caller doesn't fail the growth by
+ * conflating it with the alloc_failed case above
+ */
+ *pages_to_grow = 1u;
}
-
- if (pages_alloced[pool_level] && gpu_pages[pool_level])
- kbase_free_phy_pages_helper_locked(
- region->gpu_alloc,
- pool, gpu_pages[pool_level],
- pages_alloced[pool_level]);
-
- kbase_mem_pool_unlock(pool);
-
- pool = pool->next_pool;
}
- /*
- * If the allocation failed despite there being enough memory in
- * the pool, then just fail. Otherwise, try to grow the memory
- * pool.
- */
- if (alloc_failed)
- *pages_to_grow = 0;
- else
- *pages_to_grow = new_pages;
-
return false;
}
@@ -712,18 +971,6 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return true;
}
-/* Small wrapper function to factor out GPU-dependent context releasing */
-static void release_ctx(struct kbase_device *kbdev,
- struct kbase_context *kctx)
-{
-#if MALI_USE_CSF
- CSTD_UNUSED(kbdev);
- kbase_ctx_sched_release_ctx_lock(kctx);
-#else /* MALI_USE_CSF */
- kbasep_js_runpool_release_ctx(kbdev, kctx);
-#endif /* MALI_USE_CSF */
-}
-
void kbase_mmu_page_fault_worker(struct work_struct *data)
{
u64 fault_pfn;
@@ -758,9 +1005,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
as_no = faulting_as->number;
kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
- dev_dbg(kbdev->dev,
- "Entering %s %pK, fault_pfn %lld, as_no %d\n",
- __func__, (void *)data, fault_pfn, as_no);
+ dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data,
+ fault_pfn, as_no);
/* Grab the context that was already refcounted in kbase_mmu_interrupt()
* Therefore, it cannot be scheduled out of this AS until we explicitly
@@ -783,8 +1029,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
#ifdef CONFIG_MALI_ARBITER_SUPPORT
/* check if we still have GPU */
if (unlikely(kbase_is_gpu_removed(kbdev))) {
- dev_dbg(kbdev->dev,
- "%s: GPU has been removed\n", __func__);
+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
goto fault_done;
}
#endif
@@ -847,20 +1092,24 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
goto fault_done;
}
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs if necessary */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i]) {
- kbase_mmu_report_fault_and_kill(kctx, faulting_as,
- "Failed pre-allocating memory for sub-allocations' metadata",
- fault);
- goto fault_done;
+page_fault_retry:
+ if (kbdev->pagesize_2mb) {
+ /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ if (!prealloc_sas[i]) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+
+ if (!prealloc_sas[i]) {
+ kbase_mmu_report_fault_and_kill(
+ kctx, faulting_as,
+ "Failed pre-allocating memory for sub-allocations' metadata",
+ fault);
+ goto fault_done;
+ }
+ }
}
}
-#endif /* CONFIG_MALI_2MB_ALLOC */
-page_fault_retry:
/* so we have a translation fault,
* let's see if it is for growable memory
*/
@@ -938,16 +1187,29 @@ page_fault_retry:
* transaction (which should cause the other page fault to be
* raised again).
*/
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = 0,
- .nr = 0,
- .op = KBASE_MMU_OP_UNLOCK,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
- mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
- mmu_hw_operation_end(kbdev);
+ op_param.mmu_sync_info = mmu_sync_info;
+ op_param.kctx_id = kctx->id;
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
+ &op_param);
+ mmu_hw_operation_end(kbdev);
+ } else {
+ /* Can safely skip the invalidate for all levels in case
+ * of duplicate page faults.
+ */
+ op_param.flush_skip_levels = 0xF;
+ op_param.vpfn = fault_pfn;
+ op_param.nr = 1;
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx",
+ fault->addr);
+ }
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -962,8 +1224,7 @@ page_fault_retry:
/* cap to max vsize */
new_pages = min(new_pages, region->nr_pages - current_backed_size);
- dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n",
- new_pages);
+ dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages);
if (new_pages == 0) {
struct kbase_mmu_hw_op_param op_param;
@@ -975,16 +1236,29 @@ page_fault_retry:
KBASE_MMU_FAULT_TYPE_PAGE);
/* See comment [1] about UNLOCK usage */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = 0,
- .nr = 0,
- .op = KBASE_MMU_OP_UNLOCK,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
- mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
- mmu_hw_operation_end(kbdev);
+ op_param.mmu_sync_info = mmu_sync_info;
+ op_param.kctx_id = kctx->id;
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
+ &op_param);
+ mmu_hw_operation_end(kbdev);
+ } else {
+ /* Can safely skip the invalidate for all levels in case
+ * of duplicate page faults.
+ */
+ op_param.flush_skip_levels = 0xF;
+ op_param.vpfn = fault_pfn;
+ op_param.nr = 1;
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Invalidation for MMU did not complete on handling page fault @ 0x%llx",
+ fault->addr);
+ }
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -1009,6 +1283,7 @@ page_fault_retry:
spin_unlock(&kctx->mem_partials_lock);
if (grown) {
+ u64 dirty_pgds = 0;
u64 pfn_offset;
struct kbase_mmu_hw_op_param op_param;
@@ -1026,10 +1301,11 @@ page_fault_retry:
* so the no_flush version of insert_pages is used which allows
* us to unlock the MMU as we see fit.
*/
- err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu,
- region->start_pfn + pfn_offset,
- &kbase_get_gpu_phy_pages(region)[pfn_offset],
- new_pages, region->flags, region->gpu_alloc->group_id);
+ err = mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset,
+ &kbase_get_gpu_phy_pages(region)[pfn_offset],
+ new_pages, region->flags,
+ region->gpu_alloc->group_id, &dirty_pgds, region,
+ false);
if (err) {
kbase_free_phy_pages_helper(region->gpu_alloc,
new_pages);
@@ -1048,23 +1324,18 @@ page_fault_retry:
(u64)new_pages);
trace_mali_mmu_page_fault_grow(region, fault, new_pages);
-#if MALI_INCREMENTAL_RENDERING
+#if MALI_INCREMENTAL_RENDERING_JM
/* Switch to incremental rendering if we have nearly run out of
* memory in a JIT memory allocation.
*/
if (region->threshold_pages &&
kbase_reg_current_backed_size(region) >
region->threshold_pages) {
-
- dev_dbg(kctx->kbdev->dev,
- "%zu pages exceeded IR threshold %zu\n",
- new_pages + current_backed_size,
- region->threshold_pages);
+ dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu",
+ new_pages + current_backed_size, region->threshold_pages);
if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
- dev_dbg(kctx->kbdev->dev,
- "Get region %pK for IR\n",
- (void *)region);
+ dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region);
kbase_va_region_alloc_get(kctx, region);
}
}
@@ -1084,25 +1355,22 @@ page_fault_retry:
kbase_mmu_hw_clear_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
- /* flush L2 and unlock the VA (resumes the MMU) */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = fault->addr >> PAGE_SHIFT,
- .nr = new_pages,
- .op = KBASE_MMU_OP_FLUSH_PT,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
+ op_param.vpfn = region->start_pfn + pfn_offset;
+ op_param.nr = new_pages;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- unsigned long irq_flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as,
- &op_param);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+ /* Unlock to invalidate the TLB (and resume the MMU) */
+ op_param.flush_skip_levels =
+ pgd_level_to_skip_flush(dirty_pgds);
+ err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
+ &op_param);
} else {
+ /* flush L2 and unlock the VA (resumes the MMU) */
mmu_hw_operation_begin(kbdev);
- err = kbase_mmu_hw_do_operation(kbdev, faulting_as,
- &op_param);
+ err = kbase_mmu_hw_do_flush(kbdev, faulting_as,
+ &op_param);
mmu_hw_operation_end(kbdev);
}
@@ -1148,6 +1416,7 @@ page_fault_retry:
kbase_gpu_vm_unlock(kctx);
} else {
int ret = -ENOMEM;
+ const u8 group_id = region->gpu_alloc->group_id;
kbase_gpu_vm_unlock(kctx);
@@ -1155,37 +1424,31 @@ page_fault_retry:
* Otherwise fail the allocation.
*/
if (pages_to_grow > 0) {
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (grow_2mb_pool) {
+ if (kbdev->pagesize_2mb && grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
- &kctx->mem_pools.large[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.large[group_id];
pages_to_grow = (pages_to_grow +
((1 << lp_mem_pool->order) - 1))
>> lp_mem_pool->order;
ret = kbase_mem_pool_grow(lp_mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
} else {
-#endif
struct kbase_mem_pool *const mem_pool =
- &kctx->mem_pools.small[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.small[group_id];
ret = kbase_mem_pool_grow(mem_pool,
- pages_to_grow);
-#ifdef CONFIG_MALI_2MB_ALLOC
+ pages_to_grow, kctx->task);
}
-#endif
}
if (ret < 0) {
/* failed to extend, handle as a normal PF */
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Page allocation failure", fault);
} else {
- dev_dbg(kbdev->dev, "Try again after pool_grow\n");
+ dev_dbg(kbdev->dev, "Try again after pool_grow");
goto page_fault_retry;
}
}
@@ -1212,24 +1475,27 @@ fault_done:
release_ctx(kbdev, kctx);
atomic_dec(&kbdev->faults_pending);
- dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data);
+ dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data);
}
static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut)
{
u64 *page;
- int i;
struct page *p;
+ phys_addr_t pgd;
p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
if (!p)
- return 0;
+ return KBASE_MMU_INVALID_PGD_ADDRESS;
+
+ page = kbase_kmap(p);
- page = kmap(p);
if (page == NULL)
goto alloc_free;
+ pgd = page_to_phys(p);
+
/* If the MMU tables belong to a context then account the memory usage
* to that context, otherwise the MMU tables are device wide and are
* only accounted to the device.
@@ -1250,33 +1516,43 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1);
- for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++)
- kbdev->mmu_mode->entry_invalidate(&page[i]);
+ kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES);
- kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
+	/* As this page is newly created, there is no content to
+ * clean or invalidate in the GPU caches.
+ */
+ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE);
- kunmap(p);
- return page_to_phys(p);
+ kbase_kunmap(p, page);
+ return pgd;
alloc_free:
kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false);
- return 0;
+ return KBASE_MMU_INVALID_PGD_ADDRESS;
}
-/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
- * new table from the pool if needed and possible
+/**
+ * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgd: Physical address of level N page directory.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table (N).
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - level N+1 PGD does not exist
+ * * -EINVAL - kmap() failed for level N PGD PFN
*/
-static int mmu_get_next_pgd(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- phys_addr_t *pgd, u64 vpfn, int level)
+static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t *pgd, u64 vpfn, int level)
{
u64 *page;
phys_addr_t target_pgd;
struct page *p;
- KBASE_DEBUG_ASSERT(*pgd);
-
lockdep_assert_held(&mmut->mmu_lock);
/*
@@ -1287,43 +1563,92 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev,
vpfn &= 0x1FF;
p = pfn_to_page(PFN_DOWN(*pgd));
- page = kmap(p);
+ page = kbase_kmap(p);
if (page == NULL) {
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
return -EINVAL;
}
- target_pgd = kbdev->mmu_mode->pte_to_phy_addr(page[vpfn]);
+ if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
+ dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
+ vpfn);
+ kbase_kunmap(p, page);
+ return -EFAULT;
+ } else {
+ target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
+ }
- if (!target_pgd) {
- target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- if (!target_pgd) {
- dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n",
- __func__);
- kunmap(p);
- return -ENOMEM;
- }
+ kbase_kunmap(p, page);
+ *pgd = target_pgd;
- kbdev->mmu_mode->entry_set_pte(page, vpfn, target_pgd);
+ return 0;
+}
- kbase_mmu_sync_pgd(kbdev, kbase_dma_addr(p), PAGE_SIZE);
- /* Rely on the caller to update the address space flags. */
+/**
+ * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @vpfn: The virtual page frame number.
+ * @in_level: The level of MMU page table (N).
+ * @out_level: Set to the level of the lowest valid PGD found on success.
+ * Invalid on error.
+ * @out_pgd: Set to the lowest valid PGD found on success.
+ * Invalid on error.
+ *
+ * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or
+ * closest to in_level
+ *
+ * Terminology:
+ * Level-0 = Top-level = highest
+ * Level-3 = Bottom-level = lowest
+ *
+ * Return:
+ * * 0 - OK
+ * * -EINVAL - kmap() failed during page table walk.
+ */
+static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd)
+{
+ phys_addr_t pgd;
+ int l;
+ int err = 0;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+ pgd = mmut->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) {
+ err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+
+ /* Handle failure condition */
+ if (err) {
+ dev_dbg(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
+ break;
+ }
}
- kunmap(p);
- *pgd = target_pgd;
+ *out_pgd = pgd;
+ *out_level = l;
- return 0;
+ /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid.
+ * This implies that we have found the lowest valid pgd. Reset the error code.
+ */
+ if (err == -EFAULT)
+ err = 0;
+
+ return err;
}
/*
- * Returns the PGD for the specified level of translation
+ * On success, sets out_pgd to the PGD for the specified level of translation
+ * Returns -EFAULT if a valid PGD is not found
*/
-static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- u64 vpfn,
- int level,
- phys_addr_t *out_pgd)
+static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ int level, phys_addr_t *out_pgd)
{
phys_addr_t pgd;
int l;
@@ -1335,9 +1660,9 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
/* Handle failure condition */
if (err) {
- dev_dbg(kbdev->dev,
- "%s: mmu_get_next_pgd failure at level %d\n",
- __func__, l);
+ dev_err(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
return err;
}
}
@@ -1347,20 +1672,11 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev,
return 0;
}
-static int mmu_get_bottom_pgd(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- u64 vpfn,
- phys_addr_t *out_pgd)
-{
- return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL,
- out_pgd);
-}
-
static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- u64 from_vpfn, u64 to_vpfn)
+ struct kbase_mmu_table *mmut, u64 from_vpfn,
+ u64 to_vpfn, u64 *dirty_pgds,
+ struct tagged_addr *phys, bool ignore_page_migration)
{
- phys_addr_t pgd;
u64 vpfn = from_vpfn;
struct kbase_mmu_mode const *mmu_mode;
@@ -1371,9 +1687,9 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
lockdep_assert_held(&mmut->mmu_lock);
mmu_mode = kbdev->mmu_mode;
+ kbase_mmu_reset_free_pgds_list(mmut);
while (vpfn < to_vpfn) {
- unsigned int i;
unsigned int idx = vpfn & 0x1FF;
unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
unsigned int pcount = 0;
@@ -1381,6 +1697,8 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
int level;
u64 *page;
phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ phys_addr_t pgd = mmut->pgd;
+ struct page *p = phys_to_page(pgd);
register unsigned int num_of_valid_entries;
@@ -1388,17 +1706,17 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
count = left;
/* need to check if this is a 2MB page or a 4kB */
- pgd = mmut->pgd;
-
for (level = MIDGARD_MMU_TOPLEVEL;
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
pgds[level] = pgd;
- page = kmap(phys_to_page(pgd));
+ page = kbase_kmap(p);
if (mmu_mode->ate_is_valid(page[idx], level))
break; /* keep the mapping */
- kunmap(phys_to_page(pgd));
- pgd = mmu_mode->pte_to_phy_addr(page[idx]);
+ kbase_kunmap(p, page);
+ pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx]));
+ p = phys_to_page(pgd);
}
switch (level) {
@@ -1411,68 +1729,312 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
pcount = count;
break;
default:
- dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n",
- __func__, level);
+ dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level);
goto next;
}
+ if (dirty_pgds && pcount > 0)
+ *dirty_pgds |= 1ULL << level;
+
num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
if (WARN_ON_ONCE(num_of_valid_entries < pcount))
num_of_valid_entries = 0;
else
num_of_valid_entries -= pcount;
+ /* Invalidate the entries we added */
+ mmu_mode->entries_invalidate(&page[idx], pcount);
+
if (!num_of_valid_entries) {
- kunmap(phys_to_page(pgd));
+ kbase_kunmap(p, page);
- kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
- kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds,
- vpfn, level);
+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
+ KBASE_MMU_OP_NONE, dirty_pgds);
vpfn += count;
continue;
}
- /* Invalidate the entries we added */
- for (i = 0; i < pcount; i++)
- mmu_mode->entry_invalidate(&page[idx + i]);
-
mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(phys_to_page(pgd)) + 8 * idx,
- 8 * pcount);
- kunmap(phys_to_page(pgd));
+ /* MMU cache flush strategy is NONE because GPU cache maintenance is
+ * going to be done by the caller
+ */
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
+ kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount,
+ KBASE_MMU_OP_NONE);
+ kbase_kunmap(p, page);
next:
vpfn += count;
}
+
+ /* If page migration is enabled: the only way to recover from failure
+ * is to mark all pages as not movable. It is not predictable what's
+	 * going to happen to these pages at this stage. They might become
+	 * movable again once they are returned to a memory pool.
+ */
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) {
+ const u64 num_pages = to_vpfn - from_vpfn + 1;
+ u64 i;
+
+ for (i = 0; i < num_pages; i++) {
+ struct page *phys_page = as_page(phys[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+
+ if (page_md) {
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+ }
}
-/*
- * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, const u64 vpfn,
+ size_t nr, u64 dirty_pgds,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool insert_pages_failed)
+{
+ struct kbase_mmu_hw_op_param op_param;
+ int as_nr = 0;
+
+ op_param.vpfn = vpfn;
+ op_param.nr = nr;
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ op_param.mmu_sync_info = mmu_sync_info;
+ op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF;
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds);
+
+#if MALI_USE_CSF
+ as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR;
+#else
+ WARN_ON(!mmut->kctx);
+#endif
+
+ /* MMU cache flush strategy depends on whether GPU control commands for
+ * flushing physical address ranges are supported. The new physical pages
+ * are not present in GPU caches therefore they don't need any cache
+ * maintenance, but PGDs in the page table may or may not be created anew.
+ *
+ * Operations that affect the whole GPU cache shall only be done if it's
+ * impossible to update physical ranges.
+ *
+ * On GPUs where flushing by physical address range is supported,
+ * full cache flush is done when an error occurs during
+ * insert_pages() to keep the error handling simpler.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed)
+ mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
+ else
+ mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
+}
+
+/**
+ * update_parent_pgds() - Updates the page table from bottom level towards
+ * the top level to insert a new ATE
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @cur_level: The level of MMU page table where the ATE needs to be added.
+ * The bottom PGD level.
+ * @insert_level: The level of MMU page table where the chain of newly allocated
+ * PGDs needs to be linked-in/inserted.
+ * @insert_vpfn: The virtual page frame number for the ATE.
+ * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
+ * the physical addresses of newly allocated PGDs from index
+ * insert_level+1 to cur_level, and an existing PGD at index
+ * insert_level.
+ *
+ * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
+ * at insert_level which already exists in the MMU Page Tables. Migration status is also
+ * updated for all the newly allocated PGD pages.
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - level N+1 PGD does not exist
+ * * -EINVAL - kmap() failed for level N PGD PFN
+ */
+static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ int cur_level, int insert_level, u64 insert_vpfn,
+ phys_addr_t *pgds_to_insert)
+{
+ int pgd_index;
+ int err = 0;
+
+ /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1)
+ * Loop runs from the bottom-most to the top-most level so that all entries in the chain
+ * are valid when they are inserted into the MMU Page table via the insert_level PGD.
+ */
+ for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) {
+ int parent_index = pgd_index - 1;
+ phys_addr_t parent_pgd = pgds_to_insert[parent_index];
+ unsigned int current_valid_entries;
+ u64 pte;
+ phys_addr_t target_pgd = pgds_to_insert[pgd_index];
+ u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
+ struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
+ u64 *parent_page_va;
+
+ if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) {
+ err = -EFAULT;
+ goto failure_recovery;
+ }
+
+ parent_page_va = kbase_kmap(parent_page);
+
+ if (unlikely(parent_page_va == NULL)) {
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
+ err = -EINVAL;
+ goto failure_recovery;
+ }
+
+ current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
+
+ kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
+ parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
+ kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
+ kbase_kunmap(parent_page, parent_page_va);
+
+ if (parent_index != insert_level) {
+ /* Newly allocated PGDs */
+ kbase_mmu_sync_pgd_cpu(
+ kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64));
+ } else {
+ /* A new valid entry is added to an existing PGD. Perform the
+ * invalidate operation for GPU cache as it could be having a
+ * cacheline that contains the entry (in an invalid form).
+ */
+ kbase_mmu_sync_pgd(
+ kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
+ kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
+ }
+
+ /* Update the new target_pgd page to its stable state */
+ if (kbase_is_page_migration_enabled()) {
+ struct kbase_page_metadata *page_md =
+ kbase_page_private(phys_to_page(target_pgd));
+
+ spin_lock(&page_md->migrate_lock);
+
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
+ IS_PAGE_ISOLATED(page_md->status));
+
+ if (mmut->kctx) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
+ page_md->data.pt_mapped.mmut = mmut;
+ page_md->data.pt_mapped.pgd_vpfn_level =
+ PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index);
+ } else {
+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+
+ return 0;
+
+failure_recovery:
+	/* Clean up the PTEs set in the PGDs. The parent PGD from the loop above is just "pgd" here */
+ for (; pgd_index < cur_level; pgd_index++) {
+ phys_addr_t pgd = pgds_to_insert[pgd_index];
+ struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
+ u64 *pgd_page_va = kbase_kmap(pgd_page);
+ u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
+ kbase_kunmap(pgd_page, pgd_page_va);
+ }
+
+ return err;
+}
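
The (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF expressions above, and the identical ones used elsewhere in this file, split a virtual page frame number into one 9-bit index per page-table level, since each PGD holds 512 entries. A stand-alone sketch in plain user-space C (not driver code; the function name is illustrative):

/* Illustrative sketch (not driver code): per-level PGD entry index derived from
 * a virtual page frame number, matching (vpfn >> ((3 - level) * 9)) & 0x1FF.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned int pgd_entry_index(uint64_t vpfn, int level)
{
	return (unsigned int)((vpfn >> ((3 - level) * 9)) & 0x1FF);
}

int main(void)
{
	const uint64_t vpfn = 0x12345;

	for (int level = 0; level <= 3; level++)
		printf("level %d index %u\n", level, pgd_entry_index(vpfn, level));
	return 0;
}
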
+
+/**
+ * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to
+ * level_high (inclusive)
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @level_low: The lower bound for the levels for which the PGD allocs are required
+ * @level_high: The higher bound for the levels for which the PGD allocs are required
+ * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
+ * newly allocated PGD addresses to.
+ *
+ * Numerically, level_low <= level_high; these bounds are not to be confused with
+ * the top level and bottom level concepts for MMU PGDs. They are only used as low
+ * and high bounds in an incrementing for-loop.
+ *
+ * Return:
+ * * 0 - OK
+ * * -ENOMEM - allocation failed for a PGD.
*/
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int const group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t *new_pgds, int level_low, int level_high)
+{
+ int err = 0;
+ int i;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ for (i = level_low; i <= level_high; i++) {
+ do {
+ new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ break;
+
+ rt_mutex_unlock(&mmut->mmu_lock);
+ err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
+ level_high, NULL);
+ rt_mutex_lock(&mmut->mmu_lock);
+ if (err) {
+ dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
+ __func__, err);
+
+ /* Free all PGDs allocated in previous successful iterations
+ * from (i-1) to level_low
+ */
+ for (i = (i - 1); i >= level_low; i--) {
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
+ }
+
+ return err;
+ }
+ } while (1);
+ }
+
+ return 0;
+}
+
+static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
- /* In case the insert_single_page only partially completes
- * we need to be able to recover
- */
- bool recover_required = false;
- u64 start_vpfn = vpfn;
- size_t recover_count = 0;
+ u64 insert_vpfn = start_vpfn;
size_t remain = nr;
int err;
struct kbase_device *kbdev;
+ u64 dirty_pgds = 0;
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ enum kbase_mmu_op_type flush_op;
+ struct kbase_mmu_table *mmut = &kctx->mmu;
+ int l, cur_level, insert_level;
if (WARN_ON(kctx == NULL))
return -EINVAL;
/* 64-bit address range is the max */
- KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
kbdev = kctx->kbdev;
@@ -1480,77 +2042,88 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
if (nr == 0)
return 0;
- rt_mutex_lock(&kctx->mmu.mmu_lock);
+ /* If page migration is enabled, pages involved in multiple GPU mappings
+ * are always treated as not movable.
+ */
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration) {
+ struct page *phys_page = as_page(phys);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+
+ if (page_md) {
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+
+ rt_mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
- unsigned int index = vpfn & 0x1FF;
- unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ unsigned int vindex = insert_vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
register unsigned int num_of_valid_entries;
+ bool newly_created_pgd = false;
if (count > remain)
count = remain;
+ cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
- vpfn, &pgd);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
- err = kbase_mem_pool_grow(
- &kbdev->mem_pools.small[
- kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
- rt_mutex_lock(&kctx->mmu.mmu_lock);
- } while (!err);
+		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
+
if (err) {
- dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n",
- __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev,
- &kctx->mmu,
- start_vpfn,
- start_vpfn + recover_count);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
- pgd_page = kmap(p);
+
+ pgd_page = kbase_kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev,
- &kctx->mmu,
- start_vpfn,
- start_vpfn + recover_count);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
kbdev->mmu_mode->get_num_valid_entries(pgd_page);
for (i = 0; i < count; i++) {
- unsigned int ofs = index + i;
+ unsigned int ofs = vindex + i;
/* Fail if the current page is a valid ATE entry */
KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
@@ -1562,55 +2135,170 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
kbdev->mmu_mode->set_num_valid_entries(
pgd_page, num_of_valid_entries + count);
- vpfn += count;
- remain -= count;
+ dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL);
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (index * sizeof(u64)),
- count * sizeof(u64));
-
- kunmap(p);
- /* We have started modifying the page table.
- * If further pages need inserting and fail we need to undo what
- * has already taken place
+ /* MMU cache flush operation here will depend on whether bottom level
+ * PGD is newly created or not.
+ *
+ * If bottom level PGD is newly created then no GPU cache maintenance is
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
+ * maintenance is required for existing PGD.
*/
- recover_required = true;
- recover_count += count;
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
+
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
+ flush_op);
+
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kbase_kunmap(p, pgd_page);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
+ insert_vpfn += count;
+ remain -= count;
+ kbase_kunmap(p, pgd_page);
}
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
- kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false, mmu_sync_info);
+
+ rt_mutex_unlock(&mmut->mmu_lock);
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ false);
+
return 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+
fail_unlock:
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
- kbase_mmu_flush_invalidate(kctx, start_vpfn, nr, false, mmu_sync_info);
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds,
+ NULL, true);
+ }
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ true);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+ rt_mutex_unlock(&mmut->mmu_lock);
+
return err;
}
-static void kbase_mmu_free_pgd(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, phys_addr_t pgd,
- bool dirty)
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
- struct page *p;
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
- lockdep_assert_held(&mmut->mmu_lock);
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
- p = pfn_to_page(PFN_DOWN(pgd));
+static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
+ struct kbase_va_region *reg,
+ struct kbase_mmu_table *mmut, const u64 vpfn)
+{
+ struct page *phys_page = as_page(phys);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id],
- p, dirty);
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
- atomic_sub(1, &kbdev->memdev.used_pages);
+ spin_lock(&page_md->migrate_lock);
- /* If MMU tables belong to a context then pages will have been accounted
- * against it, so we must decrement the usage counts here.
+ /* If no GPU va region is given: the metadata provided are
+ * invalid.
+ *
+ * If the page is already allocated and mapped: this is
+ * an additional GPU mapping, probably to create a memory
+ * alias, which means it is no longer possible to migrate
+ * the page easily because tracking all the GPU mappings
+ * would be too costly.
+ *
+ * In any case: the page becomes not movable. It is kept
+ * alive, but attempts to migrate it will fail. The page
+ * will be freed if it is still not movable when it returns
+ * to a memory pool. Notice that the movable flag is not
+ * cleared because that would require taking the page lock.
*/
- if (mmut->kctx) {
- kbase_process_page_usage_dec(mmut->kctx, 1);
- atomic_sub(1, &mmut->kctx->used_pages);
+ if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
+ } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED);
+ page_md->data.mapped.reg = reg;
+ page_md->data.mapped.mmut = mmut;
+ page_md->data.mapped.vpfn = vpfn;
}
- kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
+ spin_unlock(&page_md->migrate_lock);
+}
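
The comment above describes a small decision table: no region, or a page that is already mapped, becomes NOT_MOVABLE, while a freshly allocated page becomes ALLOCATED_MAPPED and starts being tracked. A stand-alone sketch of just that decision, in plain user-space C with illustrative enum names rather than the driver's real status encoding:

/* Illustrative sketch (not driver code): the per-page decision made by
 * kbase_mmu_progress_migration_on_insert(), with illustrative state names.
 */
#include <stdbool.h>
#include <stdio.h>

enum sketch_page_state {
	SKETCH_ALLOCATE_IN_PROGRESS,
	SKETCH_ALLOCATED_MAPPED,
	SKETCH_NOT_MOVABLE,
};

static enum sketch_page_state insert_transition(enum sketch_page_state state, bool have_region)
{
	/* No region, or a second mapping of an already mapped page: stop tracking it */
	if (!have_region || state == SKETCH_ALLOCATED_MAPPED)
		return SKETCH_NOT_MOVABLE;

	/* First mapping of a freshly allocated page: start tracking it */
	if (state == SKETCH_ALLOCATE_IN_PROGRESS)
		return SKETCH_ALLOCATED_MAPPED;

	return state;
}

int main(void)
{
	printf("%d\n", insert_transition(SKETCH_ALLOCATE_IN_PROGRESS, true));  /* -> mapped */
	printf("%d\n", insert_transition(SKETCH_ALLOCATED_MAPPED, true));      /* -> not movable */
	printf("%d\n", insert_transition(SKETCH_ALLOCATE_IN_PROGRESS, false)); /* -> not movable */
	return 0;
}
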
+
+static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev,
+ struct tagged_addr *phys, size_t requested_nr)
+{
+ size_t i;
+
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
+
+ for (i = 0; i < requested_nr; i++) {
+ struct page *phys_page = as_page(phys[i]);
+ struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(phys[i]) || is_partial(phys[i]))
+ continue;
+
+ if (page_md) {
+ u8 status;
+
+ spin_lock(&page_md->migrate_lock);
+ status = PAGE_STATUS_GET(page_md->status);
+
+ if (status == ALLOCATED_MAPPED) {
+ if (IS_PAGE_ISOLATED(page_md->status)) {
+ page_md->status = PAGE_STATUS_SET(
+ page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS);
+ page_md->data.free_isolated.kbdev = kbdev;
+ /* At this point, we still have a reference
+ * to the page via its page migration metadata,
+ * and any page with the FREE_ISOLATED_IN_PROGRESS
+ * status will subsequently be freed in either
+ * kbase_page_migrate() or kbase_page_putback()
+ */
+ phys[i] = as_tagged(0);
+ } else
+ page_md->status = PAGE_STATUS_SET(page_md->status,
+ (u8)FREE_IN_PROGRESS);
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
}
u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
@@ -1624,12 +2312,10 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
group_id, level, entry);
}
-int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- const u64 start_vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags,
- int const group_id)
+static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg, bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
@@ -1637,6 +2323,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
size_t remain = nr;
int err;
struct kbase_mmu_mode const *mmu_mode;
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ int l, cur_level, insert_level;
/* Note that 0 is a valid start_vpfn */
/* 64-bit address range is the max */
@@ -1651,12 +2340,12 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
rt_mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
unsigned int vindex = insert_vpfn & 0x1FF;
unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
- int cur_level;
register unsigned int num_of_valid_entries;
+ bool newly_created_pgd = false;
+ enum kbase_mmu_op_type flush_op;
if (count > remain)
count = remain;
@@ -1666,55 +2355,54 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
else
cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_pgd_at_level() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn,
- cur_level, &pgd);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- rt_mutex_unlock(&mmut->mmu_lock);
- err = kbase_mem_pool_grow(
- &kbdev->mem_pools.small[mmut->group_id],
- cur_level);
- rt_mutex_lock(&mmut->mmu_lock);
- } while (!err);
+		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
if (err) {
- dev_warn(kbdev->dev,
- "%s: mmu_get_bottom_pgd failure\n", __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev,
- mmut, start_vpfn, insert_vpfn);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
- pgd_page = kmap(p);
+ pgd_page = kbase_kmap(p);
+
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure\n",
- __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev,
- mmut, start_vpfn, insert_vpfn);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
@@ -1722,18 +2410,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
if (cur_level == MIDGARD_MMU_LEVEL(2)) {
int level_index = (insert_vpfn >> 9) & 0x1FF;
- u64 *target = &pgd_page[level_index];
-
- if (mmu_mode->pte_is_valid(*target, cur_level)) {
- kbase_mmu_free_pgd(
- kbdev, mmut,
- kbdev->mmu_mode->pte_to_phy_addr(
- *target),
- false);
- num_of_valid_entries--;
- }
- *target = kbase_mmu_create_ate(kbdev, *phys, flags,
- cur_level, group_id);
+ pgd_page[level_index] =
+ kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id);
num_of_valid_entries++;
} else {
@@ -1752,27 +2430,94 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
*target = kbase_mmu_create_ate(kbdev,
phys[i], flags, cur_level, group_id);
+
+ /* If page migration is enabled, this is the right time
+ * to update the status of the page.
+ */
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration &&
+ !is_huge(phys[i]) && !is_partial(phys[i]))
+ kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut,
+ insert_vpfn + i);
}
num_of_valid_entries += count;
}
mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
+ if (dirty_pgds)
+ *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level);
+
+ /* MMU cache flush operation here will depend on whether bottom level
+ * PGD is newly created or not.
+ *
+ * If bottom level PGD is newly created then no GPU cache maintenance is
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
+ * maintenance is required for existing PGD.
+ */
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
+
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
+ flush_op);
+
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kbase_kunmap(p, pgd_page);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
phys += count;
insert_vpfn += count;
remain -= count;
+ kbase_kunmap(p, pgd_page);
+ }
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (vindex * sizeof(u64)),
- count * sizeof(u64));
+ rt_mutex_unlock(&mmut->mmu_lock);
- kunmap(p);
- }
+ return 0;
- err = 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
fail_unlock:
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds,
+ phys, ignore_page_migration);
+ }
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
+ dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
rt_mutex_unlock(&mmut->mmu_lock);
+
+ return err;
+}
+
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg)
+{
+ int err;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ err = mmu_insert_pages_no_flush(kbdev, mmut, start_vpfn, phys, nr, flags, group_id,
+ dirty_pgds, reg, false);
+
return err;
}
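
Both insertion loops above advance through the mapping in per-PGD chunks: the index within the current bottom-level PGD determines how many of the remaining entries can be written before crossing into the next PGD. A stand-alone sketch of that chunking, in plain user-space C with illustrative values:

/* Illustrative sketch (not driver code): splitting an insertion of 'remain'
 * pages into per-PGD chunks, as in the while (remain) loops above. A bottom
 * level PGD holds 512 entries, so each iteration handles at most the entries
 * left in the current PGD.
 */
#include <stdint.h>
#include <stdio.h>

#define ENTRIES_PER_PGD 512u

int main(void)
{
	uint64_t vpfn = 1000;   /* arbitrary starting VPFN */
	size_t remain = 1200;   /* arbitrary number of pages to map */

	while (remain) {
		unsigned int vindex = (unsigned int)(vpfn & (ENTRIES_PER_PGD - 1));
		size_t count = ENTRIES_PER_PGD - vindex;

		if (count > remain)
			count = remain;

		printf("map %zu entries at vpfn %llu (index %u)\n", count,
		       (unsigned long long)vpfn, vindex);

		vpfn += count;
		remain -= count;
	}
	return 0;
}
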
@@ -1780,31 +2525,86 @@ fail_unlock:
* Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
* number 'as_nr'.
*/
-int kbase_mmu_insert_pages(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int as_nr, int const group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
{
int err;
+ u64 dirty_pgds = 0;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
- err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn,
- phys, nr, flags, group_id);
+ err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
+ reg, false);
+ if (err)
+ return err;
- if (mmut->kctx)
- kbase_mmu_flush_invalidate(mmut->kctx, vpfn, nr, false,
- mmu_sync_info);
- else
- kbase_mmu_flush_invalidate_no_ctx(kbdev, vpfn, nr, false, as_nr,
- mmu_sync_info);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
- return err;
+ return 0;
}
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
+{
+ int err;
+ u64 dirty_pgds = 0;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ /* Imported allocations don't have metadata and therefore always ignore the
+ * page migration logic.
+ */
+ err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
+ reg, true);
+ if (err)
+ return err;
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
+
+ return 0;
+}
+
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
+{
+ int err;
+ u64 dirty_pgds = 0;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ /* Memory aliases are always built on top of existing allocations,
+ * therefore the state of physical pages shall be updated.
+ */
+ err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
+ reg, false);
+ if (err)
+ return err;
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
+
+ return 0;
+}
+
+#if !MALI_USE_CSF
/**
- * kbase_mmu_flush_invalidate_noretain() - Flush and invalidate the GPU caches
+ * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches
* without retaining the kbase context.
* @kctx: The KBase context.
* @vpfn: The virtual page frame number to start the flush on.
@@ -1813,17 +2613,15 @@ KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
* As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
* other locking.
*/
-static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
- u64 vpfn, size_t nr)
+static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr)
{
struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_mmu_hw_op_param op_param;
int err;
-
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ struct kbase_mmu_hw_op_param op_param;
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
@@ -1833,154 +2631,32 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
return;
/* flush L2 and unlock the VA (resumes the MMU) */
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = vpfn,
- .nr = nr,
- .op = KBASE_MMU_OP_FLUSH_MEM,
- .kctx_id = kctx->id,
- .mmu_sync_info = mmu_sync_info,
- };
-
+ op_param.vpfn = vpfn;
+ op_param.nr = nr;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- err = mmu_flush_invalidate_on_gpu_ctrl(
- kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ /* Value used to prevent skipping of any levels when flushing */
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
+ err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
} else {
- err = kbase_mmu_hw_do_operation(kbdev, &kbdev->as[kctx->as_nr],
- &op_param);
+ err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr],
+ &op_param);
}
if (err) {
/* Flush failed to complete, assume the
* GPU has hung and perform a reset to recover
*/
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
}
}
-
-/* Perform a flush/invalidate on a particular address space
- */
-static void
-kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
- u64 vpfn, size_t nr, bool sync, u32 kctx_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
-{
- int err;
- bool gpu_powered;
- unsigned long flags;
- struct kbase_mmu_hw_op_param op_param;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- gpu_powered = kbdev->pm.backend.gpu_powered;
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- /* GPU is off so there's no need to perform flush/invalidate.
- * But even if GPU is not actually powered down, after gpu_powered flag
- * was set to false, it is still safe to skip the flush/invalidate.
- * The TLB invalidation will anyways be performed due to AS_COMMAND_UPDATE
- * which is sent when address spaces are restored after gpu_powered flag
- * is set to true. Flushing of L2 cache is certainly not required as L2
- * cache is definitely off if gpu_powered is false.
- */
- if (!gpu_powered)
- return;
-
- if (kbase_pm_context_active_handle_suspend(kbdev,
- KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
- /* GPU has just been powered off due to system suspend.
- * So again, no need to perform flush/invalidate.
- */
- return;
- }
-
- /* AS transaction begin */
- mutex_lock(&kbdev->mmu_hw_mutex);
-
- op_param = (struct kbase_mmu_hw_op_param){
- .vpfn = vpfn,
- .nr = nr,
- .kctx_id = kctx_id,
- .mmu_sync_info = mmu_sync_info,
- };
-
- if (sync)
- op_param.op = KBASE_MMU_OP_FLUSH_MEM;
- else
- op_param.op = KBASE_MMU_OP_FLUSH_PT;
-
- if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, as, &op_param);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- } else {
- mmu_hw_operation_begin(kbdev);
- err = kbase_mmu_hw_do_operation(kbdev, as, &op_param);
- mmu_hw_operation_end(kbdev);
- }
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover
- */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n");
-
- if (kbase_prepare_to_reset_gpu(
- kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
- }
-
- mutex_unlock(&kbdev->mmu_hw_mutex);
- /* AS transaction end */
-
- kbase_pm_context_idle(kbdev);
-}
-
-static void
-kbase_mmu_flush_invalidate_no_ctx(struct kbase_device *kbdev, u64 vpfn,
- size_t nr, bool sync, int as_nr,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
-{
- /* Skip if there is nothing to do */
- if (nr) {
- kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], vpfn,
- nr, sync, 0xFFFFFFFF,
- mmu_sync_info);
- }
-}
-
-static void
-kbase_mmu_flush_invalidate(struct kbase_context *kctx, u64 vpfn, size_t nr,
- bool sync,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
-{
- struct kbase_device *kbdev;
- bool ctx_is_in_runpool;
-
- /* Early out if there is nothing to do */
- if (nr == 0)
- return;
-
- kbdev = kctx->kbdev;
-#if !MALI_USE_CSF
- rt_mutex_lock(&kbdev->js_data.queue_mutex);
- ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
- rt_mutex_unlock(&kbdev->js_data.queue_mutex);
-#else
- ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
-#endif /* !MALI_USE_CSF */
-
- if (ctx_is_in_runpool) {
- KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
-
- kbase_mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr],
- vpfn, nr, sync, kctx->id,
- mmu_sync_info);
-
- release_ctx(kbdev, kctx);
- }
-}
+#endif
void kbase_mmu_update(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut,
@@ -2002,6 +2678,88 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
kbdev->mmu_mode->disable_as(kbdev, as_nr);
}
+#if MALI_USE_CSF
+void kbase_mmu_disable(struct kbase_context *kctx)
+{
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_mmu_hw_op_param op_param = { 0 };
+ int lock_err, flush_err;
+
+ /* ASSERT that the context has a valid as_nr, which is only the case
+ * when it's scheduled in.
+ *
+ * as_nr won't change because the caller has the hwaccess_lock
+ */
+ KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+
+ op_param.vpfn = 0;
+ op_param.nr = ~0;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
+
+#if MALI_USE_CSF
+ /* 0xF value used to prevent skipping of any levels when flushing */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
+#endif
+
+ /* lock MMU to prevent existing jobs on GPU from executing while the AS is
+ * not yet disabled
+ */
+ lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
+ kctx->id);
+
+ /* Issue the flush command only when L2 cache is in stable power on state.
+ * Any other state for L2 cache implies that shader cores are powered off,
+ * which in turn implies there is no execution happening on the GPU.
+ */
+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
+ flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ if (flush_err)
+ dev_err(kbdev->dev,
+ "Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
+ kctx->as_nr, kctx->tgid, kctx->id);
+ }
+ kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
+
+ if (!lock_err) {
+ /* unlock the MMU to allow it to resume */
+ lock_err =
+ kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
+ kctx->tgid, kctx->id);
+ }
+
+#if !MALI_USE_CSF
+ /*
+	 * JM GPUs have some L1 read-only caches that need to be invalidated
+ * with START_FLUSH configuration. Purge the MMU disabled kctx from
+ * the slot_rb tracking field so such invalidation is performed when
+ * a new katom is executed on the affected slots.
+ */
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
+#endif
+
+ /* kbase_gpu_cache_flush_and_busy_wait() will reset the GPU on timeout. Only
+ * reset the GPU if locking or unlocking fails.
+ */
+ if (lock_err)
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu_locked(kbdev);
+}
+#else
void kbase_mmu_disable(struct kbase_context *kctx)
{
/* ASSERT that the context has a valid as_nr, which is only the case
@@ -2021,7 +2779,7 @@ void kbase_mmu_disable(struct kbase_context *kctx)
* The job scheduler code will already be holding the locks and context
* so just do the flush.
*/
- kbase_mmu_flush_invalidate_noretain(kctx, 0, ~0);
+ kbase_mmu_flush_noretain(kctx, 0, ~0);
kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
#if !MALI_USE_CSF
@@ -2034,12 +2792,13 @@ void kbase_mmu_disable(struct kbase_context *kctx)
kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx);
#endif
}
+#endif
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- phys_addr_t *pgds, u64 vpfn,
- int level)
+ struct kbase_mmu_table *mmut, phys_addr_t *pgds,
+ u64 vpfn, int level,
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
{
int current_level;
@@ -2047,83 +2806,116 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0);
current_level--) {
- u64 *current_page = kmap(phys_to_page(pgds[current_level]));
+ phys_addr_t current_pgd = pgds[current_level];
+ struct page *p = phys_to_page(current_pgd);
+
+ u64 *current_page = kbase_kmap(p);
unsigned int current_valid_entries =
kbdev->mmu_mode->get_num_valid_entries(current_page);
+ int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
+ /* We need to track every level that needs updating */
+ if (dirty_pgds)
+ *dirty_pgds |= 1ULL << current_level;
+
+ kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
if (current_valid_entries == 1 &&
current_level != MIDGARD_MMU_LEVEL(0)) {
- kunmap(phys_to_page(pgds[current_level]));
+ kbase_kunmap(p, current_page);
- kbase_mmu_free_pgd(kbdev, mmut, pgds[current_level],
- true);
- } else {
- int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
-
- kbdev->mmu_mode->entry_invalidate(&current_page[index]);
+ /* Ensure the cacheline containing the last valid entry
+ * of PGD is invalidated from the GPU cache, before the
+ * PGD page is freed.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
+ current_pgd + (index * sizeof(u64)),
+ sizeof(u64), flush_op);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
+ } else {
current_valid_entries--;
kbdev->mmu_mode->set_num_valid_entries(
current_page, current_valid_entries);
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(phys_to_page(
- pgds[current_level])) +
- 8 * index,
- 8 * 1);
+ kbase_kunmap(p, current_page);
- kunmap(phys_to_page(pgds[current_level]));
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
+ kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64),
+ flush_op);
break;
}
}
}
-/*
- * We actually discard the ATE and free the page table pages if no valid entries
- * exist in PGD.
+/**
+ * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
*
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
+ * @kbdev: Pointer to kbase device.
+ * @kctx: Pointer to kbase context.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @phys: Array of physical pages to flush.
+ * @phys_page_nr: Number of physical pages to flush.
+ * @op_param: Non-NULL pointer to struct containing information about the flush
+ * operation to perform.
+ *
+ * This function will do one of three things:
+ * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
+ * individual pages that were unmapped if feature is supported on GPU.
+ * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
+ * supported on GPU or,
+ * 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
+ *
+ * When performing a partial GPU cache flush, the number of physical
+ * pages does not have to be identical to the number of virtual pages on the MMU,
+ * to support a single physical address flush for an aliased page.
*/
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, u64 vpfn, size_t nr, int as_nr)
+static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
+ struct kbase_context *kctx, int as_nr,
+ struct tagged_addr *phys, size_t phys_page_nr,
+ struct kbase_mmu_hw_op_param *op_param)
{
- phys_addr_t pgd;
- u64 start_vpfn = vpfn;
- size_t requested_nr = nr;
- struct kbase_mmu_mode const *mmu_mode;
- int err = -EFAULT;
+ if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ /* Full cache flush through the MMU_COMMAND */
+ mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
+ } else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
+ /* Full cache flush through the GPU_CONTROL */
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param);
+ }
+#if MALI_USE_CSF
+ else {
+ /* Partial GPU cache flush with MMU cache invalidation */
+ unsigned long irq_flags;
+ unsigned int i;
+ bool flush_done = false;
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ mmu_invalidate(kbdev, kctx, as_nr, op_param);
- if (nr == 0) {
- /* early out if nothing to do */
- return 0;
+ for (i = 0; !flush_done && i < phys_page_nr; i++) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0))
+ mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
+ KBASE_MMU_OP_FLUSH_MEM);
+ else
+ flush_done = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+ }
}
+#endif
+}
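
The three-way choice documented above reduces to two booleans. A stand-alone sketch of the selection logic in plain user-space C; gpu_ctrl_flush stands in for mmu_flush_cache_on_gpu_ctrl() and full_flush for op_param->op being KBASE_MMU_OP_FLUSH_MEM (both parameter names are illustrative):

/* Illustrative sketch (not driver code): the strategy selection made by
 * mmu_flush_invalidate_teardown_pages(), reduced to plain decision logic.
 */
#include <stdbool.h>
#include <stdio.h>

static const char *teardown_flush_strategy(bool gpu_ctrl_flush, bool full_flush)
{
	if (!gpu_ctrl_flush)
		return "full cache flush via MMU_COMMAND";
	if (full_flush)
		return "full cache flush via GPU_CONTROL";
	return "MMU invalidate + per-page physical range flush";
}

int main(void)
{
	printf("%s\n", teardown_flush_strategy(false, true));
	printf("%s\n", teardown_flush_strategy(true, true));
	printf("%s\n", teardown_flush_strategy(true, false));
	return 0;
}
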
- if (!rt_mutex_trylock(&mmut->mmu_lock)) {
- /*
- * Sometimes, mmu_lock takes long time to be released.
- * In that case, kswapd is stuck until it can hold
- * the lock. Instead, just bail out here so kswapd
- * could reclaim other pages.
- */
- if (current_is_kswapd())
- return -EBUSY;
- rt_mutex_lock(&mmut->mmu_lock);
- }
+static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, size_t nr, u64 *dirty_pgds,
+ struct list_head *free_pgds_list,
+ enum kbase_mmu_op_type flush_op)
+{
+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
- mmu_mode = kbdev->mmu_mode;
+ lockdep_assert_held(&mmut->mmu_lock);
+ kbase_mmu_reset_free_pgds_list(mmut);
while (nr) {
- unsigned int i;
unsigned int index = vpfn & 0x1FF;
unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
unsigned int pcount;
@@ -2131,19 +2923,19 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
u64 *page;
phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
register unsigned int num_of_valid_entries;
+ phys_addr_t pgd = mmut->pgd;
+ struct page *p = phys_to_page(pgd);
if (count > nr)
count = nr;
- /* need to check if this is a 2MB or a 4kB page */
- pgd = mmut->pgd;
-
+		/* need to check if this is a 2MB page or a 4kB one */
for (level = MIDGARD_MMU_TOPLEVEL;
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
phys_addr_t next_pgd;
index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
- page = kmap(phys_to_page(pgd));
+ page = kbase_kmap(p);
if (mmu_mode->ate_is_valid(page[index], level))
break; /* keep the mapping */
else if (!mmu_mode->pte_is_valid(page[index], level)) {
@@ -2166,28 +2958,31 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
count = nr;
goto next;
}
- next_pgd = mmu_mode->pte_to_phy_addr(page[index]);
+ next_pgd = mmu_mode->pte_to_phy_addr(
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index]));
+ kbase_kunmap(p, page);
pgds[level] = pgd;
- kunmap(phys_to_page(pgd));
pgd = next_pgd;
+ p = phys_to_page(pgd);
}
switch (level) {
case MIDGARD_MMU_LEVEL(0):
case MIDGARD_MMU_LEVEL(1):
- dev_warn(kbdev->dev,
- "%s: No support for ATEs at level %d\n",
- __func__, level);
- kunmap(phys_to_page(pgd));
+ dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__,
+ level);
+ kbase_kunmap(p, page);
goto out;
case MIDGARD_MMU_LEVEL(2):
/* can only teardown if count >= 512 */
if (count >= 512) {
pcount = 1;
} else {
- dev_warn(kbdev->dev,
- "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n",
- __func__, count);
+ dev_warn(
+ kbdev->dev,
+ "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down",
+ __func__, count);
pcount = 0;
}
break;
@@ -2196,68 +2991,205 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev,
pcount = count;
break;
default:
- dev_err(kbdev->dev,
- "%s: found non-mapped memory, early out\n",
- __func__);
+ dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__);
vpfn += count;
nr -= count;
continue;
}
+ if (pcount > 0)
+ *dirty_pgds |= 1ULL << level;
+
num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
if (WARN_ON_ONCE(num_of_valid_entries < pcount))
num_of_valid_entries = 0;
else
num_of_valid_entries -= pcount;
+		/* Invalidate the entries we are tearing down */
+ mmu_mode->entries_invalidate(&page[index], pcount);
+
if (!num_of_valid_entries) {
- kunmap(phys_to_page(pgd));
+ kbase_kunmap(p, page);
+
+ /* Ensure the cacheline(s) containing the last valid entries
+ * of PGD is invalidated from the GPU cache, before the
+ * PGD page is freed.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
+ pgd + (index * sizeof(u64)),
+ pcount * sizeof(u64), flush_op);
- kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
- kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds,
- vpfn, level);
+ kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
+ flush_op, dirty_pgds);
vpfn += count;
nr -= count;
continue;
}
- /* Invalidate the entries we added */
- for (i = 0; i < pcount; i++)
- mmu_mode->entry_invalidate(&page[index + i]);
-
mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
- kbase_mmu_sync_pgd(
- kbdev, kbase_dma_addr(phys_to_page(pgd)) + 8 * index,
- 8 * pcount);
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
+ kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64),
+ flush_op);
next:
- kunmap(phys_to_page(pgd));
- vpfn += count;
- nr -= count;
+ kbase_kunmap(p, page);
+ vpfn += count;
+ nr -= count;
}
- err = 0;
out:
- rt_mutex_unlock(&mmut->mmu_lock);
+ return 0;
+}
- if (mmut->kctx)
- kbase_mmu_flush_invalidate(mmut->kctx, start_vpfn, requested_nr,
- true, mmu_sync_info);
- else
- kbase_mmu_flush_invalidate_no_ctx(kbdev, start_vpfn,
- requested_nr, true, as_nr,
- mmu_sync_info);
+/**
+ * mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
+ *
+ * @kbdev: Pointer to kbase device.
+ * @mmut: Pointer to GPU MMU page table.
+ * @vpfn: Start page frame number of the GPU virtual pages to unmap.
+ * @phys: Array of physical pages currently mapped to the virtual
+ * pages to unmap, or NULL. This is used for GPU cache maintenance
+ * and page migration support.
+ * @nr_phys_pages: Number of physical pages to flush.
+ * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @ignore_page_migration: Whether page migration metadata should be ignored.
+ *
+ * We actually discard the ATE and free the page table pages if no valid entries
+ * exist in the PGD.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ *
+ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
+ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
+ * GPU cache maintenance will be done as usual; that is, invalidating the whole GPU caches
+ * instead of specific physical address ranges.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
+static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration)
+{
+ u64 start_vpfn = vpfn;
+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
+ struct kbase_mmu_hw_op_param op_param;
+ int err = -EFAULT;
+ u64 dirty_pgds = 0;
+ LIST_HEAD(free_pgds_list);
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ /* This function performs two operations: MMU maintenance and flushing
+ * the caches. To ensure internal consistency between the caches and the
+ * MMU, it does not make sense to be able to flush only the physical pages
+ * from the cache and keep the PTE, nor does it make sense to use this
+ * function to remove a PTE and keep the physical pages in the cache.
+ *
+ * However, we have legitimate cases where we can try to tear down a mapping
+ * with zero virtual and zero physical pages, so we must have the following
+	 * behaviour (also sketched in standalone form after this function):
+	 * - if both the physical and virtual page counts are zero, return early
+	 * - if either the physical or the virtual page count is zero, return early
+	 * - if there are more physical pages than virtual pages, return -EINVAL
+ */
+ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
+ return 0;
+
+ if (unlikely(nr_virt_pages < nr_phys_pages))
+ return -EINVAL;
+
+ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
+ * the operation is invalidate but the granularity of cache maintenance may change
+ * according to the situation.
+ *
+ * If GPU control command operations are present and the number of pages is "small",
+ * then the optimal strategy is flushing on the physical address range of the pages
+ * which are affected by the operation. That implies both the PGDs which are modified
+ * or removed from the page table and the physical pages which are freed from memory.
+ *
+ * Otherwise, there's no alternative to invalidating the whole GPU cache.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
+ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
+
+ if (!rt_mutex_trylock(&mmut->mmu_lock)) {
+ /*
+ * Sometimes, mmu_lock takes long time to be released.
+ * In that case, kswapd is stuck until it can hold
+ * the lock. Instead, just bail out here so kswapd
+ * could reclaim other pages.
+ */
+ if (current_is_kswapd())
+ return -EBUSY;
+ rt_mutex_lock(&mmut->mmu_lock);
+ }
+
+ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
+ &free_pgds_list, flush_op);
+
+ /* Set up MMU operation parameters. See above about MMU cache flush strategy. */
+ op_param = (struct kbase_mmu_hw_op_param){
+ .vpfn = start_vpfn,
+ .nr = nr_virt_pages,
+ .mmu_sync_info = mmu_sync_info,
+ .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
+ .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
+ KBASE_MMU_OP_FLUSH_MEM,
+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
+ };
+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
+ &op_param);
+
+ /* If page migration is enabled: the status of all physical pages involved
+ * shall be updated, unless they are not movable. Their status shall be
+ * updated before releasing the lock to protect against concurrent
+ * requests to migrate the pages, if they have been isolated.
+ */
+ if (kbase_is_page_migration_enabled() && phys && !ignore_page_migration)
+ kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
+
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+
+ rt_mutex_unlock(&mmut->mmu_lock);
return err;
}
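
The argument checks described at the top of mmu_teardown_pages() can be read in isolation: zero pages on either side is a no-op, and more physical than virtual pages is rejected, while fewer physical than virtual pages is allowed for aliased mappings. A stand-alone sketch in plain user-space C (the function name and the SKETCH_EINVAL constant are illustrative):

/* Illustrative sketch (not driver code): the early-out and validation rules
 * documented in mmu_teardown_pages().
 */
#include <stddef.h>
#include <stdio.h>

#define SKETCH_EINVAL 22

static int teardown_args_check(size_t nr_phys_pages, size_t nr_virt_pages)
{
	if (nr_virt_pages == 0 || nr_phys_pages == 0)
		return 0;                /* nothing to do */
	if (nr_virt_pages < nr_phys_pages)
		return -SKETCH_EINVAL;   /* more physical than virtual pages */
	return 1;                        /* proceed with the teardown */
}

int main(void)
{
	printf("%d\n", teardown_args_check(0, 0));    /* 0: early out */
	printf("%d\n", teardown_args_check(4, 2));    /* -22: invalid */
	printf("%d\n", teardown_args_check(1, 512));  /* 1: aliased single page */
	return 0;
}
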
-KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr)
+{
+ return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
+ false);
+}
+
+int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
+ size_t nr_virt_pages, int as_nr)
+{
+ return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
+ true);
+}
/**
- * kbase_mmu_update_pages_no_flush() - Update attributes data in GPU page table entries
+ * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU
+ * page table entries
*
- * @kctx: Kbase context
+ * @kbdev: Pointer to kbase device.
+ * @mmut: The involved MMU table
* @vpfn: Virtual PFN (Page Frame Number) of the first page to update
* @phys: Pointer to the array of tagged physical addresses of the physical
* pages that are pointed to by the page table entries (that need to
@@ -2267,28 +3199,25 @@ KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
* @flags: Flags
* @group_id: The physical memory group in which the page was allocated.
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @dirty_pgds: Flags to track every level where a PGD has been updated.
*
* This will update page table entries that already exist on the GPU based on
- * the new flags that are passed (the physical pages pointed to by the page
- * table entries remain unchanged). It is used as a response to the changes of
- * the memory attributes.
+ * new flags and replace any existing physical pages that are passed (the PGD pages
+ * remain unchanged). It is used as a response to changes in the physical pages as
+ * well as in the memory attributes.
*
* The caller is responsible for validating the memory attributes.
*
* Return: 0 if the attributes data in page table entries were updated
* successfully, otherwise an error code.
*/
-static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int const group_id)
+int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds)
{
phys_addr_t pgd;
u64 *pgd_page;
int err;
- struct kbase_device *kbdev;
-
- if (WARN_ON(kctx == NULL))
- return -EINVAL;
KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
@@ -2296,9 +3225,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
if (nr == 0)
return 0;
- rt_mutex_lock(&kctx->mmu.mmu_lock);
-
- kbdev = kctx->kbdev;
+ rt_mutex_lock(&mmut->mmu_lock);
while (nr) {
unsigned int i;
@@ -2314,12 +3241,12 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
if (is_huge(*phys) && (index == index_in_large_page(*phys)))
cur_level = MIDGARD_MMU_LEVEL(2);
- err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd);
+ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd);
if (WARN_ON(err))
goto fail_unlock;
p = pfn_to_page(PFN_DOWN(pgd));
- pgd_page = kmap(p);
+ pgd_page = kbase_kmap(p);
if (!pgd_page) {
dev_warn(kbdev->dev, "kmap failure on update_pages");
err = -ENOMEM;
@@ -2341,9 +3268,9 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
pgd_page[level_index] = kbase_mmu_create_ate(kbdev,
*target_phys, flags, MIDGARD_MMU_LEVEL(2),
group_id);
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (level_index * sizeof(u64)),
- sizeof(u64));
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)),
+ kbase_dma_addr(p) + (level_index * sizeof(u64)),
+ sizeof(u64), KBASE_MMU_OP_NONE);
} else {
for (i = 0; i < count; i++) {
#ifdef CONFIG_MALI_DEBUG
@@ -2355,148 +3282,568 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
group_id);
}
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (index * sizeof(u64)),
- count * sizeof(u64));
+
+ /* MMU cache flush strategy is NONE because GPU cache maintenance
+ * will be done by the caller.
+ */
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ count * sizeof(u64), KBASE_MMU_OP_NONE);
}
kbdev->mmu_mode->set_num_valid_entries(pgd_page,
num_of_valid_entries);
+ if (dirty_pgds && count > 0)
+ *dirty_pgds |= 1ULL << cur_level;
+
phys += count;
vpfn += count;
nr -= count;
- kunmap(p);
+ kbase_kunmap(p, pgd_page);
}
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
+ rt_mutex_unlock(&mmut->mmu_lock);
return 0;
fail_unlock:
- rt_mutex_unlock(&kctx->mmu.mmu_lock);
+ rt_mutex_unlock(&mmut->mmu_lock);
return err;
}
-int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr *phys, size_t nr,
- unsigned long flags, int const group_id)
+static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id)
{
int err;
+ u64 dirty_pgds = 0;
+ struct kbase_mmu_table *mmut;
+#if !MALI_USE_CSF
+ if (unlikely(kctx == NULL))
+ return -EINVAL;
+
+ mmut = &kctx->mmu;
+#else
+ mmut = kctx ? &kctx->mmu : &kbdev->csf.mcu_mmu;
+#endif
+
+ err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds);
+
+ kbase_mmu_flush_invalidate_update_pages(kbdev, kctx, vpfn, nr, dirty_pgds);
+
+ return err;
+}
+
+void kbase_mmu_flush_invalidate_update_pages(struct kbase_device *kbdev, struct kbase_context *kctx,
+ u64 vpfn, size_t nr, u64 dirty_pgds)
+{
+ struct kbase_mmu_hw_op_param op_param;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ int as_nr;
- err = kbase_mmu_update_pages_no_flush(kctx, vpfn, phys, nr, flags,
- group_id);
- kbase_mmu_flush_invalidate(kctx, vpfn, nr, true, mmu_sync_info);
- return err;
+#if !MALI_USE_CSF
+ if (unlikely(kctx == NULL))
+ return;
+
+ as_nr = kctx->as_nr;
+#else
+ as_nr = kctx ? kctx->as_nr : MCU_AS_NR;
+#endif
+
+ op_param = (const struct kbase_mmu_hw_op_param){
+ .vpfn = vpfn,
+ .nr = nr,
+ .op = KBASE_MMU_OP_FLUSH_MEM,
+ .kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
+ .mmu_sync_info = mmu_sync_info,
+ .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
+ };
+
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param);
+ else
+ mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param);
}
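
[Editor's note] The flush helper above consumes the dirty_pgds bitmask built up by kbase_mmu_update_pages_no_flush ("*dirty_pgds |= 1ULL << cur_level") and passes it to flush_skip_levels via pgd_level_to_skip_flush(). The standalone sketch below is illustrative only and not part of the patch; the exact semantics of the macro are an assumption here (levels that saw no PGD update can be skipped by the flush), and example_skip_levels/EXAMPLE_NR_MMU_LEVELS are hypothetical names.

/* Illustrative only: derive a "levels to skip" mask from per-level dirty bits. */
#define EXAMPLE_NR_MMU_LEVELS 4

static inline unsigned int example_skip_levels(u64 dirty_pgds)
{
	unsigned int level, skip = 0;

	for (level = 0; level < EXAMPLE_NR_MMU_LEVELS; level++) {
		/* A level whose PGD was never updated does not need flushing */
		if (!(dirty_pgds & (1ULL << level)))
			skip |= 1u << level;
	}

	return skip;
}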
-static void mmu_teardown_level(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut, phys_addr_t pgd,
- int level)
+int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id)
+{
+ if (unlikely(kctx == NULL))
+ return -EINVAL;
+
+ return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id);
+}
+
+#if MALI_USE_CSF
+int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
+ size_t nr, unsigned long flags, int const group_id)
+{
+ return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id);
+}
+#endif /* MALI_USE_CSF */
+
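[Editor's note] Both public entry points funnel into kbase_mmu_update_pages_common(): kbase_mmu_update_pages() for a user context and, on CSF GPUs, kbase_mmu_update_csf_mcu_pages() for the MCU address space. A minimal caller-side sketch follows; example_update_mapping is a hypothetical wrapper, not part of the driver.

/* Hypothetical caller: update attributes of nr pages already mapped at vpfn */
static int example_update_mapping(struct kbase_device *kbdev, struct kbase_context *kctx,
				  u64 vpfn, struct tagged_addr *phys, size_t nr,
				  unsigned long flags, int group_id)
{
	if (kctx)
		return kbase_mmu_update_pages(kctx, vpfn, phys, nr, flags, group_id);

#if MALI_USE_CSF
	/* No context: the pages belong to the MCU firmware address space */
	return kbase_mmu_update_csf_mcu_pages(kbdev, vpfn, phys, nr, flags, group_id);
#else
	return -EINVAL;
#endif
}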
+static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress);
+ kbdev->mmu_page_migrate_in_progress = true;
+}
+
+static void mmu_page_migration_transaction_end(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress);
+ kbdev->mmu_page_migrate_in_progress = false;
+ /* Invoke the PM state machine, as the MMU page migration session
+ * may have deferred a transition in L2 state machine.
+ */
+ kbase_pm_update_state(kbdev);
+}
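
[Editor's note] These two helpers bracket a page-migration "transaction" under hwaccess_lock so that the L2 state machine can defer transitions until the migration completes. The sketch below mirrors the call pattern used later in kbase_mmu_migrate_page(); it is a simplified illustration with error handling and the actual migration work elided.

/* Sketch of the expected call pattern; both helpers require hwaccess_lock */
static void example_migration_transaction(struct kbase_device *kbdev)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	mmu_page_migration_transaction_begin(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* ... lock the MMU region, flush caches, copy the page ... */

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	mmu_page_migration_transaction_end(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}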
+
+int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys,
+ dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys));
+ struct kbase_mmu_hw_op_param op_param;
+ struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
+ page_md->data.mapped.mmut :
+ page_md->data.pt_mapped.mmut;
+ struct kbase_device *kbdev;
+ phys_addr_t pgd;
+ u64 *old_page, *new_page, *pgd_page, *target, vpfn;
+ int index, check_state, ret = 0;
+ unsigned long hwaccess_flags = 0;
+ unsigned int num_of_valid_entries;
+ u8 vmap_count = 0;
+
+ /* If page migration support is not compiled in, return with fault */
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return -EINVAL;
+ /* Because mmu_command_instr is hard-bound to a kctx_id via kbase_mmu_hw_op_param,
+ * the no-kctx case (which is only used with the MCU's mmut) is skipped here.
+ */
+ if (!mmut->kctx)
+ return -EINVAL;
+
+ if (level > MIDGARD_MMU_BOTTOMLEVEL)
+ return -EINVAL;
+ else if (level == MIDGARD_MMU_BOTTOMLEVEL)
+ vpfn = page_md->data.mapped.vpfn;
+ else
+ vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level);
+
+ kbdev = mmut->kctx->kbdev;
+ index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
+
+ /* Create all mappings before copying content.
+ * This is done as early as possible because it is the only operation that may
+ * fail. It is possible to do this before taking any locks because the
+ * pages to migrate are not going to change and even the parent PGD is not
+ * going to be affected by any other concurrent operation, since the page
+ * has been isolated before migration and therefore it cannot disappear in
+ * the middle of this function.
+ */
+ old_page = kbase_kmap(as_page(old_phys));
+ if (!old_page) {
+ dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__);
+ ret = -EINVAL;
+ goto old_page_map_error;
+ }
+
+ new_page = kbase_kmap(as_page(new_phys));
+ if (!new_page) {
+ dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__);
+ ret = -EINVAL;
+ goto new_page_map_error;
+ }
+
+ /* GPU cache maintenance affects both memory content and page table,
+ * but at two different stages. A single virtual memory page is affected
+ * by the migration.
+ *
+ * Notice that the MMU maintenance is done in the following steps:
+ *
+ * 1) The MMU region is locked without performing any other operation.
+ * This lock must cover the entire migration process, in order to
+ * prevent any GPU access to the virtual page whose physical page
+ * is being migrated.
+ * 2) Immediately after locking: the MMU region content is flushed via
+ * GPU control while the lock is taken and without unlocking.
+ * The region must stay locked for the duration of the whole page
+ * migration procedure.
+ * This is necessary to make sure that pending writes to the old page
+ * are finalized before copying content to the new page.
+ * 3) Before unlocking: changes to the page table are flushed.
+ * Finer-grained GPU control operations are used if possible, otherwise
+ * the whole GPU cache shall be flushed again.
+ * This is necessary to make sure that the GPU accesses the new page
+ * after migration.
+ * 4) The MMU region is unlocked.
+ */
+#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1))
+ op_param.mmu_sync_info = CALLER_MMU_ASYNC;
+ op_param.kctx_id = mmut->kctx->id;
+ op_param.vpfn = vpfn & PGD_VPFN_MASK(level);
+ op_param.nr = 1 << ((3 - level) * 9);
+ op_param.op = KBASE_MMU_OP_FLUSH_PT;
+ /* When level is not MIDGARD_MMU_BOTTOMLEVEL, PGD page migration is assumed */
+ op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
+ pgd_level_to_skip_flush(1ULL << level) :
+ pgd_level_to_skip_flush(3ULL << level);
+
+ rt_mutex_lock(&mmut->mmu_lock);
+
+ /* The state was evaluated before entering this function, but it could
+ * have changed before the mmu_lock was taken. However, the state
+ * transitions which are possible at this point are only two, and in both
+ * cases it is a stable state progressing to a "free in progress" state.
+ *
+ * After taking the mmu_lock the state can no longer change: read it again
+ * and make sure that it hasn't changed before continuing.
+ */
+ spin_lock(&page_md->migrate_lock);
+ check_state = PAGE_STATUS_GET(page_md->status);
+ if (level == MIDGARD_MMU_BOTTOMLEVEL)
+ vmap_count = page_md->vmap_count;
+ spin_unlock(&page_md->migrate_lock);
+
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
+ if (check_state != ALLOCATED_MAPPED) {
+ dev_dbg(kbdev->dev,
+ "%s: state changed to %d (was %d), abort page migration", __func__,
+ check_state, ALLOCATED_MAPPED);
+ ret = -EAGAIN;
+ goto page_state_change_out;
+ } else if (vmap_count > 0) {
+ dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration",
+ __func__);
+ ret = -EAGAIN;
+ goto page_state_change_out;
+ }
+ } else {
+ if (check_state != PT_MAPPED) {
+ dev_dbg(kbdev->dev,
+ "%s: state changed to %d (was %d), abort PGD page migration",
+ __func__, check_state, PT_MAPPED);
+ WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS);
+ ret = -EAGAIN;
+ goto page_state_change_out;
+ }
+ }
+
+ ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd);
+ if (ret) {
+ dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
+ goto get_pgd_at_level_error;
+ }
+
+ pgd_page = kbase_kmap(phys_to_page(pgd));
+ if (!pgd_page) {
+ dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__);
+ ret = -EINVAL;
+ goto pgd_page_map_error;
+ }
+
+ mutex_lock(&kbdev->mmu_hw_mutex);
+
+ /* Lock MMU region and flush GPU cache by using GPU control,
+ * in order to keep MMU region locked.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+ if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) {
+ /* Defer the migration as L2 is in a transitional phase */
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__);
+ ret = -EAGAIN;
+ goto l2_state_defer_out;
+ }
+ /* Prevent transitional phases in L2 by starting the transaction */
+ mmu_page_migration_transaction_begin(kbdev);
+ if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
+ int as_nr = mmut->kctx->as_nr;
+ struct kbase_as *as = &kbdev->as[as_nr];
+
+ ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param);
+ if (!ret) {
+ ret = kbase_gpu_cache_flush_and_busy_wait(
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ }
+ if (ret)
+ mmu_page_migration_transaction_end(kbdev);
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+
+ if (ret < 0) {
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+ dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
+ goto undo_mappings;
+ }
+
+ /* Copy memory content.
+ *
+ * It is necessary to claim the ownership of the DMA buffer for the old
+ * page before performing the copy, to make sure of reading a consistent
+ * version of its content, before copying. After the copy, ownership of
+ * the DMA buffer for the new page is given to the GPU in order to make
+ * the content visible to potential GPU access that may happen as soon as
+ * this function releases the lock on the MMU region.
+ */
+ dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ memcpy(new_page, old_page, PAGE_SIZE);
+ dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+ /* Remap GPU virtual page.
+ *
+ * This code rests on the assumption that page migration is only enabled
+ * for 4 kB pages, which necessarily live in the bottom level of the MMU
+ * page table. For this reason, the PGD level tells us unequivocally
+ * whether the page being migrated is a "content page" or another PGD
+ * of the page table:
+ *
+ * - Bottom level implies ATE (Address Translation Entry)
+ * - Any other level implies PTE (Page Table Entry)
+ *
+ * The current implementation doesn't handle the case of a level 0 PGD,
+ * that is: the root PGD of the page table.
+ */
+ target = &pgd_page[index];
+
+ /* Certain entries of a page table page encode the count of valid entries
+ * present in that page. So we need to save and restore the count information
+ * when updating the PTE/ATE to point to the new page.
+ */
+ num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page);
+
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
+ WARN_ON_ONCE((*target & 1UL) == 0);
+ *target =
+ kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags,
+ level, page_md->data.mapped.reg->gpu_alloc->group_id);
+ } else {
+ u64 managed_pte;
+
+#ifdef CONFIG_MALI_DEBUG
+ /* The PTE should be pointing to the page being migrated */
+ WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr(
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index])));
+#endif
+ kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys));
+ *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
+ }
+
+ kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
+
+ /* This function always updates a single entry inside an existing PGD,
+ * therefore cache maintenance is necessary and affects a single entry.
+ */
+ kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
+ kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64),
+ KBASE_MMU_OP_FLUSH_PT);
+
+ /* Unlock MMU region.
+ *
+ * Notice that GPUs which don't issue flush commands via GPU control
+ * still need an additional GPU cache flush here, this time only
+ * for the page table, because the function call above to sync PGDs
+ * won't have any effect on them.
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+ if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
+ int as_nr = mmut->kctx->as_nr;
+ struct kbase_as *as = &kbdev->as[as_nr];
+
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param);
+ } else {
+ ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_L2);
+ if (!ret)
+ ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
+ }
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+ /* Releasing locks before checking the migration transaction error state */
+ mutex_unlock(&kbdev->mmu_hw_mutex);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
+ /* Release the transition prevention in L2 by ending the transaction */
+ mmu_page_migration_transaction_end(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
+
+ /* Checking the final migration transaction error state */
+ if (ret < 0) {
+ dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
+ goto undo_mappings;
+ }
+
+ /* Undertake the metadata transfer while we are still holding the mmu_lock */
+ spin_lock(&page_md->migrate_lock);
+ if (level == MIDGARD_MMU_BOTTOMLEVEL) {
+ size_t page_array_index =
+ page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn;
+
+ WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED);
+
+ /* Replace page in array of pages of the physical allocation. */
+ page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys;
+ }
+ /* Update the new page dma_addr with the transferred metadata from the old_page */
+ page_md->dma_addr = new_dma_addr;
+ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
+ spin_unlock(&page_md->migrate_lock);
+ set_page_private(as_page(new_phys), (unsigned long)page_md);
+ /* Old page metadata pointer cleared as it is now owned by the new page */
+ set_page_private(as_page(old_phys), 0);
+
+l2_state_defer_out:
+ kbase_kunmap(phys_to_page(pgd), pgd_page);
+pgd_page_map_error:
+get_pgd_at_level_error:
+page_state_change_out:
+ rt_mutex_unlock(&mmut->mmu_lock);
+
+ kbase_kunmap(as_page(new_phys), new_page);
+new_page_map_error:
+ kbase_kunmap(as_page(old_phys), old_page);
+old_page_map_error:
+ return ret;
+
+undo_mappings:
+ /* Unlock the MMU table and undo mappings. */
+ rt_mutex_unlock(&mmut->mmu_lock);
+ kbase_kunmap(phys_to_page(pgd), pgd_page);
+ kbase_kunmap(as_page(new_phys), new_page);
+ kbase_kunmap(as_page(old_phys), old_page);
+
+ return ret;
+}
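
[Editor's note] A caller-side sketch of kbase_mmu_migrate_page() for a bottom-level data page. Assumptions: the page has already been isolated, the caller DMA-maps the new page, and as_tagged()/page_to_phys() are available to build the tagged addresses (as_tagged is assumed to be the counterpart of as_phys_addr_t). example_migrate_data_page is hypothetical and not part of the driver; -EAGAIN indicates the page state changed and the migration can simply be retried later.

static int example_migrate_data_page(struct kbase_device *kbdev,
				     struct page *old_page, struct page *new_page,
				     dma_addr_t old_dma_addr)
{
	dma_addr_t new_dma_addr;
	int err;

	/* The new page must be visible to the GPU's DMA before remapping */
	new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kbdev->dev, new_dma_addr))
		return -ENOMEM;

	err = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)),
				     as_tagged(page_to_phys(new_page)),
				     old_dma_addr, new_dma_addr, MIDGARD_MMU_BOTTOMLEVEL);
	if (err)
		dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

	return err;
}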
+
+static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t pgd, unsigned int level)
{
- phys_addr_t target_pgd;
u64 *pgd_page;
int i;
- struct kbase_mmu_mode const *mmu_mode;
- u64 *pgd_page_buffer;
+ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
+ u64 *pgd_page_buffer = NULL;
+ struct page *p = phys_to_page(pgd);
lockdep_assert_held(&mmut->mmu_lock);
- /* Early-out. No need to kmap to check entries for L3 PGD. */
- if (level == MIDGARD_MMU_BOTTOMLEVEL) {
- kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+ pgd_page = kbase_kmap_atomic(p);
+ /* kmap_atomic should NEVER fail. */
+ if (WARN_ON_ONCE(pgd_page == NULL))
return;
+ if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+ /* Copy the page to our preallocated buffer so that we can minimize
+ * kmap_atomic usage
+ */
+ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
+ memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
}
- pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd)));
- /* kmap_atomic should NEVER fail. */
- if (WARN_ON(pgd_page == NULL))
- return;
- /* Copy the page to our preallocated buffer so that we can minimize
- * kmap_atomic usage
+ /* When page migration is enabled, kbase_region_tracker_term() ensures
+ * there are no pages left mapped on the GPU for a context. Hence the count
+ * of valid entries is expected to be zero here.
*/
- pgd_page_buffer = mmut->mmu_teardown_pages[level];
- memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
- kunmap_atomic(pgd_page);
+ if (kbase_is_page_migration_enabled() && mmut->kctx)
+ WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page));
+ /* Invalidate page after copying */
+ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
+ kbase_kunmap_atomic(pgd_page);
pgd_page = pgd_page_buffer;
- mmu_mode = kbdev->mmu_mode;
-
- for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
- target_pgd = mmu_mode->pte_to_phy_addr(pgd_page[i]);
-
- if (target_pgd) {
+ if (level < MIDGARD_MMU_BOTTOMLEVEL) {
+ for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
- mmu_teardown_level(kbdev, mmut,
- target_pgd,
- level + 1);
+ phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr(
+ mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev,
+ MGM_DEFAULT_PTE_GROUP,
+ level, pgd_page[i]));
+
+ mmu_teardown_level(kbdev, mmut, target_pgd, level + 1);
}
}
}
- kbase_mmu_free_pgd(kbdev, mmut, pgd, true);
+ kbase_mmu_free_pgd(kbdev, mmut, pgd);
+}
+
+static void kbase_mmu_mark_non_movable(struct page *page)
+{
+ struct kbase_page_metadata *page_md;
+
+ if (!kbase_is_page_migration_enabled())
+ return;
+
+ page_md = kbase_page_private(page);
+
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
+
+ if (IS_PAGE_MOVABLE(page_md->status))
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+
+ spin_unlock(&page_md->migrate_lock);
}
int kbase_mmu_init(struct kbase_device *const kbdev,
struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
int const group_id)
{
- int level;
-
if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
WARN_ON(group_id < 0))
return -EINVAL;
+ compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)),
+ "List of free PGDs may not be large enough.");
+ compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL,
+ "Array of MMU levels is not large enough.");
+
mmut->group_id = group_id;
rt_mutex_init(&mmut->mmu_lock);
mmut->kctx = kctx;
- mmut->pgd = 0;
-
- /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- mmut->mmu_teardown_pages[level] =
- kmalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (!mmut->mmu_teardown_pages[level]) {
- kbase_mmu_term(kbdev, mmut);
- return -ENOMEM;
- }
- }
+ mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS;
/* We allocate pages into the kbdev memory pool, then
* kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
* avoid allocations from the kernel happening with the lock held.
*/
- while (!mmut->pgd) {
+ while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
int err;
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[mmut->group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
if (err) {
kbase_mmu_term(kbdev, mmut);
return -ENOMEM;
}
- rt_mutex_lock(&mmut->mmu_lock);
mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- rt_mutex_unlock(&mmut->mmu_lock);
}
+ kbase_mmu_mark_non_movable(pfn_to_page(PFN_DOWN(mmut->pgd)));
return 0;
}
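
[Editor's note] The init path grows the small-page pool, allocates the root PGD (retrying until mmut->pgd is valid) and marks it non-movable; kbase_mmu_term() later tears the whole table down. A minimal usage sketch follows; example_setup_ctx_mmu and the group_id value 0 are hypothetical.

static int example_setup_ctx_mmu(struct kbase_device *kbdev, struct kbase_context *kctx)
{
	int err;

	err = kbase_mmu_init(kbdev, &kctx->mmu, kctx, 0 /* example group_id */);
	if (err)
		return err;

	/* ... map and use GPU memory through kctx->mmu ... */

	/* The context must be scheduled out (caches/TLBs flushed) before this */
	kbase_mmu_term(kbdev, &kctx->mmu);
	return 0;
}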
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- int level;
+ WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
+ mmut->kctx->tgid, mmut->kctx->id);
- if (mmut->pgd) {
+ if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) {
rt_mutex_lock(&mmut->mmu_lock);
mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL);
rt_mutex_unlock(&mmut->mmu_lock);
@@ -2504,20 +3851,29 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
if (mmut->kctx)
KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
}
-
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- if (!mmut->mmu_teardown_pages[level])
- break;
- kfree(mmut->mmu_teardown_pages[level]);
- }
}
-void kbase_mmu_as_term(struct kbase_device *kbdev, int i)
+void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i)
{
destroy_workqueue(kbdev->as[i].pf_wq);
}
+void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
+ phys_addr_t phys, size_t size,
+ enum kbase_mmu_op_type flush_op)
+{
+#if MALI_USE_CSF
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
+ kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0))
+ mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+#endif
+}
+
+#ifdef CONFIG_MALI_VECTOR_DUMP
static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
int level, char ** const buffer, size_t *size_left)
{
@@ -2536,9 +3892,9 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
kbdev = kctx->kbdev;
mmu_mode = kbdev->mmu_mode;
- pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+ pgd_page = kbase_kmap(pfn_to_page(PFN_DOWN(pgd)));
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure\n", __func__);
+ dev_warn(kbdev->dev, "%s: kmap failure", __func__);
return 0;
}
@@ -2563,13 +3919,15 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
target_pgd = mmu_mode->pte_to_phy_addr(
- pgd_page[i]);
+ kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP,
+ level, pgd_page[i]));
dump_size = kbasep_mmu_dump_level(kctx,
target_pgd, level + 1,
buffer, size_left);
if (!dump_size) {
- kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page);
return 0;
}
size += dump_size;
@@ -2577,7 +3935,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
}
}
- kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page);
return size;
}
@@ -2657,6 +4015,7 @@ fail_free:
return NULL;
}
KBASE_EXPORT_TEST_API(kbase_mmu_dump);
+#endif /* CONFIG_MALI_VECTOR_DUMP */
void kbase_mmu_bus_fault_worker(struct work_struct *data)
{
@@ -2689,8 +4048,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data)
#ifdef CONFIG_MALI_ARBITER_SUPPORT
/* check if we still have GPU */
if (unlikely(kbase_is_gpu_removed(kbdev))) {
- dev_dbg(kbdev->dev,
- "%s: GPU has been removed\n", __func__);
+ dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
release_ctx(kbdev, kctx);
atomic_dec(&kbdev->faults_pending);
return;