author    | Toby Sunrise <tobyrs@google.com> | 2023-05-01 13:31:16 +0000
committer | Toby Sunrise <tobyrs@google.com> | 2023-05-01 13:33:19 +0000
commit    | bce5281a0408a175137c08dc93028e2a2c0fb69b (patch)
tree      | edc640500ccdf781a123e7fae22fac9c44ddbe46 /mali_kbase/mmu
parent    | f7a77046d77266482dedf54d134102e6031a7438 (diff)
download  | gpu-bce5281a0408a175137c08dc93028e2a2c0fb69b.tar.gz
Mali Valhall Android DDK r43p0-01eac0 KMD
Provenance: 48a9c7e25986318c8475bc245de51e7bec2606e8 (ipdelivery/EAC/v_r43p0)
VX504X08X-BU-00000-r43p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r43p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r43p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r43p0-01eac0 - Valhall Android Renderscript AOSP parts
Change-Id: I5df1914eba386e0bf507d4951240e1744f666a29
Diffstat (limited to 'mali_kbase/mmu')
-rw-r--r-- | mali_kbase/mmu/backend/mali_kbase_mmu_csf.c |    8
-rw-r--r-- | mali_kbase/mmu/backend/mali_kbase_mmu_jm.c  |    8
-rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu.c             | 1229
-rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu.h             |   64
-rw-r--r-- | mali_kbase/mmu/mali_kbase_mmu_hw_direct.c   |  121
5 files changed, 858 insertions, 572 deletions
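The largest functional change in mali_kbase_mmu.c below is the reworked insertion path: instead of mmu_get_bottom_pgd() creating missing page-table levels one at a time during the walk, mmu_get_lowest_valid_pgd() first finds the deepest PGD that already exists, mmu_insert_alloc_pgds() then allocates every missing level, and update_parent_pgds() links the new chain from the bottom up so it becomes visible only once complete. A minimal userspace sketch of that idea, assuming a 4-level table with 512 entries per level (the `(3 - level) * 9` shifts and 0x1FF masks mirror the diff; everything else here is an illustrative stand-in, not driver code):

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 512   /* 9 bits of VPFN per level, matching the 0x1FF masks in the diff */
#define BOTTOMLEVEL 3 /* stands in for MIDGARD_MMU_BOTTOMLEVEL */

typedef uint64_t *pgd_t; /* one 512-entry table; a NULL entry is an invalid PTE */

static unsigned int vpfn_index(uint64_t vpfn, int level)
{
	return (unsigned int)((vpfn >> ((3 - level) * 9)) & 0x1FF);
}

/* Walk from the top level and stop at the deepest PGD that already exists,
 * mirroring mmu_get_lowest_valid_pgd() in the diff.
 */
static pgd_t get_lowest_valid_pgd(pgd_t top, uint64_t vpfn, int *out_level)
{
	pgd_t pgd = top;
	int l;

	for (l = 0; l < BOTTOMLEVEL; l++) {
		pgd_t next = (pgd_t)(uintptr_t)pgd[vpfn_index(vpfn, l)];

		if (!next)
			break; /* the -EFAULT case of mmu_get_next_pgd() */
		pgd = next;
	}
	*out_level = l;
	return pgd;
}

int main(void)
{
	pgd_t new_pgds[BOTTOMLEVEL + 1];
	pgd_t top = calloc(ENTRIES, sizeof(uint64_t));
	uint64_t vpfn = 0x12345;
	int insert_level, l;

	/* On this empty tree the walk stops at the top level. */
	pgd_t lowest = get_lowest_valid_pgd(top, vpfn, &insert_level);

	new_pgds[insert_level] = lowest;

	/* Allocate every missing level up front; the driver's
	 * mmu_insert_alloc_pgds() grows the memory pool and retries on
	 * failure, for which plain calloc() stands in here.
	 */
	for (l = insert_level + 1; l <= BOTTOMLEVEL; l++)
		new_pgds[l] = calloc(ENTRIES, sizeof(uint64_t));

	/* Link bottom-up, as update_parent_pgds() does: each child is complete
	 * before its parent points at it, so the single write into the
	 * pre-existing PGD at insert_level publishes the whole chain at once.
	 */
	for (l = BOTTOMLEVEL; l > insert_level; l--)
		new_pgds[l - 1][vpfn_index(vpfn, l - 1)] = (uint64_t)(uintptr_t)new_pgds[l];

	printf("chain published into the existing level-%d PGD\n", insert_level);
	return 0;
}
```

A consequence visible in the diff is that the failure paths (fail_unlock_free_pgds) only need to free new_pgds[insert_level+1..cur_level], because nothing was ever linked into the live tree.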
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index 4a09265..4cac787 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -150,17 +150,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, "true" : "false"; int as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at PA 0x%016llX\n" + "GPU bus fault in AS%d at PA %pK\n" "PA_VALID: %s\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "access type 0x%X: %s\n" "source id 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, addr_valid, status, exception_type, kbase_gpu_exception_name(exception_type), @@ -557,6 +558,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; + kbdev->as[i].is_unresponsive = false; kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); if (!kbdev->as[i].pf_wq) diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index 83605c3..d716ce0 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, u32 const exception_data = (status >> 8) & 0xFFFFFF; int const as_no = as->number; unsigned long flags; + const uintptr_t fault_addr = fault->addr; /* terminal fault, print info about the fault */ dev_err(kbdev->dev, - "GPU bus fault in AS%d at PA 0x%016llX\n" + "GPU bus fault in AS%d at PA %pK\n" "raw fault status: 0x%X\n" "exception type 0x%X: %s\n" "exception data 0x%X\n" "pid: %d\n", - as_no, fault->addr, + as_no, (void *)fault_addr, status, exception_type, kbase_gpu_exception_name(exception_type), exception_data, @@ -428,6 +429,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; + kbdev->as[i].is_unresponsive = false; kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); if (!kbdev->as[i].pf_wq) diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index 41876ff..ea58381 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -144,35 +144,21 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz enum kbase_mmu_op_type op) { u32 flush_op; - int ret; - - if (WARN_ON(kbdev == NULL)) - return; lockdep_assert_held(&kbdev->hwaccess_lock); /* Translate operation to command */ - if (op == KBASE_MMU_OP_FLUSH_PT) { + if (op == KBASE_MMU_OP_FLUSH_PT) flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2; - } else if (op == KBASE_MMU_OP_FLUSH_MEM) { + else if (op == KBASE_MMU_OP_FLUSH_MEM) flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; - } else { + else { dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); return; } - ret = kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op); - - if (ret) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover - */ - dev_err(kbdev->dev, - "Flush for physical address range did not complete. Issuing GPU soft-reset to recover"); - - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - } + if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op)) + dev_err(kbdev->dev, "Flush for physical address range did not complete"); } #endif @@ -190,21 +176,15 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, const struct kbase_mmu_hw_op_param *op_param) { - int err = 0; unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { as_nr = kctx ? kctx->as_nr : as_nr; - err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param); - } - - if (err) { - dev_err(kbdev->dev, - "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param)) + dev_err(kbdev->dev, + "Invalidate after GPU page table update did not complete"); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -215,25 +195,14 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { - int err = 0; unsigned long flags; /* AS transaction begin */ mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_powered) - err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param); - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover. - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. 
Issuing GPU soft-reset to recover"); - - if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu_locked(kbdev); - } + if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) + dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); @@ -308,7 +277,6 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, const struct kbase_mmu_hw_op_param *op_param) { - int err = 0; unsigned long flags; /* AS transaction begin */ @@ -317,19 +285,8 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) { as_nr = kctx ? kctx->as_nr : as_nr; - err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], - op_param); - } - - if (err) { - /* Flush failed to complete, assume the GPU has hung and - * perform a reset to recover. - */ - dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param)) + dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -405,13 +362,11 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb * @level: The level of MMU page table. * @flush_op: The type of MMU flush operation to perform. * @dirty_pgds: Flags to track every level where a PGD has been updated. - * @free_pgds_list: Linked list of the page directory pages to free. */ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, - struct list_head *free_pgds_list); + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds); static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { @@ -485,14 +440,17 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl phys_addr_t pgd) { struct page *p; + bool page_is_isolated = false; lockdep_assert_held(&mmut->mmu_lock); p = pfn_to_page(PFN_DOWN(pgd)); + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); - kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); - - kbase_mmu_account_freed_pgd(kbdev, mmut); + if (likely(!page_is_isolated)) { + kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); + kbase_mmu_account_freed_pgd(kbdev, mmut); + } } /** @@ -500,41 +458,42 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl * * @kbdev: Device pointer. * @mmut: GPU MMU page table. - * @free_pgds_list: Linked list of the page directory pages to free. * * This function will call kbase_mmu_free_pgd() on each page directory page - * present in the @free_pgds_list. + * present in the list of free PGDs inside @mmut. * * The function is supposed to be called after the GPU cache and MMU TLB has * been invalidated post the teardown loop. + * + * The mmu_lock shall be held prior to calling the function. 
*/ -static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - struct list_head *free_pgds_list) +static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { - struct page *page, *next_page; + size_t i; - mutex_lock(&mmut->mmu_lock); + lockdep_assert_held(&mmut->mmu_lock); - list_for_each_entry_safe(page, next_page, free_pgds_list, lru) { - list_del_init(&page->lru); - kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page)); - } + for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++) + kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i])); - mutex_unlock(&mmut->mmu_lock); + mmut->scratch_mem.free_pgds.head_index = 0; } -static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, - struct page *p, struct list_head *free_pgds_list) +static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p) { - bool page_is_isolated = false; - lockdep_assert_held(&mmut->mmu_lock); - page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1))) + return; + + mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p; +} + +static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut) +{ + lockdep_assert_held(&mmut->mmu_lock); - if (likely(!page_is_isolated)) - list_add(&p->lru, free_pgds_list); + mmut->scratch_mem.free_pgds.head_index = 0; } /** @@ -627,6 +586,7 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC; struct kbase_mmu_hw_op_param op_param; + int ret = 0; mutex_lock(&kbdev->mmu_hw_mutex); @@ -645,16 +605,20 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev, spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds); - kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); + ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); } else { mmu_hw_operation_begin(kbdev); - kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); + ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param); mmu_hw_operation_end(kbdev); } mutex_unlock(&kbdev->mmu_hw_mutex); + if (ret) + dev_err(kbdev->dev, + "Flush for GPU page fault due to write access did not complete"); + kbase_mmu_hw_enable_fault(kbdev, faulting_as, KBASE_MMU_FAULT_TYPE_PAGE); } @@ -869,17 +833,13 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, return false; } -#ifdef CONFIG_MALI_2MB_ALLOC - if (new_pages >= (SZ_2M / SZ_4K)) { + if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) { root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id]; *grow_2mb_pool = true; } else { -#endif root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id]; *grow_2mb_pool = false; -#ifdef CONFIG_MALI_2MB_ALLOC } -#endif if (region->gpu_alloc != region->cpu_alloc) new_pages *= 2; @@ -1128,22 +1088,22 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) } page_fault_retry: -#ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - if (!prealloc_sas[i]) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); - + if 
(kbdev->pagesize_2mb) { + /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { if (!prealloc_sas[i]) { - kbase_mmu_report_fault_and_kill( - kctx, faulting_as, - "Failed pre-allocating memory for sub-allocations' metadata", - fault); - goto fault_done; + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + + if (!prealloc_sas[i]) { + kbase_mmu_report_fault_and_kill( + kctx, faulting_as, + "Failed pre-allocating memory for sub-allocations' metadata", + fault); + goto fault_done; + } } } } -#endif /* CONFIG_MALI_2MB_ALLOC */ /* so we have a translation fault, * let's see if it is for growable memory @@ -1457,8 +1417,7 @@ page_fault_retry: * Otherwise fail the allocation. */ if (pages_to_grow > 0) { -#ifdef CONFIG_MALI_2MB_ALLOC - if (grow_2mb_pool) { + if (kbdev->pagesize_2mb && grow_2mb_pool) { /* Round page requirement up to nearest 2 MB */ struct kbase_mem_pool *const lp_mem_pool = &kctx->mem_pools.large[ @@ -1469,18 +1428,15 @@ page_fault_retry: >> lp_mem_pool->order; ret = kbase_mem_pool_grow(lp_mem_pool, - pages_to_grow); + pages_to_grow, kctx->task); } else { -#endif struct kbase_mem_pool *const mem_pool = &kctx->mem_pools.small[ region->gpu_alloc->group_id]; ret = kbase_mem_pool_grow(mem_pool, - pages_to_grow); -#ifdef CONFIG_MALI_2MB_ALLOC + pages_to_grow, kctx->task); } -#endif } if (ret < 0) { /* failed to extend, handle as a normal PF */ @@ -1570,15 +1526,24 @@ alloc_free: return KBASE_MMU_INVALID_PGD_ADDRESS; } -/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the - * new table from the pool if needed and possible +/** + * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1 + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @pgd: Physical addresse of level N page directory. + * @vpfn: The virtual page frame number. + * @level: The level of MMU page table (N). + * + * Return: + * * 0 - OK + * * -EFAULT - level N+1 PGD does not exist + * * -EINVAL - kmap() failed for level N PGD PFN */ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd, - u64 *dirty_pgds) + phys_addr_t *pgd, u64 vpfn, int level) { u64 *page; - u64 pgd_vpfn = vpfn; phys_addr_t target_pgd; struct page *p; @@ -1594,67 +1559,15 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (page == NULL) { - dev_warn(kbdev->dev, "%s: kmap failure", __func__); + dev_err(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; } if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { - unsigned int current_valid_entries; - u64 managed_pte; - - target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__); - kunmap(p); - return -ENOMEM; - } - - current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(page); - kbdev->mmu_mode->entry_set_pte(&managed_pte, target_pgd); - page[vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( - kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); - kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1); - - /* Rely on the caller to update the address space flags. 
*/ - if (newly_created_pgd && !*newly_created_pgd) { - *newly_created_pgd = true; - if (dirty_pgds) - *dirty_pgds |= 1ULL << level; - } - - /* A new valid entry is added to an existing PGD. Perform the - * invalidate operation for GPU cache as it could be having a - * cacheline that contains the entry (in an invalid form). - * Even if the parent PGD was newly created, invalidation of - * GPU cache is still needed. For explanation, please refer - * the comment in kbase_mmu_insert_pages_no_flush(). - */ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, - *pgd + (vpfn * sizeof(u64)), - kbase_dma_addr(p) + (vpfn * sizeof(u64)), - sizeof(u64), KBASE_MMU_OP_FLUSH_PT); - - /* Update the new target_pgd page to its stable state */ - if (kbase_page_migration_enabled) { - struct kbase_page_metadata *page_md = - kbase_page_private(phys_to_page(target_pgd)); - - spin_lock(&page_md->migrate_lock); - - WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || - IS_PAGE_ISOLATED(page_md->status)); - - if (mmut->kctx) { - page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); - page_md->data.pt_mapped.mmut = mmut; - page_md->data.pt_mapped.pgd_vpfn_level = - PGD_VPFN_LEVEL_SET(pgd_vpfn, level); - } else { - page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); - } - - spin_unlock(&page_md->migrate_lock); - } + dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, + vpfn); + kunmap(p); + return -EFAULT; } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( @@ -1667,12 +1580,69 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * return 0; } +/** + * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @vpfn: The virtual page frame number. + * @in_level: The level of MMU page table (N). + * @out_level: Set to the level of the lowest valid PGD found on success. + * Invalid on error. + * @out_pgd: Set to the lowest valid PGD found on success. + * Invalid on error. + * + * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or + * closest to in_level + * + * Terminology: + * Level-0 = Top-level = highest + * Level-3 = Bottom-level = lowest + * + * Return: + * * 0 - OK + * * -EINVAL - kmap() failed during page table walk. + */ +static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd) +{ + phys_addr_t pgd; + int l; + int err = 0; + + lockdep_assert_held(&mmut->mmu_lock); + pgd = mmut->pgd; + + for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) { + err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); + + /* Handle failure condition */ + if (err) { + dev_dbg(kbdev->dev, + "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", + __func__, l + 1); + break; + } + } + + *out_pgd = pgd; + *out_level = l; + + /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid. + * This implies that we have found the lowest valid pgd. Reset the error code. 
+ */ + if (err == -EFAULT) + err = 0; + + return err; +} + /* - * Returns the PGD for the specified level of translation + * On success, sets out_pgd to the PGD for the specified level of translation + * Returns -EFAULT if a valid PGD is not found */ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - int level, phys_addr_t *out_pgd, bool *newly_created_pgd, - u64 *dirty_pgds) + int level, phys_addr_t *out_pgd) { phys_addr_t pgd; int l; @@ -1681,12 +1651,12 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab pgd = mmut->pgd; for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) { - int err = - mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); + int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l); /* Handle failure condition */ if (err) { - dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__, - l); + dev_err(kbdev->dev, + "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d", + __func__, l + 1); return err; } } @@ -1696,17 +1666,9 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab return 0; } -static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds) -{ - return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd, - newly_created_pgd, dirty_pgds); -} - static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, u64 to_vpfn, u64 *dirty_pgds, - struct list_head *free_pgds_list, struct tagged_addr *phys, bool ignore_page_migration) { u64 vpfn = from_vpfn; @@ -1719,6 +1681,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, lockdep_assert_held(&mmut->mmu_lock); mmu_mode = kbdev->mmu_mode; + kbase_mmu_reset_free_pgds_list(mmut); while (vpfn < to_vpfn) { unsigned int idx = vpfn & 0x1FF; @@ -1779,11 +1742,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); - kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(mmut, p); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - KBASE_MMU_OP_NONE, dirty_pgds, - free_pgds_list); + KBASE_MMU_OP_NONE, dirty_pgds); vpfn += count; continue; } @@ -1863,34 +1825,209 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param); } -/* - * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn' +/** + * update_parent_pgds() - Updates the page table from bottom level towards + * the top level to insert a new ATE + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @cur_level: The level of MMU page table where the ATE needs to be added. + * The bottom PGD level. + * @insert_level: The level of MMU page table where the chain of newly allocated + * PGDs needs to be linked-in/inserted. + * The top-most PDG level to be updated. + * @insert_vpfn: The virtual page frame number for the ATE. + * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains + * the physical addresses of newly allocated PGDs from index + * insert_level+1 to cur_level, and an existing PGD at index + * insert_level. 
+ * + * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD + * at insert_level which already exists in the MMU Page Tables.Migration status is also + * updated for all the newly allocated PGD pages. + * + * Return: + * * 0 - OK + * * -EFAULT - level N+1 PGD does not exist + * * -EINVAL - kmap() failed for level N PGD PFN + */ +static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + int cur_level, int insert_level, u64 insert_vpfn, + phys_addr_t *pgds_to_insert) +{ + int pgd_index; + int err = 0; + + /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1) + * Loop runs from the bottom-most to the top-most level so that all entries in the chain + * are valid when they are inserted into the MMU Page table via the insert_level PGD. + */ + for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) { + int parent_index = pgd_index - 1; + phys_addr_t parent_pgd = pgds_to_insert[parent_index]; + unsigned int current_valid_entries; + u64 pte; + phys_addr_t target_pgd = pgds_to_insert[pgd_index]; + u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF; + struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd)); + u64 *parent_page_va; + + if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) { + err = -EFAULT; + goto failure_recovery; + } + + parent_page_va = kmap(parent_page); + if (unlikely(parent_page_va == NULL)) { + dev_err(kbdev->dev, "%s: kmap failure", __func__); + err = -EINVAL; + goto failure_recovery; + } + + current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va); + + kbdev->mmu_mode->entry_set_pte(&pte, target_pgd); + parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); + kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); + kunmap(parent_page); + + if (parent_index != insert_level) { + /* Newly allocated PGDs */ + kbase_mmu_sync_pgd_cpu( + kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), + sizeof(u64)); + } else { + /* A new valid entry is added to an existing PGD. Perform the + * invalidate operation for GPU cache as it could be having a + * cacheline that contains the entry (in an invalid form). + */ + kbase_mmu_sync_pgd( + kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)), + kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + } + + /* Update the new target_pgd page to its stable state */ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = + kbase_page_private(phys_to_page(target_pgd)); + + spin_lock(&page_md->migrate_lock); + + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || + IS_PAGE_ISOLATED(page_md->status)); + + if (mmut->kctx) { + page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); + page_md->data.pt_mapped.mmut = mmut; + page_md->data.pt_mapped.pgd_vpfn_level = + PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index); + } else { + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + } + + spin_unlock(&page_md->migrate_lock); + } + } + + return 0; + +failure_recovery: + /* Cleanup PTEs from PGDs. 
The Parent PGD in the loop above is just "PGD" here */ + for (; pgd_index < cur_level; pgd_index++) { + phys_addr_t pgd = pgds_to_insert[pgd_index]; + struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); + u64 *pgd_page_va = kmap(pgd_page); + u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; + + kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); + kunmap(pgd_page); + } + + return err; +} + +/** + * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to + * level_high (inclusive) + * + * @kbdev: Device pointer. + * @mmut: GPU MMU page table. + * @level_low: The lower bound for the levels for which the PGD allocs are required + * @level_high: The higher bound for the levels for which the PGD allocs are required + * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the + * newly allocated PGD addresses to. + * + * Numerically, level_low < level_high, not to be confused with top level and + * bottom level concepts for MMU PGDs. They are only used as low and high bounds + * in an incrementing for-loop. + * + * Return: + * * 0 - OK + * * -ENOMEM - allocation failed for a PGD. */ -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) +static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + phys_addr_t *new_pgds, int level_low, int level_high) +{ + int err = 0; + int i; + + lockdep_assert_held(&mmut->mmu_lock); + + for (i = level_low; i <= level_high; i++) { + do { + new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut); + if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) + break; + + mutex_unlock(&mmut->mmu_lock); + err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id], + level_high, NULL); + mutex_lock(&mmut->mmu_lock); + if (err) { + dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d", + __func__, err); + + /* Free all PGDs allocated in previous successful iterations + * from (i-1) to level_low + */ + for (i = (i - 1); i >= level_low; i--) { + if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]); + } + + return err; + } + } while (1); + } + + return 0; +} + +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; - /* In case the insert_single_page only partially completes - * we need to be able to recover - */ - bool recover_required = false; - u64 start_vpfn = vpfn; - size_t recover_count = 0; + u64 insert_vpfn = start_vpfn; size_t remain = nr; int err; struct kbase_device *kbdev; - enum kbase_mmu_op_type flush_op; u64 dirty_pgds = 0; - LIST_HEAD(free_pgds_list); + unsigned int i; + phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + enum kbase_mmu_op_type flush_op; + struct kbase_mmu_table *mmut = &kctx->mmu; + int l, cur_level, insert_level; if (WARN_ON(kctx == NULL)) return -EINVAL; /* 64-bit address range is the max */ - KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE)); + KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE)); kbdev = kctx->kbdev; @@ -1901,7 +2038,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, /* If page migration is enabled, pages involved in multiple GPU mappings * are always treated as not movable. 
*/ - if (kbase_page_migration_enabled) { + if (kbase_page_migration_enabled && !ignore_page_migration) { struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -1912,12 +2049,11 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, } } - mutex_lock(&kctx->mmu.mmu_lock); + mutex_lock(&mmut->mmu_lock); while (remain) { - unsigned int i; - unsigned int index = vpfn & 0x1FF; - unsigned int count = KBASE_MMU_PAGE_ENTRIES - index; + unsigned int vindex = insert_vpfn & 0x1FF; + unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; struct page *p; register unsigned int num_of_valid_entries; bool newly_created_pgd = false; @@ -1925,64 +2061,61 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (count > remain) count = remain; + cur_level = MIDGARD_MMU_BOTTOMLEVEL; + insert_level = cur_level; + /* - * Repeatedly calling mmu_get_bottom_pgd() is clearly + * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly * suboptimal. We don't have to re-parse the whole tree * each time (just cache the l0-l2 sequence). * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? */ - do { - err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd, - &dirty_pgds); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&kctx->mmu.mmu_lock); - err = kbase_mem_pool_grow( - &kbdev->mem_pools.small[ - kctx->mmu.group_id], - MIDGARD_MMU_BOTTOMLEVEL); - mutex_lock(&kctx->mmu.mmu_lock); - } while (!err); + /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ + err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, + &pgd); + if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__); - if (recover_required) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, - start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list, - NULL, true); - } + dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", + __func__, err); goto fail_unlock; } + /* No valid pgd at cur_level */ + if (insert_level != cur_level) { + /* Allocate new pgds for all missing levels from the required level + * down to the lowest valid pgd at insert_level + */ + err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), + cur_level); + if (err) + goto fail_unlock; + + newly_created_pgd = true; + + new_pgds[insert_level] = pgd; + + /* If we didn't find an existing valid pgd at cur_level, + * we've now allocated one. The ATE in the next step should + * be inserted in this newly allocated pgd. 
+ */ + pgd = new_pgds[cur_level]; + } + p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure", __func__); - if (recover_required) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, - start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list, - NULL, true); - } + dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; - goto fail_unlock; + + goto fail_unlock_free_pgds; } num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); for (i = 0; i < count; i++) { - unsigned int ofs = index + i; + unsigned int ofs = vindex + i; /* Fail if the current page is a valid ATE entry */ KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL)); @@ -1994,50 +2127,87 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, kbdev->mmu_mode->set_num_valid_entries( pgd_page, num_of_valid_entries + count); - vpfn += count; - remain -= count; - - if (count > 0 && !newly_created_pgd) - dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL; + dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL); /* MMU cache flush operation here will depend on whether bottom level * PGD is newly created or not. * - * If bottom level PGD is newly created then no cache maintenance is + * If bottom level PGD is newly created then no GPU cache maintenance is * required as the PGD will not exist in GPU cache. Otherwise GPU cache * maintenance is required for existing PGD. */ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; - kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)), - kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64), + kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)), + kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), flush_op); + if (newly_created_pgd) { + err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, + new_pgds); + if (err) { + dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", + __func__, err); + + kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); + + kunmap(p); + goto fail_unlock_free_pgds; + } + } + + insert_vpfn += count; + remain -= count; kunmap(p); - /* We have started modifying the page table. 
- * If further pages need inserting and fail we need to undo what - * has already taken place - */ - recover_required = true; - recover_count += count; } - mutex_unlock(&kctx->mmu.mmu_lock); - mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info, false); + mutex_unlock(&mmut->mmu_lock); + + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, + false); return 0; +fail_unlock_free_pgds: + /* Free the pgds allocated by us from insert_level+1 to bottom level */ + for (l = cur_level; l > insert_level; l--) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + fail_unlock: - mutex_unlock(&kctx->mmu.mmu_lock); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially completed */ + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds, + NULL, true); + } - mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds, - mmu_sync_info, true); - kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info, + true); + kbase_mmu_free_pgds_list(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); return err; } +int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ + return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, + false); +} + +int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info) +{ + /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */ + return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info, + false); +} + static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, struct kbase_va_region *reg, struct kbase_mmu_table *mmut, const u64 vpfn) @@ -2139,7 +2309,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu size_t remain = nr; int err; struct kbase_mmu_mode const *mmu_mode; - LIST_HEAD(free_pgds_list); + unsigned int i; + phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1]; + int l, cur_level, insert_level; /* Note that 0 is a valid start_vpfn */ /* 64-bit address range is the max */ @@ -2154,13 +2326,12 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu mutex_lock(&mmut->mmu_lock); while (remain) { - unsigned int i; unsigned int vindex = insert_vpfn & 0x1FF; unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex; struct page *p; - int cur_level; register unsigned int num_of_valid_entries; bool newly_created_pgd = false; + enum kbase_mmu_op_type flush_op; if (count > remain) count = remain; @@ -2170,57 +2341,53 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu else cur_level = MIDGARD_MMU_BOTTOMLEVEL; + insert_level = cur_level; + /* - * Repeatedly calling mmu_get_pgd_at_level() is clearly + * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly * suboptimal. We don't have to re-parse the whole tree * each time (just cache the l0-l2 sequence). * On the other hand, it's only a gain when we map more than * 256 pages at once (on average). Do we really care? 
*/ - do { - err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd, - &newly_created_pgd, dirty_pgds); - if (err != -ENOMEM) - break; - /* Fill the memory pool with enough pages for - * the page walk to succeed - */ - mutex_unlock(&mmut->mmu_lock); - err = kbase_mem_pool_grow( - &kbdev->mem_pools.small[mmut->group_id], - cur_level); - mutex_lock(&mmut->mmu_lock); - } while (!err); + /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */ + err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level, + &pgd); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__); - if (insert_vpfn != start_vpfn) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds, - &free_pgds_list, phys, - ignore_page_migration); - } + dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d", + __func__, err); goto fail_unlock; } + /* No valid pgd at cur_level */ + if (insert_level != cur_level) { + /* Allocate new pgds for all missing levels from the required level + * down to the lowest valid pgd at insert_level + */ + err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1), + cur_level); + if (err) + goto fail_unlock; + + newly_created_pgd = true; + + new_pgds[insert_level] = pgd; + + /* If we didn't find an existing valid pgd at cur_level, + * we've now allocated one. The ATE in the next step should + * be inserted in this newly allocated pgd. + */ + pgd = new_pgds[cur_level]; + } + p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure", __func__); - if (insert_vpfn != start_vpfn) { - /* Invalidate the pages we have partially - * completed - */ - mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, - insert_vpfn, dirty_pgds, - &free_pgds_list, phys, - ignore_page_migration); - } + dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; - goto fail_unlock; + + goto fail_unlock_free_pgds; } num_of_valid_entries = @@ -2262,34 +2429,39 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); - if (dirty_pgds && !newly_created_pgd) - *dirty_pgds |= 1ULL << cur_level; - - phys += count; - insert_vpfn += count; - remain -= count; + if (dirty_pgds) + *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level); - /* Even if mmu_get_pgd_at_level() allocated a new bottom level - * table page, the invalidation of L2 cache is still needed for - * for the valid entries written in that page. This is because a - * race can happen as soon as the entry of parent level table is - * updated to point to the page of bottom level table. - * GPU can try to access within the the same virtual range that - * is being mapped, before the valid entries of bottom level table - * page are flushed to the memory from the CPU's cache. And if that - * happens then the invalid entries from memory could get fetched - * into the L2 cache and so those entries won't be affected by the - * MMU TLB invalidation done by sending the UNLOCK command. - * If the memory is growable then this could result in unexpected - * page faults happening repeatedly, until the invalid entry is - * evicted from the L2 cache, as Driver would consider the page - * faults for mapped memory as duplicate and won't take any action - * effectively. 
+ /* MMU cache flush operation here will depend on whether bottom level + * PGD is newly created or not. + * + * If bottom level PGD is newly created then no GPU cache maintenance is + * required as the PGD will not exist in GPU cache. Otherwise GPU cache + * maintenance is required for existing PGD. */ + flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT; + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)), kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64), - KBASE_MMU_OP_FLUSH_PT); + flush_op); + + if (newly_created_pgd) { + err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn, + new_pgds); + if (err) { + dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)", + __func__, err); + kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); + + kunmap(p); + goto fail_unlock_free_pgds; + } + } + + phys += count; + insert_vpfn += count; + remain -= count; kunmap(p); } @@ -2297,12 +2469,22 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu return 0; +fail_unlock_free_pgds: + /* Free the pgds allocated by us from insert_level+1 to bottom level */ + for (l = cur_level; l > insert_level; l--) + kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]); + fail_unlock: - mutex_unlock(&mmut->mmu_lock); + if (insert_vpfn != start_vpfn) { + /* Invalidate the pages we have partially completed */ + mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, + phys, ignore_page_migration); + } mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true); - kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); + kbase_mmu_free_pgds_list(kbdev, mmut); + mutex_unlock(&mmut->mmu_lock); return err; } @@ -2318,7 +2500,6 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m { int err; u64 dirty_pgds = 0; - LIST_HEAD(free_pgds_list); /* Early out if there is nothing to do */ if (nr == 0) @@ -2336,58 +2517,56 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); -/** - * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches - * without retaining the kbase context. - * @kctx: The KBase context. - * @vpfn: The virtual page frame number to start the flush on. - * @nr: The number of pages to flush. - * - * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any - * other locking. - */ -static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr) +int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg) { - struct kbase_device *kbdev = kctx->kbdev; int err; - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. + u64 dirty_pgds = 0; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + /* Imported allocations don't have metadata and therefore always ignore the + * page migration logic. 
*/ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - struct kbase_mmu_hw_op_param op_param; + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds, reg, true); + if (err) + return err; - lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); + + return 0; +} + +int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg) +{ + int err; + u64 dirty_pgds = 0; /* Early out if there is nothing to do */ if (nr == 0) - return; + return 0; - /* flush L2 and unlock the VA (resumes the MMU) */ - op_param.vpfn = vpfn; - op_param.nr = nr; - op_param.op = KBASE_MMU_OP_FLUSH_MEM; - op_param.kctx_id = kctx->id; - op_param.mmu_sync_info = mmu_sync_info; - if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { - /* Value used to prevent skipping of any levels when flushing */ - op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); - err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr], - &op_param); - } else { - err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr], - &op_param); - } + /* Memory aliases are always built on top of existing allocations, + * therefore the state of physical pages shall be updated. + */ + err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, + &dirty_pgds, reg, false); + if (err) + return err; - if (err) { - /* Flush failed to complete, assume the - * GPU has hung and perform a reset to recover - */ - dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); + mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false); - if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE)) - kbase_reset_gpu_locked(kbdev); - } + return 0; } void kbase_mmu_update(struct kbase_device *kbdev, @@ -2412,6 +2591,14 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr) void kbase_mmu_disable(struct kbase_context *kctx) { + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + struct kbase_device *kbdev = kctx->kbdev; + struct kbase_mmu_hw_op_param op_param = { 0 }; + int lock_err, flush_err; + /* ASSERT that the context has a valid as_nr, which is only the case * when it's scheduled in. * @@ -2422,16 +2609,49 @@ void kbase_mmu_disable(struct kbase_context *kctx) lockdep_assert_held(&kctx->kbdev->hwaccess_lock); lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex); - /* - * The address space is being disabled, drain all knowledge of it out - * from the caches as pages and page tables might be freed after this. - * - * The job scheduler code will already be holding the locks and context - * so just do the flush. 
+ op_param.vpfn = 0; + op_param.nr = ~0; + op_param.op = KBASE_MMU_OP_FLUSH_MEM; + op_param.kctx_id = kctx->id; + op_param.mmu_sync_info = mmu_sync_info; + +#if MALI_USE_CSF + /* 0xF value used to prevent skipping of any levels when flushing */ + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) + op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF); +#endif + + /* lock MMU to prevent existing jobs on GPU from executing while the AS is + * not yet disabled + */ + lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param); + if (lock_err) + dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid, + kctx->id); + + /* Issue the flush command only when L2 cache is in stable power on state. + * Any other state for L2 cache implies that shader cores are powered off, + * which in turn implies there is no execution happening on the GPU. */ - kbase_mmu_flush_noretain(kctx, 0, ~0); + if (kbdev->pm.backend.l2_state == KBASE_L2_ON) { + flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + if (flush_err) + dev_err(kbdev->dev, + "Failed to flush GPU cache when disabling AS %d for ctx %d_%d", + kctx->as_nr, kctx->tgid, kctx->id); + } + kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr); + + if (!lock_err) { + /* unlock the MMU to allow it to resume */ + lock_err = + kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param); + if (lock_err) + dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr, + kctx->tgid, kctx->id); + } - kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr); #if !MALI_USE_CSF /* * JM GPUs has some L1 read only caches that need to be invalidated @@ -2439,7 +2659,7 @@ void kbase_mmu_disable(struct kbase_context *kctx) * the slot_rb tracking field so such invalidation is performed when * a new katom is executed on the affected slots. */ - kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx); + kbase_backend_slot_kctx_purge_locked(kbdev, kctx); #endif } KBASE_EXPORT_TEST_API(kbase_mmu_disable); @@ -2447,8 +2667,7 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable); static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t *pgds, u64 vpfn, int level, - enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, - struct list_head *free_pgds_list) + enum kbase_mmu_op_type flush_op, u64 *dirty_pgds) { int current_level; @@ -2480,7 +2699,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_pgd + (index * sizeof(u64)), sizeof(u64), flush_op); - kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(mmut, p); } else { current_valid_entries--; @@ -2500,13 +2719,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, /** * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages. * - * @kbdev: Pointer to kbase device. - * @kctx: Pointer to kbase context. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @phys: Array of physical pages to flush. - * @op_param: Non-NULL pointer to struct containing information about the flush - * operation to perform. + * @kbdev: Pointer to kbase device. + * @kctx: Pointer to kbase context. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @phys: Array of physical pages to flush. 
+ * @phys_page_nr: Number of physical pages to flush. + * @op_param: Non-NULL pointer to struct containing information about the flush + * operation to perform. * * This function will do one of three things: * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the @@ -2514,10 +2734,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is * supported on GPU or, * 3. Perform a full GPU cache flush through the MMU_CONTROL interface. + * + * When performing a partial GPU cache flush, the number of physical + * pages does not have to be identical to the number of virtual pages on the MMU, + * to support a single physical address flush for an aliased page. */ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr, - struct tagged_addr *phys, + struct tagged_addr *phys, size_t phys_page_nr, struct kbase_mmu_hw_op_param *op_param) { if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) { @@ -2536,7 +2760,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, mmu_invalidate(kbdev, kctx, as_nr, op_param); - for (i = 0; !flush_done && i < op_param->nr; i++) { + for (i = 0; !flush_done && i < phys_page_nr; i++) { spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, @@ -2549,76 +2773,15 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, #endif } -/** - * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table - * - * @kbdev: Pointer to kbase device. - * @mmut: Pointer to GPU MMU page table. - * @vpfn: Start page frame number of the GPU virtual pages to unmap. - * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is used for GPU cache maintenance - * and page migration support. - * @nr: Number of pages to unmap. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @ignore_page_migration: Whether page migration metadata should be ignored. - * - * We actually discard the ATE and free the page table pages if no valid entries - * exist in PGD. - * - * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. - * - * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, - * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, - * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches - * instead of specific physical address ranges. - * - * Return: 0 on success, otherwise an error code. 
- */ -int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr, - bool ignore_page_migration) +static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, size_t nr, u64 *dirty_pgds, + struct list_head *free_pgds_list, + enum kbase_mmu_op_type flush_op) { - const size_t requested_nr = nr; - u64 start_vpfn = vpfn; - enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; - struct kbase_mmu_mode const *mmu_mode; - struct kbase_mmu_hw_op_param op_param; - int err = -EFAULT; - u64 dirty_pgds = 0; - LIST_HEAD(free_pgds_list); - - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; - - if (nr == 0) { - /* early out if nothing to do */ - return 0; - } - - /* MMU cache flush strategy depends on the number of pages to unmap. In both cases - * the operation is invalidate but the granularity of cache maintenance may change - * according to the situation. - * - * If GPU control command operations are present and the number of pages is "small", - * then the optimal strategy is flushing on the physical address range of the pages - * which are affected by the operation. That implies both the PGDs which are modified - * or removed from the page table and the physical pages which are freed from memory. - * - * Otherwise, there's no alternative to invalidating the whole GPU cache. - */ - if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES) - flush_op = KBASE_MMU_OP_FLUSH_PT; - - mutex_lock(&mmut->mmu_lock); + struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; - mmu_mode = kbdev->mmu_mode; + lockdep_assert_held(&mmut->mmu_lock); + kbase_mmu_reset_free_pgds_list(mmut); while (nr) { unsigned int index = vpfn & 0x1FF; @@ -2703,7 +2866,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table } if (pcount > 0) - dirty_pgds |= 1ULL << level; + *dirty_pgds |= 1ULL << level; num_of_valid_entries = mmu_mode->get_num_valid_entries(page); if (WARN_ON_ONCE(num_of_valid_entries < pcount)) @@ -2725,11 +2888,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pgd + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); - kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list); + kbase_mmu_add_to_free_pgds_list(mmut, p); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, - flush_op, &dirty_pgds, - &free_pgds_list); + flush_op, dirty_pgds); vpfn += count; nr -= count; @@ -2746,19 +2908,77 @@ next: vpfn += count; nr -= count; } - err = 0; out: + return 0; +} + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr, bool ignore_page_migration) +{ + u64 start_vpfn = vpfn; + enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; + struct kbase_mmu_hw_op_param op_param; + int err = -EFAULT; + u64 dirty_pgds = 0; + LIST_HEAD(free_pgds_list); + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + /* This function performs two operations: MMU maintenance and flushing + * the caches. 
To ensure internal consistency between the caches and the
+ * MMU, it does not make sense to be able to flush only the physical pages
+ * from the cache and keep the PTE, nor does it make sense to use this
+ * function to remove a PTE and keep the physical pages in the cache.
+ *
+ * However, we have legitimate cases where we can try to tear down a mapping
+ * with zero virtual and zero physical pages, so we must have the following
+ * behaviour:
+ * - if both physical and virtual page counts are zero, return early
+ * - if either the physical or the virtual page count is zero, return early
+ * - if there are fewer virtual pages than physical pages, return -EINVAL
+ */
+ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
+ return 0;
+
+ if (unlikely(nr_virt_pages < nr_phys_pages))
+ return -EINVAL;
+
+ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
+ * the operation is invalidate but the granularity of cache maintenance may change
+ * according to the situation.
+ *
+ * If GPU control command operations are present and the number of pages is "small",
+ * then the optimal strategy is flushing on the physical address range of the pages
+ * which are affected by the operation. That implies both the PGDs which are modified
+ * or removed from the page table and the physical pages which are freed from memory.
+ *
+ * Otherwise, there's no alternative to invalidating the whole GPU cache.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
+ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
+
+ mutex_lock(&mmut->mmu_lock);
+
+ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
+ &free_pgds_list, flush_op);
+
 /* Set up MMU operation parameters. See above about MMU cache flush strategy. */
 op_param = (struct kbase_mmu_hw_op_param){
 .vpfn = start_vpfn,
- .nr = requested_nr,
+ .nr = nr_virt_pages,
 .mmu_sync_info = mmu_sync_info,
 .kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
 .op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
 KBASE_MMU_OP_FLUSH_MEM,
 .flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
 };
- mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param);
+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
+ &op_param);
 /* If page migration is enabled: the status of all physical pages involved
 * shall be updated, unless they are not movable. Their status shall be
@@ -2766,15 +2986,14 @@ out:
 * requests to migrate the pages, if they have been isolated.
*/ if (kbase_page_migration_enabled && phys && !ignore_page_migration) - kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr); + kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); - mutex_unlock(&mmut->mmu_lock); + kbase_mmu_free_pgds_list(kbdev, mmut); - kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); + mutex_unlock(&mmut->mmu_lock); return err; } - KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); /** @@ -2834,7 +3053,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb if (is_huge(*phys) && (index == index_in_large_page(*phys))) cur_level = MIDGARD_MMU_LEVEL(2); - err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds); + err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd); if (WARN_ON(err)) goto fail_unlock; @@ -3119,9 +3338,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p } } - ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL); + ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd); if (ret) { - dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__); + dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__); goto get_pgd_at_level_error; } @@ -3167,10 +3386,8 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p if (ret < 0) { mutex_unlock(&kbdev->mmu_hw_mutex); mutex_unlock(&kbdev->pm.lock); - dev_err(kbdev->dev, - "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.", - __func__); - goto gpu_reset; + dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__); + goto undo_mappings; } /* Copy memory content. @@ -3270,7 +3487,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p /* Checking the final migration transaction error state */ if (ret < 0) { dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); - goto gpu_reset; + goto undo_mappings; } /* Undertaking metadata transfer, while we are holding the mmu_lock */ @@ -3305,19 +3522,13 @@ new_page_map_error: old_page_map_error: return ret; -gpu_reset: - /* Unlock the MMU table before resetting the GPU and undo - * mappings. - */ +undo_mappings: + /* Unlock the MMU table and undo mappings. */ mutex_unlock(&mmut->mmu_lock); kunmap(phys_to_page(pgd)); kunmap(as_page(new_phys)); kunmap(as_page(old_phys)); - /* Reset the GPU because of an unrecoverable error in locking or flushing. */ - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - return ret; } @@ -3329,7 +3540,6 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; u64 *pgd_page_buffer = NULL; - bool page_is_isolated = false; struct page *p = phys_to_page(pgd); lockdep_assert_held(&mmut->mmu_lock); @@ -3342,7 +3552,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl /* Copy the page to our preallocated buffer so that we can minimize * kmap_atomic usage */ - pgd_page_buffer = mmut->mmu_teardown_pages[level]; + pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level]; memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); } @@ -3370,41 +3580,27 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl } } - /* Top level PGD page is excluded from migration process. 
*/ - if (level != MIDGARD_MMU_TOPLEVEL) - page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); - - if (likely(!page_is_isolated)) - kbase_mmu_free_pgd(kbdev, mmut, pgd); + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, int const group_id) { - int level; - if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) || WARN_ON(group_id < 0)) return -EINVAL; + compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)), + "List of free PGDs may not be large enough."); + compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL, + "Array of MMU levels is not large enough."); + mmut->group_id = group_id; mutex_init(&mmut->mmu_lock); mmut->kctx = kctx; mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS; - /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */ - for (level = MIDGARD_MMU_TOPLEVEL; - level < MIDGARD_MMU_BOTTOMLEVEL; level++) { - mmut->mmu_teardown_pages[level] = - kmalloc(PAGE_SIZE, GFP_KERNEL); - - if (!mmut->mmu_teardown_pages[level]) { - kbase_mmu_term(kbdev, mmut); - return -ENOMEM; - } - } - /* We allocate pages into the kbdev memory pool, then * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to * avoid allocations from the kernel happening with the lock held. @@ -3414,7 +3610,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev, err = kbase_mem_pool_grow( &kbdev->mem_pools.small[mmut->group_id], - MIDGARD_MMU_BOTTOMLEVEL); + MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL); if (err) { kbase_mmu_term(kbdev, mmut); return -ENOMEM; @@ -3430,8 +3626,6 @@ int kbase_mmu_init(struct kbase_device *const kbdev, void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { - int level; - WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", mmut->kctx->tgid, mmut->kctx->id); @@ -3445,13 +3639,6 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0); } - for (level = MIDGARD_MMU_TOPLEVEL; - level < MIDGARD_MMU_BOTTOMLEVEL; level++) { - if (!mmut->mmu_teardown_pages[level]) - break; - kfree(mmut->mmu_teardown_pages[level]); - } - mutex_destroy(&mmut->mmu_lock); } diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 2b3e6c0..49b42e0 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -152,21 +152,71 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, struct tagged_addr phy, unsigned long flags, int level, int group_id); int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int group_id, u64 *dirty_pgds, struct kbase_va_region *reg, bool ignore_page_migration); int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, struct kbase_va_region *reg, bool ignore_page_migration); -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, - struct tagged_addr phys, size_t nr, - unsigned long flags, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info); +int 
kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg); +int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg); +int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, + size_t nr, unsigned long flags, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool ignore_page_migration); +int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); +int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info); +/** + * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table + * + * @kbdev: Pointer to kbase device. + * @mmut: Pointer to GPU MMU page table. + * @vpfn: Start page frame number of the GPU virtual pages to unmap. + * @phys: Array of physical pages currently mapped to the virtual + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. + * @nr_phys_pages: Number of physical pages to flush. + * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. + * + * We actually discard the ATE and free the page table pages if no valid entries + * exist in PGD. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. + * + * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, + * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, + * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches + * instead of specific physical address ranges. + * + * Return: 0 on success, otherwise an error code. + */ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr, - bool ignore_page_migration); + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr, bool ignore_page_migration); + int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 68bc697..1a892dc 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
 #include <mali_kbase.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_mem.h>
+#include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu_hw.h>
 #include <tl/mali_kbase_tracepoints.h>
 #include <linux/delay.h>
@@ -156,37 +157,60 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
 return 0;
 }
-static int wait_ready(struct kbase_device *kbdev,
- unsigned int as_nr)
+/**
+ * wait_ready() - Wait for a previously issued MMU command to complete.
+ *
+ * @kbdev: Kbase device to wait for an MMU command to complete.
+ * @as_nr: Address space to wait for an MMU command to complete.
+ *
+ * Reset the GPU if the wait for the previously issued command fails.
+ *
+ * Return: 0 on successful completion, negative error code on failure.
+ */
+static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
 {
- u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ s64 diff;
- /* Wait for the MMU status to indicate there is no active command. */
- while (--max_loops &&
- kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
- AS_STATUS_AS_ACTIVE) {
- ;
- }
+ if (unlikely(kbdev->as[as_nr].is_unresponsive))
+ return -EBUSY;
- if (WARN_ON_ONCE(max_loops == 0)) {
- dev_err(kbdev->dev,
- "AS_ACTIVE bit stuck for as %u, might be caused by slow/unstable GPU clock or possible faulty FPGA connector",
- as_nr);
- return -1;
- }
+ do {
+ unsigned int i;
- return 0;
+ for (i = 0; i < 1000; i++) {
+ /* Wait for the MMU status to indicate there is no active command */
+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE))
+ return 0;
+ }
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < mmu_as_inactive_wait_time_ms);
+
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
+ as_nr);
+ kbdev->as[as_nr].is_unresponsive = true;
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ return -ETIMEDOUT;
 }
 static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
 {
- int status;
- /* write AS_COMMAND when MMU is ready to accept another command */
- status = wait_ready(kbdev, as_nr);
- if (status == 0)
+ const int status = wait_ready(kbdev, as_nr);
+
+ if (likely(status == 0))
 kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
- else {
+ else if (status == -EBUSY) {
+ dev_dbg(kbdev->dev,
+ "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
+ as_nr, cmd);
+ } else {
 dev_err(kbdev->dev,
 "Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u",
 as_nr, cmd);
@@ -259,17 +283,21 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c
 /* Wait for the LOCK MMU command to complete, issued by the caller */
 ret = wait_ready(kbdev, as_nr);
- if (ret)
+ if (unlikely(ret))
 return ret;
 ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, GPU_COMMAND_CACHE_CLN_INV_LSC);
- if (ret)
+ if (unlikely(ret))
 return ret;
 ret = wait_cores_power_trans_complete(kbdev);
- if (ret)
+ if (unlikely(ret)) {
+ if (kbase_prepare_to_reset_gpu_locked(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
 return ret;
+ }
 /* As LSC is guaranteed to have been flushed we can use FLUSH_PT
 * MMU command to only flush the L2.
@@ -397,12 +425,21 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a
 ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param);
- if (!ret)
- write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
+ if (likely(!ret))
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
 return ret;
 }
+/**
+ * mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ *
+ * Return: 0 if issuing the LOCK command was successful, otherwise an error code.
+ */
 static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
 const struct kbase_mmu_hw_op_param *op_param)
 {
@@ -443,10 +480,10 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
 ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
 /* Wait for UNLOCK command to complete */
- if (!ret)
+ if (likely(!ret))
 ret = wait_ready(kbdev, as->number);
- if (!ret) {
+ if (likely(!ret)) {
 u64 lock_addr = 0x0;
 /* read MMU_AS_CONTROL.LOCKADDR register */
 lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
@@ -478,6 +515,16 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
 return ret;
 }
+/**
+ * mmu_hw_do_flush - Flush the MMU and wait for the operation to complete.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ * @hwaccess_locked: Flag to indicate if the hwaccess_lock is held by the caller.
+ *
+ * Return: 0 if flushing the MMU was successful, otherwise an error code.
+ */
 static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
 const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
 {
@@ -508,12 +555,9 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
 return ret;
 #if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here
- * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is
- * supported, and this function doesn't gets called for the GPUs where
- * FLUSH_MEM/PT command is deprecated.
- */
- if (mmu_cmd == AS_COMMAND_FLUSH_MEM) {
+ /* WA for the BASE_HW_ISSUE_GPU2019_3901. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) &&
+ mmu_cmd == AS_COMMAND_FLUSH_MEM) {
 if (!hwaccess_locked) {
 unsigned long flags = 0;
@@ -529,12 +573,13 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
 }
 #endif
- write_cmd(kbdev, as->number, mmu_cmd);
+ ret = write_cmd(kbdev, as->number, mmu_cmd);
 /* Wait for the command to complete */
- ret = wait_ready(kbdev, as->number);
+ if (likely(!ret))
+ ret = wait_ready(kbdev, as->number);
- if (!ret)
+ if (likely(!ret))
 mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr,
 op_param->mmu_sync_info);
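To make the new teardown semantics concrete, here is a small standalone C model of the argument checks and flush-strategy selection in kbase_mmu_teardown_pages(). This is a sketch, not driver code: the function names are hypothetical, and the threshold value stands in for KBASE_PA_RANGE_THRESHOLD_NR_PAGES, whose real value is not shown in this patch.

/* Build: cc -std=c11 -o teardown_model teardown_model.c */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum flush_op { FLUSH_MEM, FLUSH_PT };

/* Stand-in for KBASE_PA_RANGE_THRESHOLD_NR_PAGES; 32 is an arbitrary
 * illustrative choice, not the driver's value.
 */
#define PA_RANGE_THRESHOLD_NR_PAGES 32

/* Models kbase_mmu_teardown_pages(): zero page counts succeed trivially,
 * nr_virt < nr_phys is rejected, and small unmaps with a physical page
 * list and GPU_CONTROL support flush by PA range (FLUSH_PT) instead of
 * flushing the whole GPU cache (FLUSH_MEM).
 */
static int teardown_select_flush(size_t nr_phys_pages, size_t nr_virt_pages,
				 bool have_phys_list, bool gpu_ctrl_flush,
				 enum flush_op *op)
{
	if (nr_virt_pages == 0 || nr_phys_pages == 0)
		return 0; /* nothing to do */

	if (nr_virt_pages < nr_phys_pages)
		return -EINVAL; /* can't flush more pages than were mapped */

	/* Fewer physical than virtual pages is fine: an aliased page is
	 * mapped at many VAs but only needs one PA flush.
	 */
	*op = (gpu_ctrl_flush && have_phys_list &&
	       nr_phys_pages <= PA_RANGE_THRESHOLD_NR_PAGES) ?
		      FLUSH_PT : FLUSH_MEM;
	return 0;
}

int main(void)
{
	enum flush_op op = FLUSH_MEM;

	printf("16 phys / 16 virt: rc=%d op=%d (expect FLUSH_PT=1)\n",
	       teardown_select_flush(16, 16, true, true, &op), (int)op);
	printf("1 phys / 16 virt (alias): rc=%d op=%d\n",
	       teardown_select_flush(1, 16, true, true, &op), (int)op);
	printf("16 phys / 1 virt: rc=%d (expect -EINVAL=%d)\n",
	       teardown_select_flush(16, 1, true, true, &op), -EINVAL);
	return 0;
}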
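Likewise, the bounded poll that replaces the fixed KBASE_AS_INACTIVE_MAX_LOOPS spin in wait_ready() can be modelled in userspace. This sketch substitutes a simulated status register for kbase_reg_read() and CLOCK_MONOTONIC for ktime_get_raw(); all names here are stand-ins.

/* Build: cc -std=gnu11 -o wait_ready_model wait_ready_model.c */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static bool as_unresponsive;	/* models kbdev->as[as_nr].is_unresponsive */
static bool always_busy;	/* simulate a stuck AS_ACTIVE bit */
static unsigned int busy_reads = 2500;	/* reads until "hardware" goes idle */

/* Stand-in for kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
 * AS_STATUS_AS_ACTIVE.
 */
static bool status_active(void)
{
	if (always_busy)
		return true;
	if (busy_reads == 0)
		return false;
	busy_reads--;
	return true;
}

static int64_t elapsed_ms(const struct timespec *start)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (int64_t)(now.tv_sec - start->tv_sec) * 1000 +
	       (now.tv_nsec - start->tv_nsec) / 1000000;
}

/* Same shape as the new wait_ready(): skip an address space already marked
 * unresponsive, poll in batches of 1000 reads between clock samples, and
 * on timeout latch the unresponsive flag (the driver also queues a GPU
 * reset at this point) and return -ETIMEDOUT.
 */
static int wait_ready_model(unsigned int timeout_ms)
{
	struct timespec start;

	if (as_unresponsive)
		return -EBUSY;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		unsigned int i;

		for (i = 0; i < 1000; i++) {
			if (!status_active())
				return 0;
		}
	} while (elapsed_ms(&start) < timeout_ms);

	as_unresponsive = true;
	return -ETIMEDOUT;
}

int main(void)
{
	printf("idle after a while: %d\n", wait_ready_model(100)); /* 0 */
	always_busy = true;
	printf("stuck: %d (-ETIMEDOUT=%d)\n", wait_ready_model(10), -ETIMEDOUT);
	printf("already unresponsive: %d (-EBUSY=%d)\n", wait_ready_model(10),
	       -EBUSY);
	return 0;
}

Batching the register reads keeps the clock sampling off the hot path; the latch means later callers fail fast with -EBUSY instead of re-spinning on an address space the driver has already decided is dead.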