author     Vamsidhar reddy Gaddam <gvamsi@google.com>  2024-04-10 17:14:26 +0000
committer  Vamsidhar reddy Gaddam <gvamsi@google.com>  2024-04-10 17:14:26 +0000
commit     157548df5743bfca8bba1bbd148453a7b7fabac1 (patch)
tree       a3efc8e2e4be8806e340670bb67af927c137c51f /mali_kbase/mmu
parent     656ed97ecba51a1656d1f1deb5b0659ebf073a59 (diff)
parent     a999cd8fd398aed7390c8e5d99795e9b735d6ba7 (diff)
download   gpu-157548df5743bfca8bba1bbd148453a7b7fabac1.tar.gz
Bring in R48P0 KMD update along.

Bug: 323316466
Test: go/pixel-gpu-kmd-r48p0
Change-Id: I32484fe3e881ec9ec60cfeac27befc84d15b7904
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
Diffstat (limited to 'mali_kbase/mmu')
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c               92
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c   2
2 files changed, 68 insertions(+), 26 deletions(-)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 6bab554..8a5b92c 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -774,7 +774,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_reg
return false;
}
- if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+ if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
*grow_2mb_pool = true;
} else {
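
The hunk above swaps the per-device pagesize_2mb flag for the kbase_is_large_pages_enabled() helper when picking which memory pool backs the fault. A minimal standalone sketch of that selection rule follows; the enum, helper, and variable names (pool_kind, large_pages_enabled, pick_pool) are hypothetical stand-ins, not the driver's API:

/* Standalone C sketch of the pool-selection rule changed above. The real
 * code chooses kctx->mem_pools.large[...] vs .small[...]; everything here
 * is a simplified stand-in.
 */
#include <stdbool.h>
#include <stddef.h>

#define NUM_PAGES_IN_2MB_LARGE_PAGE 512 /* 2 MB worth of 4 KB small pages */

enum pool_kind { POOL_SMALL, POOL_LARGE };

/* Assumed global switch standing in for kbase_is_large_pages_enabled(). */
static bool large_pages_enabled = true;

static enum pool_kind pick_pool(size_t new_pages, bool *grow_2mb_pool)
{
	if (large_pages_enabled && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
		*grow_2mb_pool = true; /* a shortage should grow the 2 MB pool */
		return POOL_LARGE;
	}
	*grow_2mb_pool = false;
	return POOL_SMALL;
}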
@@ -921,7 +921,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
int err;
bool grown = false;
size_t pages_to_grow;
- bool grow_2mb_pool;
+ bool grow_2mb_pool = false;
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
size_t current_backed_size;
@@ -1091,7 +1091,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
}
page_fault_retry:
- if (kbdev->pagesize_2mb) {
+ if (kbase_is_large_pages_enabled()) {
/* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
if (!prealloc_sas[i]) {
@@ -1181,10 +1181,14 @@ page_fault_retry:
*/
op_param.mmu_sync_info = mmu_sync_info;
op_param.kctx_id = kctx->id;
- /* Can safely skip the invalidate for all levels in case
- * of duplicate page faults.
+ /* Usually it is safe to skip the MMU cache invalidate for all levels
+ * in case of duplicate page faults. But for the pathological scenario
+ * where the faulting VA gets mapped by the time the page fault worker
+ * runs, it becomes imperative to invalidate the MMU cache for all levels,
+ * otherwise there is a possibility of repeated page faults on GPUs that
+ * support fine-grained MMU cache invalidation.
*/
- op_param.flush_skip_levels = 0xF;
+ op_param.flush_skip_levels = 0x0;
op_param.vpfn = fault_pfn;
op_param.nr = 1;
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
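
flush_skip_levels appears to be a bitmask of translation levels whose MMU cache invalidation may be skipped, so the change from 0xF to 0x0 forces invalidation of every level: if the faulting VA was already mapped by the time the worker runs, a duplicate fault must not leave stale entries behind on GPUs with fine-grained MMU cache invalidation. A standalone sketch of how such a mask could be consumed; only the 0xF versus 0x0 semantics come from the hunk, the loop and helpers are assumptions:

/* Hypothetical consumer of a per-level skip mask; not the driver's code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MMU_LEVELS 4 /* levels 0..3 */

static void invalidate_level(int level)
{
	printf("invalidate MMU cache for level %d\n", level);
}

static void mmu_cache_invalidate(uint32_t flush_skip_levels)
{
	for (int level = 0; level < MMU_LEVELS; level++) {
		bool skip = flush_skip_levels & (1u << level);

		if (!skip)
			invalidate_level(level);
	}
}

int main(void)
{
	mmu_cache_invalidate(0xF); /* old value: every level skipped */
	mmu_cache_invalidate(0x0); /* new value: every level invalidated */
	return 0;
}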
@@ -1218,10 +1222,14 @@ page_fault_retry:
/* See comment [1] about UNLOCK usage */
op_param.mmu_sync_info = mmu_sync_info;
op_param.kctx_id = kctx->id;
- /* Can safely skip the invalidate for all levels in case
- * of duplicate page faults.
+ /* Usually it is safe to skip the MMU cache invalidate for all levels
+ * in case of duplicate page faults. But for the pathological scenario
+ * where the faulting VA gets mapped by the time the page fault worker
+ * runs, it becomes imperative to invalidate the MMU cache for all levels,
+ * otherwise there is a possibility of repeated page faults on GPUs that
+ * support fine-grained MMU cache invalidation.
*/
- op_param.flush_skip_levels = 0xF;
+ op_param.flush_skip_levels = 0x0;
op_param.vpfn = fault_pfn;
op_param.nr = 1;
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
@@ -1383,7 +1391,7 @@ page_fault_retry:
* Otherwise fail the allocation.
*/
if (pages_to_grow > 0) {
- if (kbdev->pagesize_2mb && grow_2mb_pool) {
+ if (kbase_is_large_pages_enabled() && grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
&kctx->mem_pools.large[group_id];
@@ -1597,6 +1605,7 @@ static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu
return err;
}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO);
/*
* On success, sets out_pgd to the PGD for the specified level of translation
@@ -1702,8 +1711,16 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
mmu_mode->entries_invalidate(&page[idx], pcount);
if (!num_of_valid_entries) {
+ mmu_mode->set_num_valid_entries(page, 0);
+
kbase_kunmap(p, page);
+ /* No CPU and GPU cache maintenance is done here, as the caller will do
+ * a complete flush of the GPU cache and invalidation of the TLB before
+ * the PGD page is freed. The CPU cache flush will be done when the PGD
+ * page is returned to the memory pool.
+ */
+
kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
@@ -1730,7 +1747,8 @@ next:
* going to happen to these pages at this stage. They might return
* movable once they are returned to a memory pool.
*/
- if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) {
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys &&
+ !is_huge(*phys) && !is_partial(*phys)) {
const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE;
u64 i;
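
The added is_huge()/is_partial() checks keep pages that belong to a 2 MB large page, or to a partially used sub-allocation of one, out of the migration bookkeeping in this recovery path. A stand-in for that guard; the tag bits and struct below are invented for illustration, while the driver inspects struct tagged_addr:

/* Illustrative guard only; the flag layout is hypothetical. */
#include <stdbool.h>
#include <stdint.h>

#define TAG_HUGE    (1u << 0) /* hypothetical: page is part of a 2 MB large page */
#define TAG_PARTIAL (1u << 1) /* hypothetical: page is a partial large-page sub-allocation */

struct tagged_phys {
	uint64_t pfn;
	uint32_t tags;
};

static bool migration_state_update_allowed(const struct tagged_phys *phys,
					   bool migration_enabled,
					   bool ignore_migration)
{
	return migration_enabled && !ignore_migration && phys &&
	       !(phys->tags & TAG_HUGE) && !(phys->tags & TAG_PARTIAL);
}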
@@ -2645,6 +2663,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
}
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO);
int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 vpfn,
@@ -2702,6 +2721,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_
return 0;
}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO);
#if !MALI_USE_CSF
/**
@@ -2918,15 +2938,25 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) {
+ kbdev->mmu_mode->set_num_valid_entries(current_page, 0);
+
kbase_kunmap(p, current_page);
- /* Ensure the cacheline containing the last valid entry
- * of PGD is invalidated from the GPU cache, before the
- * PGD page is freed.
- */
- kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
- current_pgd + (index * sizeof(u64)), sizeof(u64),
- flush_op);
+ /* Check if fine grained GPU cache maintenance is being used */
+ if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+ /* Ensure the invalidated PTE is visible in memory right away */
+ kbase_mmu_sync_pgd_cpu(kbdev,
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ sizeof(u64));
+ /* Invalidate the GPU cache for the whole PGD page and not just for
+ * the cacheline containing the invalidated PTE, as the PGD page is
+ * going to be freed. There is an extremely remote possibility that
+ * other cachelines (containing only invalid PTEs) of the PGD page are
+ * also present in the GPU cache.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd,
+ 512 * sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
+ }
kbase_mmu_add_to_free_pgds_list(mmut, p);
} else {
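
This hunk, and the matching one in kbase_mmu_teardown_pgd_pages further down, restrict the extra maintenance to the fine-grained KBASE_MMU_OP_FLUSH_PT path: the invalidated entries are first pushed out of the CPU cache so they are visible in memory, then the GPU cache is invalidated for the entire 4 KB PGD page (512 eight-byte entries) because the page is about to be freed. A hedged sketch of that ordering with stubbed helpers; in the driver the real calls are kbase_mmu_sync_pgd_cpu() and kbase_mmu_sync_pgd_gpu():

/* Sketch of the free-PGD maintenance order under fine-grained flushes. */
#include <stddef.h>
#include <stdint.h>

#define PGD_ENTRIES 512 /* one 4 KB page of 8-byte descriptors */

/* Stubs standing in for the driver's CPU/GPU cache sync helpers. */
static void sync_pgd_cpu(uint64_t dma_addr, size_t nbytes)
{
	(void)dma_addr;
	(void)nbytes; /* clean the CPU cachelines covering the entries */
}

static void sync_pgd_gpu(uint64_t pgd_phys, size_t nbytes)
{
	(void)pgd_phys;
	(void)nbytes; /* invalidate the GPU MMU cache for this range */
}

static void free_pgd_fine_grained(uint64_t pgd_phys, uint64_t pgd_dma,
				  unsigned int index, unsigned int pcount)
{
	/* 1. Make the just-invalidated entries visible in memory right away. */
	sync_pgd_cpu(pgd_dma + index * sizeof(uint64_t), pcount * sizeof(uint64_t));

	/* 2. Invalidate the whole PGD page from the GPU cache, not only the
	 *    touched cachelines, since the page is about to be freed and other
	 *    lines of it might still be cached.
	 */
	sync_pgd_gpu(pgd_phys, PGD_ENTRIES * sizeof(uint64_t));
}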
@@ -3108,14 +3138,25 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
mmu_mode->entries_invalidate(&page[index], pcount);
if (!num_of_valid_entries) {
+ mmu_mode->set_num_valid_entries(page, 0);
+
kbase_kunmap(p, page);
- /* Ensure the cacheline(s) containing the last valid entries
- * of PGD is invalidated from the GPU cache, before the
- * PGD page is freed.
- */
- kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
- pcount * sizeof(u64), flush_op);
+ /* Check if fine grained GPU cache maintenance is being used */
+ if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+ /* Ensure the invalidated ATEs are visible in memory right away */
+ kbase_mmu_sync_pgd_cpu(kbdev,
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ pcount * sizeof(u64));
+ /* Invalidate the GPU cache for the whole PGD page and not just for
+ * the cachelines containing the invalidated ATEs, as the PGD page
+ * is going to be freed. There is an extremely remote possibility
+ * that other cachelines (containing only invalid ATEs) of the PGD page
+ * are also present in the GPU cache.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64),
+ KBASE_MMU_OP_FLUSH_PT);
+ }
kbase_mmu_add_to_free_pgds_list(mmut, p);
@@ -3272,6 +3313,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
false);
}
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
index d19579d..e3ad78d 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -32,7 +32,7 @@
*/
#define ENTRY_IS_ATE_L3 3ULL
#define ENTRY_IS_ATE_L02 1ULL
-#define ENTRY_IS_INVAL 2ULL
+#define ENTRY_IS_INVAL 0ULL
#define ENTRY_IS_PTE 3ULL
#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */
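
With this change, freshly invalidated descriptors are written as all-zero rather than 0b10. In the VMSA-style descriptor format that this AArch64 page-table mode appears to follow, any entry with bit 0 clear decodes as invalid, so both encodings are equally invalid to the MMU. A standalone sketch of that low-bit check; ENTRY_TYPE_MASK and the helpers are assumptions, only the four defines come from the file:

/* Illustrative type decoding of the low two descriptor bits. */
#include <stdbool.h>
#include <stdint.h>

#define ENTRY_IS_ATE_L3 3ULL
#define ENTRY_IS_ATE_L02 1ULL
#define ENTRY_IS_INVAL 0ULL /* was 2ULL; 0b00 and 0b10 are both invalid */
#define ENTRY_IS_PTE 3ULL

#define ENTRY_TYPE_MASK 3ULL /* hypothetical name for the low-bit mask */

static bool entry_is_invalid(uint64_t entry)
{
	uint64_t type = entry & ENTRY_TYPE_MASK;

	/* Valid encodings (ATE/PTE) all have bit 0 set. */
	return type != ENTRY_IS_ATE_L3 && type != ENTRY_IS_ATE_L02;
}

static uint64_t make_invalid_entry(void)
{
	return ENTRY_IS_INVAL; /* newly written invalid entries are now all-zero */
}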