author     Vamsidhar reddy Gaddam <gvamsi@google.com>  2024-04-10 17:14:26 +0000
committer  Vamsidhar reddy Gaddam <gvamsi@google.com>  2024-04-10 17:14:26 +0000
commit     157548df5743bfca8bba1bbd148453a7b7fabac1 (patch)
tree       a3efc8e2e4be8806e340670bb67af927c137c51f /mali_kbase/mmu
parent     656ed97ecba51a1656d1f1deb5b0659ebf073a59 (diff)
parent     a999cd8fd398aed7390c8e5d99795e9b735d6ba7 (diff)
download   gpu-157548df5743bfca8bba1bbd148453a7b7fabac1.tar.gz
Bring in R48P0 KMD update along.

Bug: 323316466
Test: go/pixel-gpu-kmd-r48p0
Change-Id: I32484fe3e881ec9ec60cfeac27befc84d15b7904
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
Diffstat (limited to 'mali_kbase/mmu')
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c               92
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c   2
2 files changed, 68 insertions(+), 26 deletions(-)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 6bab554..8a5b92c 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -774,7 +774,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_reg
return false;
}
- if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+ if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
*grow_2mb_pool = true;
} else {
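
The hunk above swaps the per-device pagesize_2mb flag for the kbase_is_large_pages_enabled() helper when picking which memory pool backs the fault. A minimal standalone sketch of that selection rule follows; the enum, helper, and variable names (pool_kind, large_pages_enabled, pick_pool) are hypothetical stand-ins, not the driver's API:

/* Standalone C sketch of the pool-selection rule changed above. The real
 * code chooses kctx->mem_pools.large[...] vs .small[...]; everything here
 * is a simplified stand-in.
 */
#include <stdbool.h>
#include <stddef.h>

#define NUM_PAGES_IN_2MB_LARGE_PAGE 512 /* 2 MB worth of 4 KB small pages */

enum pool_kind { POOL_SMALL, POOL_LARGE };

/* Assumed global switch standing in for kbase_is_large_pages_enabled(). */
static bool large_pages_enabled = true;

static enum pool_kind pick_pool(size_t new_pages, bool *grow_2mb_pool)
{
	if (large_pages_enabled && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
		*grow_2mb_pool = true; /* a shortage should grow the 2 MB pool */
		return POOL_LARGE;
	}
	*grow_2mb_pool = false;
	return POOL_SMALL;
}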
@@ -921,7 +921,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
int err;
bool grown = false;
size_t pages_to_grow;
- bool grow_2mb_pool;
+ bool grow_2mb_pool = false;
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
size_t current_backed_size;
@@ -1091,7 +1091,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
}
page_fault_retry:
- if (kbdev->pagesize_2mb) {
+ if (kbase_is_large_pages_enabled()) {
/* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
if (!prealloc_sas[i]) {
@@ -1181,10 +1181,14 @@ page_fault_retry:
*/
op_param.mmu_sync_info = mmu_sync_info;
op_param.kctx_id = kctx->id;
- /* Can safely skip the invalidate for all levels in case
- * of duplicate page faults.
+ /* Usually it is safe to skip the MMU cache invalidate for all levels
+ * in case of duplicate page faults. But for the pathological scenario
+ * where the faulting VA gets mapped by the time the page fault worker
+ * runs, it becomes imperative to invalidate the MMU cache for all levels,
+ * otherwise there is a possibility of repeated page faults on GPUs that
+ * support fine-grained MMU cache invalidation.
*/
- op_param.flush_skip_levels = 0xF;
+ op_param.flush_skip_levels = 0x0;
op_param.vpfn = fault_pfn;
op_param.nr = 1;
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
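
flush_skip_levels appears to be a bitmask of translation levels whose MMU cache invalidation may be skipped, so the change from 0xF to 0x0 forces invalidation of every level: if the faulting VA was already mapped by the time the worker runs, a duplicate fault must not leave stale entries behind on GPUs with fine-grained MMU cache invalidation. A standalone sketch of how such a mask could be consumed; only the 0xF versus 0x0 semantics come from the hunk, the loop and helpers are assumptions:

/* Hypothetical consumer of a per-level skip mask; not the driver's code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MMU_LEVELS 4 /* levels 0..3 */

static void invalidate_level(int level)
{
	printf("invalidate MMU cache for level %d\n", level);
}

static void mmu_cache_invalidate(uint32_t flush_skip_levels)
{
	for (int level = 0; level < MMU_LEVELS; level++) {
		bool skip = flush_skip_levels & (1u << level);

		if (!skip)
			invalidate_level(level);
	}
}

int main(void)
{
	mmu_cache_invalidate(0xF); /* old value: every level skipped */
	mmu_cache_invalidate(0x0); /* new value: every level invalidated */
	return 0;
}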
@@ -1218,10 +1222,14 @@ page_fault_retry:
/* See comment [1] about UNLOCK usage */
op_param.mmu_sync_info = mmu_sync_info;
op_param.kctx_id = kctx->id;
- /* Can safely skip the invalidate for all levels in case
- * of duplicate page faults.
+ /* Usually it is safe to skip the MMU cache invalidate for all levels
+ * in case of duplicate page faults. But for the pathological scenario
+ * where the faulting VA gets mapped by the time the page fault worker
+ * runs, it becomes imperative to invalidate the MMU cache for all levels,
+ * otherwise there is a possibility of repeated page faults on GPUs that
+ * support fine-grained MMU cache invalidation.
*/
- op_param.flush_skip_levels = 0xF;
+ op_param.flush_skip_levels = 0x0;
op_param.vpfn = fault_pfn;
op_param.nr = 1;
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
@@ -1383,7 +1391,7 @@ page_fault_retry:
* Otherwise fail the allocation.
*/
if (pages_to_grow > 0) {
- if (kbdev->pagesize_2mb && grow_2mb_pool) {
+ if (kbase_is_large_pages_enabled() && grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
&kctx->mem_pools.large[group_id];
@@ -1597,6 +1605,7 @@ static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu
return err;
}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO);
/*
* On success, sets out_pgd to the PGD for the specified level of translation
@@ -1702,8 +1711,16 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
mmu_mode->entries_invalidate(&page[idx], pcount);
if (!num_of_valid_entries) {
+ mmu_mode->set_num_valid_entries(page, 0);
+
kbase_kunmap(p, page);
+ /* No CPU and GPU cache maintenance is done here, as the caller will do
+ * a complete flush of the GPU cache and invalidation of the TLB before
+ * the PGD page is freed. The CPU cache flush will be done when the PGD
+ * page is returned to the memory pool.
+ */
+
kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
@@ -1730,7 +1747,8 @@ next:
* going to happen to these pages at this stage. They might return
* movable once they are returned to a memory pool.
*/
- if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) {
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys &&
+ !is_huge(*phys) && !is_partial(*phys)) {
const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE;
u64 i;
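
The added is_huge()/is_partial() checks keep pages that belong to a 2 MB large page, or to a partially used sub-allocation of one, out of the migration bookkeeping in this recovery path. A stand-in for that guard; the tag bits and struct below are invented for illustration, while the driver inspects struct tagged_addr:

/* Illustrative guard only; the flag layout is hypothetical. */
#include <stdbool.h>
#include <stdint.h>

#define TAG_HUGE    (1u << 0) /* hypothetical: page is part of a 2 MB large page */
#define TAG_PARTIAL (1u << 1) /* hypothetical: page is a partial large-page sub-allocation */

struct tagged_phys {
	uint64_t pfn;
	uint32_t tags;
};

static bool migration_state_update_allowed(const struct tagged_phys *phys,
					   bool migration_enabled,
					   bool ignore_migration)
{
	return migration_enabled && !ignore_migration && phys &&
	       !(phys->tags & TAG_HUGE) && !(phys->tags & TAG_PARTIAL);
}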
@@ -2645,6 +2663,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
}
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO);
int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 vpfn,
@@ -2702,6 +2721,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_
return 0;
}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO);
#if !MALI_USE_CSF
/**
@@ -2918,15 +2938,25 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) {
+ kbdev->mmu_mode->set_num_valid_entries(current_page, 0);
+
kbase_kunmap(p, current_page);
- /* Ensure the cacheline containing the last valid entry
- * of PGD is invalidated from the GPU cache, before the
- * PGD page is freed.
- */
- kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
- current_pgd + (index * sizeof(u64)), sizeof(u64),
- flush_op);
+ /* Check if fine grained GPU cache maintenance is being used */
+ if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+ /* Ensure the invalidated PTE is visible in memory right away */
+ kbase_mmu_sync_pgd_cpu(kbdev,
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ sizeof(u64));
+ /* Invalidate the GPU cache for the whole PGD page and not just for
+ * the cacheline containing the invalidated PTE, as the PGD page is
+ * going to be freed. There is an extremely remote possibility that
+ * other cachelines (containing only invalid PTEs) of the PGD page are
+ * also present in the GPU cache.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd,
+ 512 * sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
+ }
kbase_mmu_add_to_free_pgds_list(mmut, p);
} else {
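
This hunk, and the matching one in kbase_mmu_teardown_pgd_pages further down, restrict the extra maintenance to the fine-grained KBASE_MMU_OP_FLUSH_PT path: the invalidated entries are first pushed out of the CPU cache so they are visible in memory, then the GPU cache is invalidated for the entire 4 KB PGD page (512 eight-byte entries) because the page is about to be freed. A hedged sketch of that ordering with stubbed helpers; in the driver the real calls are kbase_mmu_sync_pgd_cpu() and kbase_mmu_sync_pgd_gpu():

/* Sketch of the free-PGD maintenance order under fine-grained flushes. */
#include <stddef.h>
#include <stdint.h>

#define PGD_ENTRIES 512 /* one 4 KB page of 8-byte descriptors */

/* Stubs standing in for the driver's CPU/GPU cache sync helpers. */
static void sync_pgd_cpu(uint64_t dma_addr, size_t nbytes)
{
	(void)dma_addr;
	(void)nbytes; /* clean the CPU cachelines covering the entries */
}

static void sync_pgd_gpu(uint64_t pgd_phys, size_t nbytes)
{
	(void)pgd_phys;
	(void)nbytes; /* invalidate the GPU MMU cache for this range */
}

static void free_pgd_fine_grained(uint64_t pgd_phys, uint64_t pgd_dma,
				  unsigned int index, unsigned int pcount)
{
	/* 1. Make the just-invalidated entries visible in memory right away. */
	sync_pgd_cpu(pgd_dma + index * sizeof(uint64_t), pcount * sizeof(uint64_t));

	/* 2. Invalidate the whole PGD page from the GPU cache, not only the
	 *    touched cachelines, since the page is about to be freed and other
	 *    lines of it might still be cached.
	 */
	sync_pgd_gpu(pgd_phys, PGD_ENTRIES * sizeof(uint64_t));
}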
@@ -3108,14 +3138,25 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
mmu_mode->entries_invalidate(&page[index], pcount);
if (!num_of_valid_entries) {
+ mmu_mode->set_num_valid_entries(page, 0);
+
kbase_kunmap(p, page);
- /* Ensure the cacheline(s) containing the last valid entries
- * of PGD is invalidated from the GPU cache, before the
- * PGD page is freed.
- */
- kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
- pcount * sizeof(u64), flush_op);
+ /* Check if fine grained GPU cache maintenance is being used */
+ if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+ /* Ensure the invalidated ATEs are visible in memory right away */
+ kbase_mmu_sync_pgd_cpu(kbdev,
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ pcount * sizeof(u64));
+ /* Invalidate the GPU cache for the whole PGD page and not just for
+ * the cachelines containing the invalidated ATEs, as the PGD page
+ * is going to be freed. There is an extremely remote possibility
+ * that other cachelines (containing only invalid ATEs) of the PGD page
+ * are also present in the GPU cache.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64),
+ KBASE_MMU_OP_FLUSH_PT);
+ }
kbase_mmu_add_to_free_pgds_list(mmut, p);
@@ -3272,6 +3313,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
false);
}
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
index d19579d..e3ad78d 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -32,7 +32,7 @@
*/
#define ENTRY_IS_ATE_L3 3ULL
#define ENTRY_IS_ATE_L02 1ULL
-#define ENTRY_IS_INVAL 2ULL
+#define ENTRY_IS_INVAL 0ULL
#define ENTRY_IS_PTE 3ULL
#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */
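
With this change, freshly invalidated descriptors are written as all-zero rather than 0b10. In the VMSA-style descriptor format that this AArch64 page-table mode appears to follow, any entry with bit 0 clear decodes as invalid, so both encodings are equally invalid to the MMU. A standalone sketch of that low-bit check; ENTRY_TYPE_MASK and the helpers are assumptions, only the four defines come from the file:

/* Illustrative type decoding of the low two descriptor bits. */
#include <stdbool.h>
#include <stdint.h>

#define ENTRY_IS_ATE_L3 3ULL
#define ENTRY_IS_ATE_L02 1ULL
#define ENTRY_IS_INVAL 0ULL /* was 2ULL; 0b00 and 0b10 are both invalid */
#define ENTRY_IS_PTE 3ULL

#define ENTRY_TYPE_MASK 3ULL /* hypothetical name for the low-bit mask */

static bool entry_is_invalid(uint64_t entry)
{
	uint64_t type = entry & ENTRY_TYPE_MASK;

	/* Valid encodings (ATE/PTE) all have bit 0 set. */
	return type != ENTRY_IS_ATE_L3 && type != ENTRY_IS_ATE_L02;
}

static uint64_t make_invalid_entry(void)
{
	return ENTRY_IS_INVAL; /* newly written invalid entries are now all-zero */
}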