author     Toby Sunrise <tobyrs@google.com>  2023-05-01 13:31:16 +0000
committer  Toby Sunrise <tobyrs@google.com>  2023-05-01 13:33:19 +0000
commit     bce5281a0408a175137c08dc93028e2a2c0fb69b (patch)
tree       edc640500ccdf781a123e7fae22fac9c44ddbe46 /mali_kbase
parent     f7a77046d77266482dedf54d134102e6031a7438 (diff)
download   gpu-bce5281a0408a175137c08dc93028e2a2c0fb69b.tar.gz
Mali Valhall Android DDK r43p0-01eac0 KMD
Provenance: 48a9c7e25986318c8475bc245de51e7bec2606e8 (ipdelivery/EAC/v_r43p0)
VX504X08X-BU-00000-r43p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r43p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r43p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r43p0-01eac0 - Valhall Android Renderscript AOSP parts

Change-Id: I5df1914eba386e0bf507d4951240e1744f666a29
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/Kbuild | 4
-rw-r--r--  mali_kbase/Kconfig | 43
-rw-r--r--  mali_kbase/Makefile | 20
-rw-r--r--  mali_kbase/Mconfig | 33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 26
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_internal.h | 10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 17
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_error_generator.c | 15
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.h | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 35
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c | 79
-rw-r--r--  mali_kbase/build.bp | 22
-rw-r--r--  mali_kbase/context/mali_kbase_context.c | 66
-rw-r--r--  mali_kbase/context/mali_kbase_context.h | 15
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c | 151
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h | 2
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h | 93
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c | 68
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c | 21
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c | 104
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h | 6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c | 18
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_registers.h | 34
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c | 33
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c | 28
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c | 86
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.h | 34
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c | 11
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c | 15
-rw-r--r--  mali_kbase/device/mali_kbase_device.c | 6
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c | 66
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h | 3
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_fault.h | 6
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_regmap.h | 7
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c | 42
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c | 5
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt.c | 4
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c | 20
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h | 14
-rw-r--r--  mali_kbase/jm/mali_kbase_js_defs.h | 5
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 1
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 15
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 21
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 18
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c | 3
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_allocs.c | 4
-rw-r--r--  mali_kbase/mali_kbase_debugfs_helper.c | 5
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 85
-rw-r--r--  mali_kbase/mali_kbase_fence.h | 15
-rw-r--r--  mali_kbase/mali_kbase_gwt.c | 15
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 37
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_time.h | 62
-rw-r--r--  mali_kbase/mali_kbase_js.c | 5
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 386
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 140
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 316
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.c | 33
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.h | 17
-rw-r--r--  mali_kbase/mali_kbase_mem_pool.c | 64
-rw-r--r--  mali_kbase/mali_kbase_refcount_defs.h | 57
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 7
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c | 8
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c | 8
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 1229
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h | 64
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 121
-rw-r--r--  mali_kbase/tests/build.bp | 8
-rw-r--r--  mali_kbase/thirdparty/mali_kbase_mmap.c | 53
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c | 4
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_io.c | 57
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 286
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 340
75 files changed, 3159 insertions, 1607 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 32b4d37..73375f6 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -69,7 +69,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r42p0-01eac0"'
+MALI_RELEASE_NAME ?= '"r43p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 1c5e1f8..3d5a14a 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -115,21 +115,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
-config MALI_FW_CORE_DUMP
- bool "Enable support for FW core dump"
- depends on MALI_MIDGARD && MALI_CSF_SUPPORT
- default y
- help
- Adds ability to request firmware core dump through the "fw_core_dump"
- debugfs file
-
- Example:
- * To explicitly request core dump:
- echo 1 > /sys/kernel/debug/mali0/fw_core_dump
- * To output current core dump (after explicitly requesting a core dump,
- or kernel driver reported an internal firmware error):
- cat /sys/kernel/debug/mali0/fw_core_dump
-
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_MIDGARD && !MALI_CSF_SUPPORT
@@ -181,7 +166,19 @@ menuconfig MALI_EXPERT
if MALI_EXPERT
-config MALI_2MB_ALLOC
+config LARGE_PAGE_ALLOC_OVERRIDE
+ bool "Override default setting of 2MB pages"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default n
+ help
+ An override config for LARGE_PAGE_ALLOC config.
+ When LARGE_PAGE_ALLOC_OVERRIDE is Y, 2MB page allocation will be
+ enabled by LARGE_PAGE_ALLOC. When this is N, the feature will be
+ enabled when GPU HW satisfies requirements.
+
+ If in doubt, say N
+
+config LARGE_PAGE_ALLOC
bool "Attempt to allocate 2MB pages"
depends on MALI_MIDGARD && MALI_EXPERT
default n
@@ -190,6 +187,10 @@ config MALI_2MB_ALLOC
allocate 2MB pages from the kernel. This reduces TLB pressure and
helps to prevent memory fragmentation.
+ Note this config applies only when LARGE_PAGE_ALLOC_OVERRIDE config
+ is enabled and enabling this on a GPU HW that does not satisfy
+ requirements can cause serious problem.
+
If in doubt, say N
config MALI_MEMORY_FULLY_BACKED
@@ -225,14 +226,6 @@ config MALI_ERROR_INJECT
help
Enables insertion of errors to test module failure and recovery mechanisms.
-config MALI_GEM5_BUILD
- bool "Enable build of Mali kernel driver for GEM5"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This option is to do a Mali GEM5 build.
- If unsure, say N.
-
comment "Debug options"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 5d88b14..5b3e99b 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -58,10 +58,7 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
endif
ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
- CONFIG_MALI_FW_CORE_DUMP ?= y
CONFIG_MALI_CORESIGHT ?= n
- else
- CONFIG_MALI_FW_CORE_DUMP ?= n
endif
#
@@ -101,7 +98,8 @@ ifeq ($(CONFIG_MALI_MIDGARD),m)
else
# Prevent misuse when CONFIG_MALI_EXPERT=n
CONFIG_MALI_CORESTACK = n
- CONFIG_MALI_2MB_ALLOC = n
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
+ CONFIG_LARGE_PAGE_ALLOC = n
CONFIG_MALI_PWRSOFT_765 = n
CONFIG_MALI_MEMORY_FULLY_BACKED = n
CONFIG_MALI_JOB_DUMP = n
@@ -143,7 +141,6 @@ else
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
- CONFIG_MALI_FW_CORE_DUMP = n
endif
# All Mali CONFIG should be listed here
@@ -155,14 +152,14 @@ CONFIGS := \
CONFIG_MALI_ARBITRATION \
CONFIG_MALI_PARTITION_MANAGER \
CONFIG_MALI_REAL_HW \
- CONFIG_MALI_GEM5_BUILD \
CONFIG_MALI_DEVFREQ \
CONFIG_MALI_MIDGARD_DVFS \
CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
CONFIG_MALI_EXPERT \
CONFIG_MALI_CORESTACK \
- CONFIG_MALI_2MB_ALLOC \
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
+ CONFIG_LARGE_PAGE_ALLOC \
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_MEMORY_FULLY_BACKED \
CONFIG_MALI_JOB_DUMP \
@@ -183,7 +180,6 @@ CONFIGS := \
CONFIG_MALI_KUTF_CLK_RATE_TRACE \
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
CONFIG_MALI_XEN \
- CONFIG_MALI_FW_CORE_DUMP \
CONFIG_MALI_CORESIGHT
@@ -267,6 +263,12 @@ ifeq ($(CONFIG_GCOV_KERNEL),y)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
+ifeq ($(CONFIG_MALI_KCOV),y)
+ KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
+ EXTRA_CFLAGS += -DKCOV=1
+ EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
+endif
+
all:
$(MAKE) -C $(KDIR) M=$(CURDIR) $(MAKE_ARGS) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" KBUILD_EXTRA_SYMBOLS="$(EXTRA_SYMBOLS)" modules
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index d5b3067..f398d1a 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -117,21 +117,6 @@ config MALI_MIDGARD_ENABLE_TRACE
Enables tracing in kbase. Trace log available through
the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled
-config MALI_FW_CORE_DUMP
- bool "Enable support for FW core dump"
- depends on MALI_MIDGARD && MALI_CSF_SUPPORT
- default y
- help
- Adds ability to request firmware core dump through the "fw_core_dump"
- debugfs file
-
- Example:
- * To explicitly request core dump:
- echo 1 > /sys/kernel/debug/mali0/fw_core_dump
- * To output current core dump (after explicitly requesting a core dump,
- or kernel driver reported an internal firmware error):
- cat /sys/kernel/debug/mali0/fw_core_dump
-
config MALI_ARBITER_SUPPORT
bool "Enable arbiter support for Mali"
depends on MALI_MIDGARD && !MALI_CSF_SUPPORT
@@ -250,14 +235,6 @@ config MALI_ERROR_INJECT
depends on MALI_MIDGARD && MALI_EXPERT
default y if !MALI_ERROR_INJECT_NONE
-config MALI_GEM5_BUILD
- bool "Enable build of Mali kernel driver for GEM5"
- depends on MALI_MIDGARD && MALI_EXPERT
- default n
- help
- This option is to do a Mali GEM5 build.
- If unsure, say N.
-
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -275,6 +252,14 @@ config MALI_GCOV_KERNEL
coverage information. When built against a supporting kernel,
the coverage information will be available via debugfs.
+config MALI_KCOV
+ bool "Enable kcov coverage to support fuzzers"
+ depends on MALI_MIDGARD && MALI_DEBUG
+ default n
+ help
+ Choose this option to enable building with fuzzing-oriented
+ coverage, to improve the random test cases that are generated.
+
config MALI_FENCE_DEBUG
bool "Enable debug sync fence usage"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 9a17494..7df2173 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -190,6 +190,27 @@ static u64 select_job_chain(struct kbase_jd_atom *katom)
return jc;
}
+static inline bool kbasep_jm_wait_js_free(struct kbase_device *kbdev, unsigned int js,
+ struct kbase_context *kctx)
+{
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const s64 max_timeout = (s64)kbdev->js_data.js_free_wait_time_ms;
+ s64 diff = 0;
+
+ /* wait for the JS_COMMAND_NEXT register to reach the given status value */
+ do {
+ if (!kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT)))
+ return true;
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < max_timeout);
+
+ dev_err(kbdev->dev, "Timeout in waiting for job slot %u to become free for ctx %d_%u", js,
+ kctx->tgid, kctx->id);
+
+ return false;
+}
+
int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom, unsigned int js)
{
struct kbase_context *kctx;
@@ -203,8 +224,7 @@ int kbase_job_hw_submit(struct kbase_device *kbdev, struct kbase_jd_atom *katom,
kctx = katom->kctx;
/* Command register must be available */
- if (WARN(!kbasep_jm_is_js_free(kbdev, js, kctx),
- "Attempting to assign to occupied slot %d in kctx %pK\n", js, (void *)kctx))
+ if (!kbasep_jm_wait_js_free(kbdev, js, kctx))
return -EPERM;
dev_dbg(kctx->kbdev->dev, "Write JS_HEAD_NEXT 0x%llx for atom %pK\n",
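For reference, a minimal standalone sketch of the poll-with-deadline pattern that kbasep_jm_wait_js_free() introduces above: read a hardware status value until it reports idle or a millisecond budget expires, then let the caller report the timeout. read_hw_status() and now_ms() are hypothetical stand-ins for kbase_reg_read() and ktime_get_raw()/ktime_to_ms(); this is not kbase API.

#include <stdbool.h>
#include <stdint.h>

extern uint32_t read_hw_status(void); /* hypothetical register read */
extern int64_t now_ms(void);          /* hypothetical monotonic clock, in ms */

static bool wait_until_idle(int64_t timeout_ms)
{
	const int64_t start = now_ms();

	do {
		if (read_hw_status() == 0)
			return true;  /* slot became free, safe to submit */
	} while (now_ms() - start < timeout_ms);

	return false;                 /* caller logs the timeout and bails out */
}
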
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index e4cff1f..bfd55a6 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2016, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -52,14 +52,6 @@ static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string
}
#endif
-#if !MALI_USE_CSF
-static inline int kbasep_jm_is_js_free(struct kbase_device *kbdev, unsigned int js,
- struct kbase_context *kctx)
-{
- return !kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT));
-}
-#endif
-
/**
* kbase_job_hw_submit() - Submit a job to the GPU
* @kbdev: Device pointer
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 388b37f..7db2b35 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1001,17 +1001,11 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
other_slots_busy(kbdev, js))
break;
-#ifdef CONFIG_MALI_GEM5_BUILD
- if (!kbasep_jm_is_js_free(kbdev, js,
- katom[idx]->kctx))
- break;
-#endif
/* Check if this job needs the cycle counter
* enabled before submission
*/
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
- kbase_pm_request_gpu_cycle_counter_l2_is_on(
- kbdev);
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
if (!kbase_job_hw_submit(kbdev, katom[idx], js)) {
katom[idx]->gpu_rb_state = KBASE_ATOM_GPU_RB_SUBMITTED;
@@ -1025,9 +1019,12 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
/* Inform platform at start/finish of atom */
kbasep_platform_event_atom_submit(katom[idx]);
- }
- else
+ } else {
+ if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+
break;
+ }
/* ***TRANSITION TO HIGHER STATE*** */
fallthrough;
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index 9d5f15e..dd16fb2 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -2024,8 +2024,6 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_SHADER,
counter_index, is_low_word);
- } else if (addr == USER_REG(LATEST_FLUSH)) {
- *value = 0;
}
#endif
else if (addr == GPU_CONTROL_REG(GPU_FEATURES_LO)) {
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
index 75b1e7e..f310cc7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,14 +25,17 @@
static struct kbase_error_atom *error_track_list;
-unsigned int rand_seed;
+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+
+/** Kernel 6.1.0 has dropped prandom_u32(), use get_random_u32() */
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+#define prandom_u32 get_random_u32
+#endif
/*following error probability are set quite high in order to stress the driver*/
-unsigned int error_probability = 50; /* to be set between 0 and 100 */
+static unsigned int error_probability = 50; /* to be set between 0 and 100 */
/* probability to have multiple error give that there is an error */
-unsigned int multiple_error_probability = 50;
-
-#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+static unsigned int multiple_error_probability = 50;
/* all the error conditions supported by the model */
#define TOTAL_FAULTS 27
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
index b37680d..e90e4df 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
@@ -105,7 +105,7 @@ static void serve_mmu_irq(struct work_struct *work)
kmem_cache_free(kbdev->irq_slab, data);
}
-void gpu_device_raise_irq(void *model, enum model_linux_irqs irq)
+void gpu_device_raise_irq(void *model, u32 irq)
{
struct model_irq_data *data;
struct kbase_device *kbdev = gpu_device_get_data(model);
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
index a24db17..4cf1235 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
@@ -124,7 +124,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
*
* This hook is global to the model Linux framework.
*/
-void gpu_device_raise_irq(void *model, enum model_linux_irqs irq);
+void gpu_device_raise_irq(void *model, u32 irq);
/**
* gpu_device_set_data() - Private model set data function.
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index c51b133..0caf63e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -2575,26 +2575,33 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
#if MALI_USE_CSF
+/**
+ * update_user_reg_page_mapping - Update the mapping for USER Register page
+ *
+ * @kbdev: The kbase device structure for the device.
+ *
+ * This function must be called to unmap the dummy or real page from USER Register page
+ * mapping whenever GPU is powered up or down. The dummy or real page would get
+ * appropriately mapped in when Userspace reads the LATEST_FLUSH value.
+ */
static void update_user_reg_page_mapping(struct kbase_device *kbdev)
{
+ struct kbase_context *kctx, *n;
+
lockdep_assert_held(&kbdev->pm.lock);
mutex_lock(&kbdev->csf.reg_lock);
-
- /* Only if the mappings for USER page exist, update all PTEs associated to it */
- if (kbdev->csf.nr_user_page_mapped > 0) {
- if (likely(kbdev->csf.mali_file_inode)) {
- /* This would zap the pte corresponding to the mapping of User
- * register page for all the Kbase contexts.
- */
- unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
- BASEP_MEM_CSF_USER_REG_PAGE_HANDLE, PAGE_SIZE, 1);
- } else {
- dev_err(kbdev->dev,
- "Device file inode not exist even if USER page previously mapped");
- }
+ list_for_each_entry_safe(kctx, n, &kbdev->csf.user_reg.list, csf.user_reg.link) {
+ /* This would zap the PTE corresponding to the mapping of User
+ * Register page of the kbase context. The mapping will be reestablished
+ * when the context (user process) needs to access to the page.
+ */
+ unmap_mapping_range(kbdev->csf.user_reg.filp->f_inode->i_mapping,
+ kctx->csf.user_reg.file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
+ list_del_init(&kctx->csf.user_reg.link);
+ dev_dbg(kbdev->dev, "Updated USER Reg page mapping of ctx %d_%d", kctx->tgid,
+ kctx->id);
}
-
mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
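The rework above replaces the single device-wide zap with a per-context zap keyed by each context's file_offset into a dedicated dummy file. A minimal sketch of that per-context invalidation, using only the fields described in the diff (filp and file_offset); the helper name is hypothetical:

#include <linux/fs.h>
#include <linux/mm.h>

/* Zap one context's CPU mapping of the USER Register page. Each context owns
 * a distinct page-sized offset range in the shared dummy file, so it can be
 * invalidated independently; the mapping is re-established on the next access.
 */
static void zap_user_reg_mapping(struct file *filp, u32 file_offset)
{
	unmap_mapping_range(filp->f_inode->i_mapping,
			    (loff_t)file_offset << PAGE_SHIFT, PAGE_SIZE, 1);
}
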
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index 5110e3d..7a4d662 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#include <mali_kbase.h>
#include <mali_kbase_hwaccess_time.h>
#if MALI_USE_CSF
+#include <asm/arch_timer.h>
+#include <linux/gcd.h>
#include <csf/mali_kbase_csf_timeout.h>
#endif
#include <device/mali_kbase_device.h>
@@ -121,20 +123,29 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
/* Only for debug messages, safe default in case it's mis-maintained */
const char *selector_str = "(unknown)";
- if (WARN(!kbdev->lowest_gpu_freq_khz,
- "Lowest frequency uninitialized! Using reference frequency for scaling")) {
+ if (!kbdev->lowest_gpu_freq_khz) {
+ dev_dbg(kbdev->dev,
+ "Lowest frequency uninitialized! Using reference frequency for scaling");
freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
} else {
freq_khz = kbdev->lowest_gpu_freq_khz;
}
switch (selector) {
+ case MMU_AS_INACTIVE_WAIT_TIMEOUT:
+ selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
+ nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
+ break;
case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
break;
+ case JM_DEFAULT_JS_FREE_TIMEOUT:
+ selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
+ nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
+ break;
#else
/* Use Firmware timeout if invalid selection */
WARN(1,
@@ -204,3 +215,65 @@ u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev)
return lo | (((u64) hi1) << 32);
}
+
+#if MALI_USE_CSF
+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts)
+{
+ if (WARN_ON(!kbdev))
+ return 0;
+
+ return div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor) +
+ kbdev->backend_time.offset;
+}
+
+/**
+ * get_cpu_gpu_time() - Get current CPU and GPU timestamps.
+ *
+ * @kbdev: Kbase device.
+ * @cpu_ts: Output CPU timestamp.
+ * @gpu_ts: Output GPU timestamp.
+ * @gpu_cycle: Output GPU cycle counts.
+ */
+static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_ts, u64 *gpu_cycle)
+{
+ struct timespec64 ts;
+
+ kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
+
+ if (cpu_ts)
+ *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+#endif
+
+int kbase_backend_time_init(struct kbase_device *kbdev)
+{
+#if MALI_USE_CSF
+ u64 cpu_ts = 0;
+ u64 gpu_ts = 0;
+ u64 freq;
+ u64 common_factor;
+
+ get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
+ freq = arch_timer_get_cntfrq();
+
+ if (!freq) {
+ dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
+ return -EINVAL;
+ }
+
+ common_factor = gcd(NSEC_PER_SEC, freq);
+
+ kbdev->backend_time.multiplier = div64_u64(NSEC_PER_SEC, common_factor);
+ kbdev->backend_time.divisor = div64_u64(freq, common_factor);
+
+ if (!kbdev->backend_time.divisor) {
+ dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
+ return -EINVAL;
+ }
+
+ kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
+ kbdev->backend_time.divisor);
+#endif
+
+ return 0;
+}
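A minimal sketch of the nanosecond scaling that kbase_backend_time_init() sets up and kbase_backend_time_convert_gpu_to_cpu() applies above: reduce NSEC_PER_SEC and the counter frequency by their GCD to keep the 64-bit multiply small, then add the CPU/GPU offset captured at init. gcd_u64() is a local stand-in for the kernel's gcd(); the helper is illustrative, not kbase API.

#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static uint64_t gcd_u64(uint64_t a, uint64_t b)
{
	while (b) {
		uint64_t t = a % b;

		a = b;
		b = t;
	}
	return a;
}

/* Convert a raw GPU timestamp (in counter ticks) to CPU nanoseconds. */
static uint64_t gpu_to_cpu_ns(uint64_t gpu_ts, uint64_t cnt_freq_hz,
			      uint64_t offset_ns)
{
	const uint64_t g = gcd_u64(NSEC_PER_SEC, cnt_freq_hz);
	const uint64_t multiplier = NSEC_PER_SEC / g;
	const uint64_t divisor = cnt_freq_hz / g;

	return gpu_ts * multiplier / divisor + offset_ns;
}
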
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index a563058..4f475ab 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -62,8 +62,11 @@ bob_defaults {
mali_dma_buf_legacy_compat: {
kbuild_options: ["CONFIG_MALI_DMA_BUF_LEGACY_COMPAT=y"],
},
+ large_page_alloc_override: {
+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y"],
+ },
large_page_alloc: {
- kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
+ kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"],
},
mali_memory_fully_backed: {
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
@@ -86,9 +89,6 @@ bob_defaults {
mali_error_inject: {
kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
},
- mali_gem5_build: {
- kbuild_options: ["CONFIG_MALI_GEM5_BUILD=y"],
- },
mali_debug: {
kbuild_options: [
"CONFIG_MALI_DEBUG=y",
@@ -137,9 +137,6 @@ bob_defaults {
platform_is_fpga: {
kbuild_options: ["CONFIG_MALI_IS_FPGA=y"],
},
- mali_fw_core_dump: {
- kbuild_options: ["CONFIG_MALI_FW_CORE_DUMP=y"],
- },
mali_coresight: {
kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
},
@@ -194,6 +191,15 @@ bob_kernel_module {
"platform/*/*.c",
"platform/*/*.h",
"platform/*/Kbuild",
+ "platform/*/*/*.c",
+ "platform/*/*/*.h",
+ "platform/*/*/Kbuild",
+ "platform/*/*/*.c",
+ "platform/*/*/*.h",
+ "platform/*/*/Kbuild",
+ "platform/*/*/*/*.c",
+ "platform/*/*/*/*.h",
+ "platform/*/*/*/Kbuild",
"thirdparty/*.c",
"thirdparty/Kbuild",
"debug/*.c",
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 792f724..b8036b8 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
/*
* Base kernel context APIs
*/
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -129,13 +135,51 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
- spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
+ kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
+ /* Check if this is a Userspace created context */
+ if (likely(kctx->filp)) {
+ struct pid *pid_struct;
+
+ rcu_read_lock();
+ pid_struct = find_get_pid(kctx->tgid);
+ if (likely(pid_struct)) {
+ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+ if (likely(task)) {
+ /* Take a reference on the task to avoid slow lookup
+ * later on from the page allocation loop.
+ */
+ get_task_struct(task);
+ kctx->task = task;
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get task pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+
+ put_pid(pid_struct);
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get pid pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+ rcu_read_unlock();
+
+ if (unlikely(err))
+ return err;
+
+ kbase_mem_mmgrab();
+ kctx->process_mm = current->mm;
+ }
+
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -168,13 +212,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
mutex_lock(&kctx->kbdev->kctx_list_lock);
-
err = kbase_insert_kctx_to_process(kctx);
- if (err)
- dev_err(kctx->kbdev->dev,
- "(err:%d) failed to insert kctx to kbase_process\n", err);
-
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (err) {
+ dev_err(kctx->kbdev->dev,
+ "(err:%d) failed to insert kctx to kbase_process", err);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+ }
return err;
}
@@ -260,6 +307,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}
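A standalone sketch of the lookup-and-pin pattern the context init above relies on: resolve the tgid to its task_struct under RCU and take a reference so the task can be reused later (for example from the page allocation path) without another lookup. The helper name is hypothetical; the caller must balance it with put_task_struct().

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>

static struct task_struct *pin_task_by_tgid(pid_t tgid)
{
	struct pid *pid_struct;
	struct task_struct *task = NULL;

	rcu_read_lock();
	pid_struct = find_get_pid(tgid);
	if (pid_struct) {
		task = pid_task(pid_struct, PIDTYPE_PID);
		if (task)
			get_task_struct(task); /* held until put_task_struct() */
		put_pid(pid_struct);
	}
	rcu_read_unlock();

	return task; /* NULL if the thread group leader has already exited */
}
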
diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h
index a0c51c9..7c90e27 100644
--- a/mali_kbase/context/mali_kbase_context.h
+++ b/mali_kbase/context/mali_kbase_context.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -93,6 +93,19 @@ static inline bool kbase_ctx_flag(struct kbase_context *kctx,
}
/**
+ * kbase_ctx_compat_mode - Indicate whether a kbase context needs to operate
+ * in compatibility mode for 32-bit userspace.
+ * @kctx: kbase context
+ *
+ * Return: True if needs to maintain compatibility, False otherwise.
+ */
+static inline bool kbase_ctx_compat_mode(struct kbase_context *kctx)
+{
+ return !IS_ENABLED(CONFIG_64BIT) ||
+ (IS_ENABLED(CONFIG_64BIT) && kbase_ctx_flag(kctx, KCTX_COMPAT));
+}
+
+/**
* kbase_ctx_flag_clear - Clear @flag on @kctx
* @kctx: Pointer to kbase context
* @flag: Flag to clear
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index dbfcfde..88a3975 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -39,7 +39,9 @@
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
+
+#define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1 << 31)) /* 2GiB */
+#define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
#define PROTM_ALLOC_MAX_RETRIES ((u8)5)
@@ -73,6 +75,38 @@ struct irq_idle_and_protm_track {
s8 idle_slot;
};
+/**
+ * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ */
+static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (unlikely(kctx->csf.user_reg.vma))
+ dev_err(kbdev->dev, "VMA for USER Register page exist on termination of ctx %d_%d",
+ kctx->tgid, kctx->id);
+ if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
+ list_del_init(&kctx->csf.user_reg.link);
+}
+
+/**
+ * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
+ *
+ * @kctx: Pointer to the kbase context
+ *
+ * @return: 0 on success.
+ */
+static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
+{
+ INIT_LIST_HEAD(&kctx->csf.user_reg.link);
+ kctx->csf.user_reg.vma = NULL;
+ kctx->csf.user_reg.file_offset = 0;
+
+ return 0;
+}
+
static void put_user_pages_mmap_handle(struct kbase_context *kctx,
struct kbase_queue *queue)
{
@@ -262,7 +296,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct
ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
KBASEP_NUM_CS_USER_IO_PAGES,
- queue->phys, false);
+ queue->phys, false, kctx->task);
if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
/* Marking both the phys to zero for indicating there is no phys allocated */
queue->phys[0].tagged_addr = 0;
@@ -288,11 +322,8 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct
queue->db_file_offset = kbdev->csf.db_file_offsets;
kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
-#else
- WARN(refcount_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
-#endif
+ WARN(kbase_refcount_read(&queue->refcount) != 1,
+ "Incorrect refcounting for queue object\n");
/* This is the second reference taken on the queue object and
* would be dropped only when the IO mapping is removed either
* explicitly by userspace or implicitly by kernel on process exit.
@@ -364,21 +395,13 @@ static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
static void get_queue(struct kbase_queue *queue)
{
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN_ON(!atomic_inc_not_zero(&queue->refcount));
-#else
- WARN_ON(!refcount_inc_not_zero(&queue->refcount));
-#endif
+ WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
}
static void release_queue(struct kbase_queue *queue)
{
lockdep_assert_held(&queue->kctx->csf.lock);
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- if (atomic_dec_and_test(&queue->refcount)) {
-#else
- if (refcount_dec_and_test(&queue->refcount)) {
-#endif
+ if (kbase_refcount_dec_and_test(&queue->refcount)) {
/* The queue can't still be on the per context list. */
WARN_ON(!list_empty(&queue->link));
WARN_ON(queue->group);
@@ -394,7 +417,7 @@ static void release_queue(struct kbase_queue *queue)
* would free up the GPU queue memory.
*/
kbase_gpu_vm_lock(queue->kctx);
- kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg);
+ kbase_va_region_no_user_free_dec(queue->queue_reg);
kbase_gpu_vm_unlock(queue->kctx);
kfree(queue);
@@ -500,17 +523,16 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->kctx = kctx;
queue->base_addr = queue_addr;
- queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region);
+
+ queue->queue_reg = region;
+ kbase_va_region_no_user_free_inc(region);
+
queue->size = (queue_size << PAGE_SHIFT);
queue->csi_index = KBASEP_IF_NR_INVALID;
queue->enabled = false;
queue->priority = reg->priority;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_set(&queue->refcount, 1);
-#else
- refcount_set(&queue->refcount, 1);
-#endif
+ kbase_refcount_set(&queue->refcount, 1);
queue->group = NULL;
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
@@ -567,6 +589,13 @@ out:
int kbase_csf_queue_register(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register *reg)
{
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
+
return csf_queue_register_internal(kctx, reg, NULL);
}
@@ -585,6 +614,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
if (glb_version < kbase_csf_interface_version(1, 1, 0))
return -EINVAL;
+ /* Validate the ring buffer configuration parameters */
+ if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
+ reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
+ reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
+ reg->buffer_gpu_addr & ~PAGE_MASK)
+ return -EINVAL;
+
/* Validate the cs_trace configuration parameters */
if (reg->ex_buffer_size &&
((reg->ex_event_size > max_size) ||
@@ -904,6 +940,9 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
{
lockdep_assert_held(&kctx->csf.lock);
+ if (WARN_ON(queue->csi_index < 0))
+ return;
+
if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
unsigned long flags;
@@ -917,6 +956,7 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
put_user_pages_mmap_handle(kctx, queue);
+ WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
}
}
@@ -1094,7 +1134,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
- &s_buf->phy[0], false);
+ &s_buf->phy[0], false, kctx->task);
if (err < 0) {
kfree(s_buf->phy);
@@ -1534,6 +1574,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
}
KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf,
u8 group_handle)
@@ -1564,6 +1605,7 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
return err;
}
+#endif
void kbase_csf_add_group_fatal_error(
struct kbase_queue_group *const group,
@@ -1632,8 +1674,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kbase_csf_event_init(kctx);
- kctx->csf.user_reg_vma = NULL;
-
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1653,7 +1693,14 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
mutex_init(&kctx->csf.lock);
INIT_WORK(&kctx->csf.pending_submission_work,
pending_submission_worker);
- } else
+
+ err = kbasep_ctx_user_reg_page_mapping_init(kctx);
+
+ if (unlikely(err))
+ kbase_csf_tiler_heap_context_term(kctx);
+ }
+
+ if (unlikely(err))
kbase_csf_kcpu_queue_context_term(kctx);
}
@@ -1811,17 +1858,14 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* only one reference left that was taken when queue was
* registered.
*/
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN_ON(atomic_read(&queue->refcount) != 1);
-#else
- WARN_ON(refcount_read(&queue->refcount) != 1);
-#endif
+ WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
list_del_init(&queue->link);
release_queue(queue);
}
mutex_unlock(&kctx->csf.lock);
+ kbasep_ctx_user_reg_page_mapping_term(kctx);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
kbase_csf_scheduler_context_term(kctx);
@@ -2736,6 +2780,9 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
+
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_IDLE_MASK);
@@ -3149,12 +3196,12 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
struct file *filp;
int ret;
- filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
+ filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
if (IS_ERR(filp))
return PTR_ERR(filp);
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false);
+ false, NULL);
if (ret <= 0) {
fput(filp);
@@ -3170,29 +3217,34 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
{
- if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
- struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
+ if (kbdev->csf.user_reg.filp) {
+ struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
- kbase_mem_pool_free(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
- false);
+ kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
+ fput(kbdev->csf.user_reg.filp);
}
}
int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
{
struct tagged_addr phys;
+ struct file *filp;
struct page *page;
u32 *addr;
- int ret;
- kbdev->csf.dummy_user_reg_page = as_tagged(0);
+ kbdev->csf.user_reg.filp = NULL;
- ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false);
+ filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
+ if (IS_ERR(filp)) {
+ dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
+ return PTR_ERR(filp);
+ }
- if (ret <= 0)
- return ret;
+ if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+ false, NULL) <= 0) {
+ fput(filp);
+ return -ENOMEM;
+ }
page = as_page(phys);
addr = kmap_atomic(page);
@@ -3202,12 +3254,13 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
*/
addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
- kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
DMA_BIDIRECTIONAL);
kunmap_atomic(addr);
- kbdev->csf.dummy_user_reg_page = phys;
-
+ kbdev->csf.user_reg.filp = filp;
+ kbdev->csf.user_reg.dummy_page = phys;
+ kbdev->csf.user_reg.file_offset = 0;
return 0;
}
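The queue-register paths above now reject malformed ring buffer parameters before any allocation happens. A minimal userspace-style sketch of the same checks, assuming 4 KiB pages; the constant names mirror CS_RING_BUFFER_MIN_SIZE/CS_RING_BUFFER_MAX_SIZE from the diff but the helper itself is illustrative:

#include <stdbool.h>
#include <stdint.h>

#define RB_MIN_SIZE 4096u        /* CS_RING_BUFFER_MIN_SIZE */
#define RB_MAX_SIZE (1u << 31)   /* CS_RING_BUFFER_MAX_SIZE, 2 GiB */
#define PAGE_ALIGN_MASK ((uint64_t)4096 - 1)

static bool ring_buffer_params_valid(uint32_t size, uint64_t gpu_addr)
{
	if (size < RB_MIN_SIZE || size > RB_MAX_SIZE)
		return false;
	if (size & (size - 1))                         /* must be a power of two */
		return false;
	if (!gpu_addr || (gpu_addr & PAGE_ALIGN_MASK)) /* must be page aligned */
		return false;
	return true;
}
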
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 9fbc932..dd947dc 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -274,6 +274,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
*/
void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* kbase_csf_queue_group_suspend - Suspend a GPU command queue group
*
@@ -291,6 +292,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
*/
int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
+#endif
/**
* kbase_csf_add_group_fatal_error - Report a fatal group error to userspace
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index f1af1b9..f09544c 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,6 +30,7 @@
#include <linux/wait.h>
#include "mali_kbase_csf_firmware.h"
+#include "mali_kbase_refcount_defs.h"
#include "mali_kbase_csf_event.h"
#include <uapi/gpu/arm/midgard/csf/mali_kbase_csf_errors_dumpfault.h>
@@ -269,6 +270,8 @@ enum kbase_queue_group_priority {
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
+ * of a MMU operation
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
@@ -280,6 +283,7 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_BOOT_TIMEOUT,
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
+ MMU_AS_INACTIVE_WAIT_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -387,11 +391,7 @@ struct kbase_queue {
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_t refcount;
-#else
- refcount_t refcount;
-#endif
+ kbase_refcount_t refcount;
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
struct work_struct oom_event_work;
@@ -779,6 +779,23 @@ struct kbase_csf_event {
};
/**
+ * struct kbase_csf_user_reg_context - Object containing members to manage the mapping
+ * of USER Register page for a context.
+ *
+ * @vma: Pointer to the VMA corresponding to the virtual mapping
+ * of the USER register page.
+ * @file_offset: File offset value that is assigned to userspace mapping
+ * of the USER Register page. It is in page units.
+ * @link: Links the context to the device list when mapping is pointing to
+ * either the dummy or the real Register page.
+ */
+struct kbase_csf_user_reg_context {
+ struct vm_area_struct *vma;
+ u32 file_offset;
+ struct list_head link;
+};
+
+/**
* struct kbase_csf_context - Object representing CSF for a GPU address space.
*
* @event_pages_head: A list of pages allocated for the event memory used by
@@ -816,13 +833,11 @@ struct kbase_csf_event {
* used by GPU command queues, and progress timeout events.
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
- * @user_reg_vma: Pointer to the vma corresponding to the virtual mapping
- * of the USER register page. Currently used only for sanity
- * checking.
* @sched: Object representing the scheduler's context
* @pending_submission_work: Work item to process pending kicked GPU command queues.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
+ * @user_reg: Collective information to support mapping to USER Register page.
*/
struct kbase_csf_context {
struct list_head event_pages_head;
@@ -837,12 +852,12 @@ struct kbase_csf_context {
struct kbase_csf_tiler_heap_context tiler_heaps;
struct workqueue_struct *wq;
struct list_head link;
- struct vm_area_struct *user_reg_vma;
struct kbase_csf_scheduler_context sched;
struct work_struct pending_submission_work;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
#endif
+ struct kbase_csf_user_reg_context user_reg;
};
/**
@@ -1427,6 +1442,37 @@ struct kbase_csf_dump_on_fault {
#endif /* CONFIG_DEBUG_FS*/
/**
+ * struct kbase_csf_user_reg - Object containing members to manage the mapping
+ * of USER Register page for all contexts
+ *
+ * @dummy_page: Address of a dummy page that is mapped in place
+ * of the real USER Register page just before the GPU
+ * is powered down. The USER Register page is mapped
+ * in the address space of every process, that created
+ * a Base context, to enable the access to LATEST_FLUSH
+ * register from userspace.
+ * @filp: Pointer to a dummy file, that along with @file_offset,
+ * facilitates the use of unique file offset for the userspace mapping
+ * created for USER Register page.
+ * The userspace mapping is made to point to this file
+ * inside the mmap handler.
+ * @file_offset: Counter that is incremented every time Userspace creates a mapping of
+ * USER Register page, to provide a unique file offset range for
+ * @filp file, so that the CPU PTE of the Userspace mapping can be zapped
+ * through the kernel function unmap_mapping_range().
+ * It is incremented in page units.
+ * @list: Linked list to maintain user processes(contexts)
+ * having the mapping to USER Register page.
+ * It's protected by &kbase_csf_device.reg_lock.
+ */
+struct kbase_csf_user_reg {
+ struct tagged_addr dummy_page;
+ struct file *filp;
+ u32 file_offset;
+ struct list_head list;
+};
+
+/**
* struct kbase_csf_device - Object representing CSF for an instance of GPU
* platform device.
*
@@ -1463,20 +1509,6 @@ struct kbase_csf_dump_on_fault {
* of the real Hw doorbell page for the active GPU
* command queues after they are stopped or after the
* GPU is powered down.
- * @dummy_user_reg_page: Address of the dummy page that is mapped in place
- * of the real User register page just before the GPU
- * is powered down. The User register page is mapped
- * in the address space of every process, that created
- * a Base context, to enable the access to LATEST_FLUSH
- * register from userspace.
- * @nr_user_page_mapped: The number of clients using the mapping of USER page.
- * This is used to maintain backward compatibility.
- * It's protected by @reg_lock.
- * @mali_file_inode: Pointer to the inode corresponding to mali device
- * file. This is needed in order to switch to the
- * @dummy_user_reg_page on GPU power down.
- * All instances of the mali device file will point to
- * the same inode. It's protected by @reg_lock.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@@ -1531,7 +1563,7 @@ struct kbase_csf_dump_on_fault {
* the @p mcu_core_pwroff_dur_count as an update
* to the latter is asynchronous.
* @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
- * window in unit of microseconds. The firmware does not
+ * window in unit of microseconds. The firmware does not
* use it directly.
* @gpu_idle_dur_count: The counterpart of the hysteresis time window in
* interface required format, ready to be used
@@ -1545,6 +1577,8 @@ struct kbase_csf_dump_on_fault {
* @fw_core_dump: Contain members required for handling the firmware
* core dump.
* @dof: Structure for dump on fault.
+ * @user_reg: Collective information to support the mapping to
+ * USER Register page for user processes.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1558,9 +1592,6 @@ struct kbase_csf_device {
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
- struct tagged_addr dummy_user_reg_page;
- u32 nr_user_page_mapped;
- struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
bool interrupt_received;
@@ -1597,6 +1628,7 @@ struct kbase_csf_device {
*/
struct kbase_debug_coresight_device coresight;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
+ struct kbase_csf_user_reg user_reg;
};
/**
@@ -1613,6 +1645,10 @@ struct kbase_csf_device {
* @bf_data: Data relating to Bus fault.
* @gf_data: Data relating to GPU fault.
* @current_setup: Stores the MMU configuration for this address space.
+ * @is_unresponsive: Flag to indicate MMU is not responding.
+ * Set if a MMU command isn't completed within
+ * &kbase_device:mmu_as_inactive_wait_time_ms.
+ * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -1624,6 +1660,7 @@ struct kbase_as {
struct kbase_fault bf_data;
struct kbase_fault gf_data;
struct kbase_mmu_setup current_setup;
+ bool is_unresponsive;
};
#endif /* _KBASE_CSF_DEFS_H_ */
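The kbase_refcount_t and kbase_refcount_*() names used above come from the new mali_kbase_refcount_defs.h (see the diffstat), whose body is not shown in this diff. Judging from the call sites, the shim plausibly reduces to a kernel-version switch between refcount_t and atomic_t along these lines; treat this as an assumption, not the actual header contents:

#include <linux/version.h>

#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
#include <linux/refcount.h>
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_read(x)          refcount_read(x)
#define kbase_refcount_set(x, v)        refcount_set(x, v)
#define kbase_refcount_dec_and_test(x)  refcount_dec_and_test(x)
#define kbase_refcount_inc_not_zero(x)  refcount_inc_not_zero(x)
#else
#include <linux/atomic.h>
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_read(x)          atomic_read(x)
#define kbase_refcount_set(x, v)        atomic_set(x, v)
#define kbase_refcount_dec_and_test(x)  atomic_dec_and_test(x)
#define kbase_refcount_inc_not_zero(x)  atomic_inc_not_zero(x)
#endif
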
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 4dc9de4..d69a4d4 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -201,8 +201,8 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
if (!interface)
return -EINVAL;
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0,
+ interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
@@ -296,19 +296,41 @@ static void boot_csf_firmware(struct kbase_device *kbdev)
wait_for_firmware_boot(kbdev);
}
-static void wait_ready(struct kbase_device *kbdev)
+/**
+ * wait_ready() - Wait for previously issued MMU command to complete.
+ *
+ * @kbdev: Kbase device to wait for a MMU command to complete.
+ *
+ * Reset GPU if the wait for previously issued command times out.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int wait_ready(struct kbase_device *kbdev)
{
- u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
- u32 val;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ s64 diff;
- val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+ do {
+ unsigned int i;
- /* Wait for a while for the update command to take effect */
- while (--max_loops && (val & AS_STATUS_AS_ACTIVE))
- val = kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS));
+ for (i = 0; i < 1000; i++) {
+ /* Wait for the MMU status to indicate there is no active command */
+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE))
+ return 0;
+ }
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < mmu_as_inactive_wait_time_ms);
- if (max_loops == 0)
- dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for MCU AS. Might be caused by unstable GPU clk/pwr or faulty system");
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ return -ETIMEDOUT;
}
static void unload_mmu_tables(struct kbase_device *kbdev)
@@ -323,7 +345,7 @@ static void unload_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
}
-static void load_mmu_tables(struct kbase_device *kbdev)
+static int load_mmu_tables(struct kbase_device *kbdev)
{
unsigned long irq_flags;
@@ -334,7 +356,7 @@ static void load_mmu_tables(struct kbase_device *kbdev)
mutex_unlock(&kbdev->mmu_hw_mutex);
/* Wait for a while for the update command to take effect */
- wait_ready(kbdev);
+ return wait_ready(kbdev);
}
/**
@@ -695,7 +717,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mem_pool_alloc_pages(
kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
is_small_page),
- num_pages_aligned, phys, false);
+ num_pages_aligned, phys, false, NULL);
ignore_page_migration = false;
}
}
@@ -2240,6 +2262,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -2403,7 +2426,9 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
kbase_pm_wait_for_l2_powered(kbdev);
/* Load the MMU tables into the selected address space */
- load_mmu_tables(kbdev);
+ ret = load_mmu_tables(kbdev);
+ if (ret != 0)
+ goto err_out;
boot_csf_firmware(kbdev);
@@ -2445,9 +2470,8 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
goto err_out;
}
-#ifdef CONFIG_MALI_FW_CORE_DUMP
- kbase_csf_firmware_core_dump_init(kbdev);
-#endif
+ if (kbdev->csf.fw_core_dump.available)
+ kbase_csf_firmware_core_dump_init(kbdev);
/* Firmware loaded successfully, ret = 0 */
KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL,
@@ -3029,7 +3053,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
- phys, false);
+ phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -3040,8 +3064,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
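
The rewritten wait_ready() above swaps the old fixed iteration budget for a wall-clock bound, bursting register reads between comparatively expensive ktime reads and resetting the GPU on timeout. A minimal sketch of that bounded-poll shape, not part of the patch, with read_status() and STATUS_BUSY as hypothetical stand-ins for kbase_reg_read(..., AS_STATUS) and AS_STATUS_AS_ACTIVE, and timeout_ms playing the role of kbdev->mmu_as_inactive_wait_time_ms:

/* Illustrative only: assumes the kernel ktime/errno headers already used in the file above. */
static int poll_until_idle(struct kbase_device *kbdev, u32 timeout_ms)
{
	const ktime_t start = ktime_get_raw();

	do {
		unsigned int i;

		/* Burst of status reads between the pricier clock reads */
		for (i = 0; i < 1000; i++) {
			if (!(read_status(kbdev) & STATUS_BUSY)) /* hypothetical helpers */
				return 0;
		}
	} while (ktime_to_ms(ktime_sub(ktime_get_raw(), start)) < timeout_ms);

	return -ETIMEDOUT;
}
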
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 7976d90..37a7f21 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1124,6 +1124,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
+ INIT_LIST_HEAD(&kbdev->csf.user_reg.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
@@ -1569,7 +1570,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
goto page_list_alloc_error;
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
- phys, false);
+ phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
@@ -1580,8 +1581,8 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
+ KBASE_REG_ZONE_MCU_SHARED);
if (!va_reg)
goto va_region_alloc_error;
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index 42d19e1..7c14b8e 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -100,10 +100,10 @@ static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ct
lockdep_assert_held(&ctx_alloc->lock);
- /* There is no need to take vm_lock here as the ctx_alloc region is no_user_free
- * refcounted. The region and the backing page can't disappear whilst this
- * function is executing.
- * Flush type is passed as FLUSH_PT to CLN+INV L2 only.
+ /* There is no need to take vm_lock here as the ctx_alloc region is protected
+ * via a nonzero no_user_free_count. The region and the backing page can't
+ * disappear whilst this function is executing. Flush type is passed as FLUSH_PT
+ * to CLN+INV L2 only.
*/
kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
heap_context_pa, ctx_alloc->heap_context_size_aligned,
@@ -181,14 +181,9 @@ void kbase_csf_heap_context_allocator_term(
if (ctx_alloc->region) {
kbase_gpu_vm_lock(kctx);
- /*
- * We can't enforce (nor check) the no_user_free refcount
- * to be 0 here as other code regions can take such a reference.
- * Anyway, this isn't an issue as the region will eventually
- * be freed by the region tracker if its refcount didn't drop
- * to 0.
- */
- kbase_va_region_no_user_free_put(kctx, ctx_alloc->region);
+ WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));
+
+ kbase_va_region_no_user_free_dec(ctx_alloc->region);
kbase_mem_free_region(kctx, ctx_alloc->region);
kbase_gpu_vm_unlock(kctx);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 8c1fcdb..0797224 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -365,15 +365,16 @@ static int kbase_kcpu_jit_allocate_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
void __user *data = u64_to_user_ptr(alloc_info->info);
- struct base_jit_alloc_info *info;
+ struct base_jit_alloc_info *info = NULL;
u32 count = alloc_info->count;
int ret = 0;
u32 i;
lockdep_assert_held(&kcpu_queue->lock);
- if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
- count > ARRAY_SIZE(kctx->jit_alloc)) {
+ if ((count == 0) || (count > ARRAY_SIZE(kctx->jit_alloc)) ||
+ (count > kcpu_queue->kctx->jit_max_allocations) || (!data) ||
+ !kbase_mem_allow_alloc(kctx)) {
ret = -EINVAL;
goto out;
}
@@ -610,6 +611,7 @@ out:
return ret;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
static int kbase_csf_queue_group_suspend_prepare(
struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_group_suspend_info *suspend_buf,
@@ -681,8 +683,7 @@ static int kbase_csf_queue_group_suspend_prepare(
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
- (kbase_is_region_shrinkable(reg)) ||
- (kbase_va_region_is_no_user_free(kctx, reg))) {
+ (kbase_is_region_shrinkable(reg)) || (kbase_va_region_is_no_user_free(reg))) {
ret = -EINVAL;
goto out_clean_pages;
}
@@ -726,6 +727,7 @@ static int kbase_csf_queue_group_suspend_process(struct kbase_context *kctx,
{
return kbase_csf_queue_group_suspend(kctx, sus_buf, group_handle);
}
+#endif
static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
@@ -1037,9 +1039,12 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->kctx, cqs_wait_operation->objs[i].addr, &mapping);
u64 val = 0;
- /* GPUCORE-28172 RDT to review */
- if (!queue->command_started)
+ if (!queue->command_started) {
queue->command_started = true;
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START(
+ kbdev, queue);
+ }
+
if (!evt) {
dev_warn(kbdev->dev,
@@ -1089,7 +1094,8 @@ static int kbase_kcpu_cqs_wait_operation_process(struct kbase_device *kbdev,
queue->has_error = true;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END(
+ kbdev, queue, *(u32 *)evt);
queue->command_started = false;
}
@@ -1232,8 +1238,6 @@ static void kbase_kcpu_cqs_set_operation_process(
evt = (uintptr_t)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set_operation->objs[i].addr, &mapping);
- /* GPUCORE-28172 RDT to review */
-
if (!evt) {
dev_warn(kbdev->dev,
"Sync memory %llx already freed", cqs_set_operation->objs[i].addr);
@@ -1258,7 +1262,8 @@ static void kbase_kcpu_cqs_set_operation_process(
break;
}
- /* GPUCORE-28172 RDT to review */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION(
+ kbdev, queue, *(u32 *)evt ? 1 : 0);
/* Always propagate errors */
*(u32 *)evt = queue->has_error;
@@ -1622,11 +1627,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q
/* Set reference to KCPU metadata and increment refcount */
kcpu_fence->metadata = kcpu_queue->metadata;
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount));
-#else
- WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
-#endif
+ WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
/* create a sync_file fd representing the fence */
*sync_file = sync_file_create(fence_out);
@@ -2056,7 +2057,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
break;
}
- case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
+ case BASE_KCPU_COMMAND_TYPE_JIT_FREE: {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
status = kbase_kcpu_jit_free_process(queue, cmd);
@@ -2066,6 +2067,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
kbdev, queue);
break;
+ }
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND: {
struct kbase_suspend_copy_buffer *sus_buf =
cmd->info.suspend_buf_copy.sus_buf;
@@ -2082,24 +2085,25 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
kbdev, queue, status);
+ }
- if (!sus_buf->cpu_alloc) {
- int i;
+ if (!sus_buf->cpu_alloc) {
+ int i;
- for (i = 0; i < sus_buf->nr_pages; i++)
- put_page(sus_buf->pages[i]);
- } else {
- kbase_mem_phy_alloc_kernel_unmapped(
- sus_buf->cpu_alloc);
- kbase_mem_phy_alloc_put(
- sus_buf->cpu_alloc);
- }
+ for (i = 0; i < sus_buf->nr_pages; i++)
+ put_page(sus_buf->pages[i]);
+ } else {
+ kbase_mem_phy_alloc_kernel_unmapped(
+ sus_buf->cpu_alloc);
+ kbase_mem_phy_alloc_put(
+ sus_buf->cpu_alloc);
}
kfree(sus_buf->pages);
kfree(sus_buf);
break;
}
+#endif
default:
dev_dbg(kbdev->dev,
"Unrecognized command type");
@@ -2174,12 +2178,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
}
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_wait_operation_info *waits =
+ cmd->info.cqs_wait_operation.objs;
+ u32 inherit_err_flags = cmd->info.cqs_wait_operation.inherit_err_flags;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_wait_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION(
+ kbdev, queue, waits[i].addr, waits[i].val,
+ waits[i].operation, waits[i].data_type,
+ (inherit_err_flags & ((uint32_t)1 << i)) ? 1 : 0);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
{
- /* GPUCORE-28172 RDT to review */
+ const struct base_cqs_set_operation_info *sets = cmd->info.cqs_set_operation.objs;
+ unsigned int i;
+
+ for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION(
+ kbdev, queue, sets[i].addr, sets[i].val,
+ sets[i].operation, sets[i].data_type);
+ }
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
@@ -2226,11 +2247,13 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
+#endif
default:
dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
break;
@@ -2387,11 +2410,13 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
ret = kbase_kcpu_jit_free_prepare(queue,
&command.info.jit_free, kcpu_cmd);
break;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
ret = kbase_csf_queue_group_suspend_prepare(queue,
&command.info.suspend_buf_copy,
kcpu_cmd);
break;
+#endif
default:
dev_dbg(queue->kctx->kbdev->dev,
"Unknown command type %u", command.type);
@@ -2467,6 +2492,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
{
struct kbase_kcpu_command_queue *queue;
int idx;
+ int n;
int ret = 0;
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
@@ -2519,6 +2545,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
if (!metadata) {
+ destroy_workqueue(queue->wq);
kfree(queue);
ret = -ENOMEM;
goto out;
@@ -2526,14 +2553,17 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
metadata->kbdev = kctx->kbdev;
metadata->kctx_id = kctx->id;
- snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id,
- kctx->tgid, kctx->id, queue->fence_context);
+ n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu",
+ kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
+ if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
+ destroy_workqueue(queue->wq);
+ kfree(queue);
+ kfree(metadata);
+ ret = -EINVAL;
+ goto out;
+ }
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_set(&metadata->refcount, 1);
-#else
- refcount_set(&metadata->refcount, 1);
-#endif
+ kbase_refcount_set(&metadata->refcount, 1);
queue->metadata = metadata;
atomic_inc(&kctx->kbdev->live_fence_metadata);
#endif /* CONFIG_SYNC_FILE */
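
The kbase_refcount_set()/kbase_refcount_inc_not_zero() helpers adopted in mali_kbase_csf_kcpu.c above fold the previous per-call-site kernel-version conditionals into one place. Their definitions live in a header outside this diff; a plausible sketch, assuming they merely select between atomic_t and refcount_t exactly as the removed inline #if blocks did:

/* Assumed shape only: the real wrappers are defined elsewhere in the driver. */
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_set(r, v)       atomic_set(r, v)
#define kbase_refcount_inc_not_zero(r) atomic_inc_not_zero(r)
#else
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_set(r, v)       refcount_set(r, v)
#define kbase_refcount_inc_not_zero(r) refcount_inc_not_zero(r)
#endif
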
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index b8099fd..6d5145e 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -186,6 +186,7 @@ struct kbase_suspend_copy_buffer {
struct kbase_mem_phy_alloc *cpu_alloc;
};
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
/**
* struct kbase_kcpu_command_group_suspend_info - structure which contains
* suspend buffer data captured for a suspended queue group.
@@ -198,6 +199,7 @@ struct kbase_kcpu_command_group_suspend_info {
struct kbase_suspend_copy_buffer *sus_buf;
u8 group_handle;
};
+#endif
/**
@@ -232,7 +234,9 @@ struct kbase_kcpu_command {
struct kbase_kcpu_command_import_info import;
struct kbase_kcpu_command_jit_alloc_info jit_alloc;
struct kbase_kcpu_command_jit_free_info jit_free;
+#if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
struct kbase_kcpu_command_group_suspend_info suspend_buf_copy;
+#endif
} info;
};
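
The timeline-name change in mali_kbase_csf_kcpu.c above also starts acting on snprintf()'s return value, which is the length the formatted string needed; a result greater than or equal to the buffer size therefore signals truncation. A small illustration of the idiom, not taken from the patch:

/* Returns 0 if the formatted name fits in buf, -EINVAL if it was truncated. */
static int build_name(char *buf, size_t buf_sz, int dev_id, int tgid)
{
	int n = snprintf(buf, buf_sz, "%d-%d-kcpu", dev_id, tgid);

	if (n < 0 || (size_t)n >= buf_sz)
		return -EINVAL;

	return 0;
}
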
diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
index 77e19db..4056a9d 100644
--- a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
+++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
@@ -613,7 +613,7 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
int err, i;
INIT_LIST_HEAD(&csg_reg->link);
- reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
+ reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
KBASE_REG_ZONE_MCU_SHARED);
if (!reg) {
@@ -668,16 +668,17 @@ fail_userio_pages_map_fail:
while (i-- > 0) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
+ MCU_AS_NR, true);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_pmod_map_fail:
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
fail_susp_map_fail:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -701,15 +702,16 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
for (i = 0; i < nr_csis; i++) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR, true);
+ KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
+ MCU_AS_NR, true);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, MCU_AS_NR, true);
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -738,7 +740,7 @@ int kbase_csf_mcu_shared_regs_data_init(struct kbase_device *kbdev)
return -ENOMEM;
if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1,
- &shared_regs->dummy_phys[0], false) <= 0)
+ &shared_regs->dummy_phys[0], false, NULL) <= 0)
return -ENOMEM;
shared_regs->dummy_phys_allocated = true;
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
index 82389e5..b5bf7bb 100644
--- a/mali_kbase/csf/mali_kbase_csf_registers.h
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,10 +31,6 @@
* Begin register sets
*/
-/* DOORBELLS base address */
-#define DOORBELLS_BASE 0x0080000
-#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
-
/* CS_KERNEL_INPUT_BLOCK base address */
#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
@@ -71,10 +67,6 @@
#define GLB_OUTPUT_BLOCK_BASE 0x0000
#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
-/* USER base address */
-#define USER_BASE 0x0010000
-#define USER_REG(r) (USER_BASE + (r))
-
/* End register sets */
/*
@@ -267,9 +259,6 @@
#define GLB_DEBUG_ARG_OUT0 0x0FE0
#endif /* CONFIG_MALI_CORESIGHT */
-/* USER register offsets */
-#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
-
/* End register offsets */
/* CS_KERNEL_INPUT_BLOCK register set definitions */
@@ -728,6 +717,27 @@
#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0 0xC0
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1 0xC1
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2 0xC2
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3 0xC3
+#define CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4 0xC4
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0 0xC8
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1 0xC9
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2 0xCA
+#define CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3 0xCB
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1 0xD9
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2 0xDA
+#define CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3 0xDB
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN 0xE0
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0 0xE4
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1 0xE5
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2 0xE6
+#define CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3 0xE7
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0 0xE8
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1 0xE9
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2 0xEA
+#define CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3 0xEB
/* End of CS_FAULT_EXCEPTION_TYPE values */
#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)
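
The CS_FAULT exception type values added above sit alongside the data field whose shift and mask follow; a minimal sketch of splitting a raw CS_FAULT word into the two fields, assuming (as the 8-bit code values suggest) that the type occupies the low byte:

/* Illustrative only: decode_cs_fault() is not part of the patch. */
static void decode_cs_fault(u32 cs_fault, u32 *exception_type, u32 *exception_data)
{
	*exception_type = cs_fault & 0xFF; /* assumption: type is bits [7:0] */
	*exception_data = (cs_fault & CS_FAULT_EXCEPTION_DATA_MASK) >>
			  CS_FAULT_EXCEPTION_DATA_SHIFT;
}

The decoded type is what kbase_gpu_exception_name(), extended later in this patch, maps to strings such as "TRANSLATION_FAULT at level 2".
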
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 755df75..bbae94a 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1554,11 +1554,13 @@ static void program_cs(struct kbase_device *kbdev,
WARN_ON(csi_index >= ginfo->stream_num))
return;
- assign_user_doorbell_to_queue(kbdev, queue);
- if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
- return;
+ if (queue->enabled) {
+ assign_user_doorbell_to_queue(kbdev, queue);
+ if (queue->doorbell_nr == KBASEP_USER_DB_NR_INVALID)
+ return;
- WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ WARN_ON(queue->doorbell_nr != queue->group->doorbell_nr);
+ }
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
@@ -1860,6 +1862,7 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
unsigned long flags;
struct kbase_csf_cmd_stream_group_info *ginfo =
&global_iface->groups[slot];
+
u32 halt_cmd = suspend ? CSG_REQ_STATE_SUSPEND :
CSG_REQ_STATE_TERMINATE;
@@ -1877,8 +1880,8 @@ static void halt_csg_slot(struct kbase_queue_group *group, bool suspend)
csg_slot[slot].trigger_jiffies = jiffies;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_STOP_REQ, group, halt_cmd);
- KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG(
- kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, slot, suspend);
}
}
@@ -3433,6 +3436,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
+
if (likely(group)) {
bool as_fault;
/* Only do save/cleanup if the
@@ -5071,6 +5077,9 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
/* The on slot csg is now stopped */
clear_bit(i, slot_mask_local);
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
+
group = scheduler->csg_slots[i].resident_group;
if (likely(group)) {
/* Only do save/cleanup if the
@@ -5129,8 +5138,13 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
if (all_addr_spaces_used) {
for (i = 0; i != total_csg_slots; ++i) {
- if (scheduler->csg_slots[i].resident_group != NULL)
+ if (scheduler->csg_slots[i].resident_group != NULL) {
+ if (WARN_ON(scheduler->csg_slots[i].resident_group->kctx->as_nr <
+ 0))
+ continue;
+
as_usage[scheduler->csg_slots[i].resident_group->kctx->as_nr]++;
+ }
}
}
@@ -5151,6 +5165,9 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
(group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
((lru_idle_group == NULL) ||
(lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
+ if (WARN_ON(group->kctx->as_nr < 0))
+ continue;
+
/* If all address spaces are used, we need to ensure the group does not
 * share the AS with other active CSGs. Otherwise the CSG would be freed without an AS
* and this optimization would not work.
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 14d8097..8072a8b 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -228,11 +228,11 @@ static void remove_unlinked_chunk(struct kbase_context *kctx,
kbase_vunmap(kctx, &chunk->map);
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
- * For "no user free", we check that the refcount is 1 as it is a shrinkable region;
+ * For "no user free count", we check that the count is 1 as it is a shrinkable region;
* no other code part within kbase can take a reference to it.
*/
- WARN_ON(chunk->region->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, chunk->region);
+ WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
@@ -315,8 +315,8 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
* It should be fine and not a security risk if we let the region leak till
* region tracker termination in such a case.
*/
- if (unlikely(chunk->region->no_user_free_refcnt > 1)) {
- dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n");
+ if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
+ dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
goto unroll_region;
}
@@ -371,7 +371,7 @@ unroll_region:
/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
* regions), and so we must clear that flag too before freeing.
*/
- kbase_va_region_no_user_free_put(kctx, chunk->region);
+ kbase_va_region_no_user_free_dec(chunk->region);
#if !defined(CONFIG_MALI_VECTOR_DUMP)
chunk->region->flags &= ~KBASE_REG_DONT_NEED;
#endif
@@ -531,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap)
if (heap->buf_desc_reg) {
kbase_vunmap(kctx, &heap->buf_desc_map);
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -741,7 +741,8 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
*/
heap->buf_desc_va = buf_desc_va;
- heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg);
+ heap->buf_desc_reg = buf_desc_reg;
+ kbase_va_region_no_user_free_inc(buf_desc_reg);
vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
KBASE_REG_CPU_RD, &heap->buf_desc_map,
@@ -834,7 +835,7 @@ heap_context_alloc_failed:
buf_desc_vmap_failed:
if (heap->buf_desc_reg) {
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg);
+ kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
kbase_gpu_vm_unlock(kctx);
}
buf_desc_not_suitable:
@@ -967,7 +968,12 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
if (unlikely(err)) {
- dev_err(kctx->kbdev->dev,
+ /* The allocation request can be legitimate, but may be invoked on a heap
+ * that has already reached the maximum pre-configured capacity. This
+ * is useful debug information, but should not be treated as an error,
+ * since the request will be re-sent at a later point.
+ */
+ dev_dbg(kctx->kbdev->dev,
"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
gpu_heap_va, err);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 162b40f..910ba22 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,9 +31,7 @@
#include "mali_kbase_pm.h"
#include "mali_kbase_hwaccess_time.h"
-#include <linux/gcd.h>
#include <linux/math64.h>
-#include <asm/arch_timer.h>
#if IS_ENABLED(CONFIG_DEBUG_FS)
#include "tl/mali_kbase_timeline_priv.h"
@@ -98,81 +96,6 @@ void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
#endif
/**
- * get_cpu_gpu_time() - Get current CPU and GPU timestamps.
- *
- * @kbdev: Kbase device.
- * @cpu_ts: Output CPU timestamp.
- * @gpu_ts: Output GPU timestamp.
- * @gpu_cycle: Output GPU cycle counts.
- */
-static void get_cpu_gpu_time(
- struct kbase_device *kbdev,
- u64 *cpu_ts,
- u64 *gpu_ts,
- u64 *gpu_cycle)
-{
- struct timespec64 ts;
-
- kbase_pm_context_active(kbdev);
- kbase_backend_get_gpu_time(kbdev, gpu_cycle, gpu_ts, &ts);
- kbase_pm_context_idle(kbdev);
-
- if (cpu_ts)
- *cpu_ts = ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
-}
-
-
-/**
- * kbase_ts_converter_init() - Initialize system timestamp converter.
- *
- * @self: System Timestamp Converter instance.
- * @kbdev: Kbase device pointer
- *
- * Return: Zero on success, -1 otherwise.
- */
-static int kbase_ts_converter_init(
- struct kbase_ts_converter *self,
- struct kbase_device *kbdev)
-{
- u64 cpu_ts = 0;
- u64 gpu_ts = 0;
- u64 freq;
- u64 common_factor;
-
- get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
- freq = arch_timer_get_cntfrq();
-
- if (!freq) {
- dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
- return -1;
- }
-
- common_factor = gcd(NSEC_PER_SEC, freq);
-
- self->multiplier = div64_u64(NSEC_PER_SEC, common_factor);
- self->divisor = div64_u64(freq, common_factor);
- self->offset =
- cpu_ts - div64_u64(gpu_ts * self->multiplier, self->divisor);
-
- return 0;
-}
-
-/**
- * kbase_ts_converter_convert() - Convert GPU timestamp to CPU timestamp.
- *
- * @self: System Timestamp Converter instance.
- * @gpu_ts: System timestamp value to converter.
- *
- * Return: The CPU timestamp.
- */
-static u64 __maybe_unused
-kbase_ts_converter_convert(const struct kbase_ts_converter *self, u64 gpu_ts)
-{
- return div64_u64(gpu_ts * self->multiplier, self->divisor) +
- self->offset;
-}
-
-/**
* tl_reader_overflow_notify() - Emit stream overflow tracepoint.
*
* @self: CSFFW TL Reader instance.
@@ -322,8 +245,8 @@ int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
{
struct kbase_csffw_tl_message *msg =
(struct kbase_csffw_tl_message *) csffw_data_it;
- msg->timestamp = kbase_ts_converter_convert(&self->ts_converter,
- msg->timestamp);
+ msg->timestamp =
+ kbase_backend_time_convert_gpu_to_cpu(kbdev, msg->timestamp);
}
/* Copy the message out to the tl_stream. */
@@ -397,9 +320,6 @@ static int tl_reader_init_late(
return -1;
}
- if (kbase_ts_converter_init(&self->ts_converter, kbdev))
- return -1;
-
self->kbdev = kbdev;
self->trace_buffer = tb;
self->tl_header.data = hdr;
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
index d554d56..12b285f 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -40,37 +40,6 @@ struct kbase_tlstream;
struct kbase_device;
/**
- * struct kbase_ts_converter - System timestamp to CPU timestamp converter state.
- *
- * @multiplier: Numerator of the converter's fraction.
- * @divisor: Denominator of the converter's fraction.
- * @offset: Converter's offset term.
- *
- * According to Generic timer spec, system timer:
- * - Increments at a fixed frequency
- * - Starts operating from zero
- *
- * Hence CPU time is a linear function of System Time.
- *
- * CPU_ts = alpha * SYS_ts + beta
- *
- * Where
- * - alpha = 10^9/SYS_ts_freq
- * - beta is calculated by two timer samples taken at the same time:
- * beta = CPU_ts_s - SYS_ts_s * alpha
- *
- * Since alpha is a rational number, we minimizing possible
- * rounding error by simplifying the ratio. Thus alpha is stored
- * as a simple `multiplier / divisor` ratio.
- *
- */
-struct kbase_ts_converter {
- u64 multiplier;
- u64 divisor;
- s64 offset;
-};
-
-/**
* struct kbase_csf_tl_reader - CSFFW timeline reader state.
*
* @read_timer: Timer used for periodical tracebufer reading.
@@ -106,7 +75,6 @@ struct kbase_csf_tl_reader {
size_t size;
size_t btc;
} tl_header;
- struct kbase_ts_converter ts_converter;
bool got_first_event;
bool is_active;
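
To make the mapping that the removed kbase_ts_converter documented concrete (the same linear conversion is now supplied by kbase_backend_time_convert_gpu_to_cpu()): taking an illustrative 25 MHz system counter, alpha = 10^9 / 25 000 000 = 40, so after dividing by the gcd the ratio is stored as multiplier 40 and divisor 1, and a GPU timestamp converts as CPU_ts = 40 * SYS_ts + beta nanoseconds, where beta is fixed once from a simultaneously sampled CPU/GPU timestamp pair.
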
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 217a056..492684f 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -123,6 +123,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
+ err = kbase_backend_time_init(kbdev);
+ if (err)
+ goto fail_update_l2_features;
+
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
kbase_pm_context_idle(kbdev);
@@ -285,8 +289,10 @@ static const struct kbase_device_init dev_init[] = {
"Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
- { registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+ { registers_map, registers_unmap, "Register map failed" },
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
@@ -359,7 +365,6 @@ static void kbase_device_term_partial(struct kbase_device *kbdev,
void kbase_device_term(struct kbase_device *kbdev)
{
- kbdev->csf.mali_file_inode = NULL;
kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
kbase_mem_halt(kbdev);
}
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index c104fa4..b46180f 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -100,6 +100,10 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
+ err = kbase_backend_time_init(kbdev);
+ if (err)
+ goto fail_update_l2_features;
+
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
/* Idle the GPU and/or cores, if the policy wants it to */
@@ -211,17 +215,19 @@ static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbd
static const struct kbase_device_init dev_init[] = {
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
- { kbase_gpu_device_create, kbase_gpu_device_destroy,
- "Dummy model initialization failed" },
+ { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
- { registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+ { registers_map, registers_unmap, "Register map failed" },
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
{ kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
+ { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
@@ -237,7 +243,6 @@ static const struct kbase_device_init dev_init[] = {
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
- { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
{ kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 4f5ac22..15839ae 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,7 @@
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
#include <mali_kbase_hwaccess_instr.h>
+#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_hw.h>
#include <mali_kbase_config_defaults.h>
#include <linux/priority_control_manager.h>
@@ -308,7 +309,8 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
#endif /* MALI_USE_CSF */
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
-
+ kbdev->mmu_as_inactive_wait_time_ms =
+ kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
mutex_init(&kbdev->kctx_list_lock);
INIT_LIST_HEAD(&kbdev->kctx_list);
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index 15bfd03..60ba9be 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -105,6 +105,70 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
case GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT:
e = "GPU_CACHEABILITY_FAULT";
break;
+ /* MMU Fault */
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L0:
+ e = "TRANSLATION_FAULT at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L1:
+ e = "TRANSLATION_FAULT at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L2:
+ e = "TRANSLATION_FAULT at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L3:
+ e = "TRANSLATION_FAULT at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_TRANSLATION_FAULT_L4:
+ e = "TRANSLATION_FAULT";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_0:
+ e = "PERMISSION_FAULT at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_1:
+ e = "PERMISSION_FAULT at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_2:
+ e = "PERMISSION_FAULT at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_PERMISSION_FAULT_3:
+ e = "PERMISSION_FAULT at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_1:
+ e = "ACCESS_FLAG at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_2:
+ e = "ACCESS_FLAG at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ACCESS_FLAG_3:
+ e = "ACCESS_FLAG at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_IN:
+ e = "ADDRESS_SIZE_FAULT_IN";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_0:
+ e = "ADDRESS_SIZE_FAULT_OUT_0 at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_1:
+ e = "ADDRESS_SIZE_FAULT_OUT_1 at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_2:
+ e = "ADDRESS_SIZE_FAULT_OUT_2 at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_ADDRESS_SIZE_FAULT_OUT_3:
+ e = "ADDRESS_SIZE_FAULT_OUT_3 at level 3";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_0:
+ e = "MEMORY_ATTRIBUTE_FAULT_0 at level 0";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_1:
+ e = "MEMORY_ATTRIBUTE_FAULT_1 at level 1";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_2:
+ e = "MEMORY_ATTRIBUTE_FAULT_2 at level 2";
+ break;
+ case CS_FAULT_EXCEPTION_TYPE_MEMORY_ATTRIBUTE_FAULT_3:
+ e = "MEMORY_ATTRIBUTE_FAULT_3 at level 3";
+ break;
/* Any other exception code is unknown */
default:
e = "UNKNOWN";
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
index 380ec30..f86f493 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -108,7 +108,6 @@
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
-#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
@@ -125,8 +124,6 @@
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
-#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
-
#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
diff --git a/mali_kbase/gpu/mali_kbase_gpu_fault.h b/mali_kbase/gpu/mali_kbase_gpu_fault.h
index 8b50a5d..6a937a5 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_fault.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_fault.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,9 +27,9 @@
*
* @exception_code: exception code
*
- * This function is called from the interrupt handler when a GPU fault occurs.
+ * This function is called by error handlers when the GPU reports an error.
*
- * Return: name associated with the exception code
+ * Return: Error string associated with the exception code
*/
const char *kbase_gpu_exception_name(u32 exception_code);
diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
index 907a872..e51791f 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
@@ -51,9 +51,7 @@
#define MMU_FEATURES 0x014 /* (RO) MMU features */
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
-#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_MASK 0x028 /* (RW) */
-#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define GPU_COMMAND 0x030 /* (WO) */
#define GPU_STATUS 0x034 /* (RO) */
@@ -176,14 +174,9 @@
/* Job control registers */
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
-#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
-#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
-#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
-#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
-
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index 424a360..27acfc6 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -289,6 +289,8 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
+ memset(cycle_counts, 0, sizeof(cycle_counts));
+
/* Read cycle count from CSF interface for both clock domains. */
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
clk_enable_map);
@@ -308,6 +310,8 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b
u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
size_t clk;
+ memset(cycle_counts, 0, sizeof(cycle_counts));
+
backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
@@ -558,7 +562,7 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
u32 insert_index_to_stop)
{
u32 raw_idx;
- unsigned long flags;
+ unsigned long flags = 0UL;
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
@@ -639,7 +643,7 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
{
struct kbase_hwcnt_backend_csf_info *csf_info = info;
struct kbase_hwcnt_backend_csf *backend_csf;
- unsigned long flags;
+ unsigned long flags = 0UL;
csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
@@ -658,8 +662,8 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
/* 3. dump state indicates no other dumping is in progress. */
((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
(backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
/* Read the raw extract and insert indexes from the CSF interface. */
csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index);
@@ -700,11 +704,11 @@ static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
*/
static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
{
- unsigned long flags;
+ unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
u32 insert_index_to_acc;
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
WARN_ON(!work);
backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work);
@@ -776,10 +780,10 @@ static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
*/
static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
{
- unsigned long flags;
+ unsigned long flags = 0ULL;
struct kbase_hwcnt_backend_csf *backend_csf;
- u32 extract_index;
- u32 insert_index;
+ u32 extract_index = 0U;
+ u32 insert_index = 0U;
WARN_ON(!work);
@@ -920,7 +924,7 @@ static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *back
const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend_csf)
@@ -954,7 +958,7 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_disable = false;
@@ -1050,7 +1054,7 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
u64 *dump_time_ns)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_request = false;
bool watchdog_dumping = false;
@@ -1157,7 +1161,7 @@ static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *bac
/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
int errcode;
@@ -1365,7 +1369,7 @@ alloc_error:
static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
struct kbase_hwcnt_backend **out_backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = NULL;
struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info;
int errcode;
@@ -1407,7 +1411,7 @@ static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *
/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
if (!backend)
@@ -1619,7 +1623,7 @@ void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *
void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
@@ -1639,7 +1643,7 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i
void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface)
{
- unsigned long flags;
+ unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf_info *csf_info;
struct kbase_hwcnt_backend_csf *backend_csf;
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index 124fd4c..e4a963d 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -329,7 +329,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Get physical page for the buffer */
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
- phys, false);
+ phys, false, NULL);
if (ret != num_pages)
goto phys_mem_pool_alloc_error;
@@ -482,7 +482,8 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
- fw_ring_buf->num_pages, MCU_AS_NR, true));
+ fw_ring_buf->num_pages, fw_ring_buf->num_pages,
+ MCU_AS_NR, true));
vunmap(fw_ring_buf->cpu_dump_base);
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
index e724572..34deb5d 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -362,7 +362,7 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *
bool cur_map_any_enabled;
struct kbase_hwcnt_enable_map *cur_map;
bool new_map_any_enabled = false;
- u64 dump_time_ns;
+ u64 dump_time_ns = 0;
struct kbase_hwcnt_accumulator *accum;
WARN_ON(!hctx);
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index cd5a9bf..5a204ae 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -455,16 +455,14 @@ static const struct kbase_ipa_group ipa_groups_def_tbax[] = {
},
};
-
-#define IPA_POWER_MODEL_OPS(gpu, init_token) \
- const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
- .name = "mali-" #gpu "-power-model", \
- .init = kbase_ ## init_token ## _power_model_init, \
- .term = kbase_ipa_vinstr_common_model_term, \
- .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
- .reset_counter_data = kbase_ipa_vinstr_reset_data, \
- }; \
- KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+ static const struct kbase_ipa_model_ops kbase_##gpu##_ipa_model_ops = { \
+ .name = "mali-" #gpu "-power-model", \
+ .init = kbase_##init_token##_power_model_init, \
+ .term = kbase_ipa_vinstr_common_model_term, \
+ .get_dynamic_coeff = kbase_ipa_vinstr_dynamic_coeff, \
+ .reset_counter_data = kbase_ipa_vinstr_reset_data, \
+ }
#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
static int kbase_ ## gpu ## _power_model_init(\
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index fe8995a..debc3ad 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -127,10 +127,17 @@
/**
* enum kbase_timeout_selector - The choice of which timeout to get scaled
* using the lowest GPU frequency.
+ * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
+ * of an MMU operation.
+ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT
+ * to be updated on the HW side so a Job Slot is
+ * considered free.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
*/
enum kbase_timeout_selector {
+ MMU_AS_INACTIVE_WAIT_TIMEOUT,
+ JM_DEFAULT_JS_FREE_TIMEOUT,
/* Must be the last in the enum */
KBASE_TIMEOUT_SELECTOR_COUNT
@@ -852,6 +859,10 @@ struct jsctx_queue {
* @pf_data: Data relating to Page fault.
* @bf_data: Data relating to Bus fault.
* @current_setup: Stores the MMU configuration for this address space.
+ * @is_unresponsive: Flag to indicate the MMU is not responding.
+ * Set if an MMU command isn't completed within
+ * &kbase_device.mmu_as_inactive_wait_time_ms.
+ * Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -861,6 +872,7 @@ struct kbase_as {
struct kbase_fault pf_data;
struct kbase_fault bf_data;
struct kbase_mmu_setup current_setup;
+ bool is_unresponsive;
};
#endif /* _KBASE_JM_DEFS_H_ */
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index 924a685..2b93d3d 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -277,6 +277,7 @@ typedef u32 kbase_atom_ordering_flag_t;
 * @nr_contexts_runnable:Number of contexts that can either be pulled from or
 *                       are currently running
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
 * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen as free.
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
* independently of the Run Pool.
* Of course, you don't need the Run Pool lock to access this.
@@ -329,6 +330,8 @@ struct kbasep_js_device_data {
u32 nr_contexts_pullable;
atomic_t nr_contexts_runnable;
atomic_t soft_job_timeout_ms;
+ u32 js_free_wait_time_ms;
+
struct mutex queue_mutex;
/*
* Run Pool mutex, for managing contexts within the runpool.
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index c6fea79..11aedef 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -40,6 +40,7 @@ enum base_hw_feature {
BASE_HW_FEATURE_FLUSH_INV_SHADER_OTHER,
BASE_HW_FEATURE_CORE_FEATURES,
BASE_HW_FEATURE_PBHA_HWU,
+ BASE_HW_FEATURE_LARGE_PAGE_ALLOC,
BASE_HW_FEATURE_END
};
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 2dc0402..0fbdec0 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -796,6 +796,19 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2
BASE_HW_ISSUE_END
};
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p3[] = {
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_TURSEHW_2716,
+ BASE_HW_ISSUE_GPU2019_3901,
+ BASE_HW_ISSUE_GPU2021PRO_290,
+ BASE_HW_ISSUE_TITANHW_2710,
+ BASE_HW_ISSUE_TITANHW_2679,
+ BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_END
+};
+
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = {
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 5035ed5..96529a3 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -221,6 +221,16 @@ enum {
*/
#define JM_DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
+/* Default timeout in clock cycles to be used when checking if JS_COMMAND_NEXT
+ * is updated on the HW side so that a Job Slot is considered free.
+ * This timeout only takes effect on GPUs with a low minimum GPU clock
+ * frequency (<= 100MHz).
+ *
+ * Based on a 1ms timeout at 100MHz. It will default to 0ms on GPUs with a
+ * higher minimum GPU clock frequency.
+ */
+#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000)
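As a rough sketch only (the helper name below is hypothetical; the driver derives the actual value through kbase_get_timeout_ms()), a cycle budget like this scales to milliseconds by dividing by the lowest GPU clock frequency in kHz, since N kHz equals N cycles per millisecond:

#include <linux/math64.h>	/* div_u64() */

/* Hypothetical sketch: ms = cycles / freq_khz.
 * 100000 cycles at 100MHz (100000 kHz) -> 1ms; at 1GHz it rounds down to 0ms.
 */
static unsigned int sketch_cycles_to_ms(u64 timeout_cycles, u32 lowest_freq_khz)
{
	return (unsigned int)div_u64(timeout_cycles, lowest_freq_khz);
}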
+
#endif /* MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
@@ -257,5 +267,12 @@ enum {
*/
#define DEFAULT_IR_THRESHOLD (192)
+/* Waiting time in clock cycles for the completion of an MMU operation.
+ *
+ * Ideally, 1.6M GPU cycles are required for the L2 cache (512KiB slice) flush.
+ *
+ * As a pessimistic value, 50M GPU cycles (more than 30 times larger) is chosen.
+ * It corresponds to 0.5s on a GPU running at 100MHz.
+ */
+#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024)
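For orientation, 50 * 1024 * 1024 = 52,428,800 cycles, which is roughly 0.52s at 100MHz and roughly 52ms at 1GHz, i.e. comfortably above the ~1.6M-cycle ideal case quoted above.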
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
-
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index a8f8791..4179091 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1565,7 +1565,6 @@ static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
cpu_queue_info->size);
}
-#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
static int kbase_ioctl_read_user_page(struct kbase_context *kctx,
union kbase_ioctl_read_user_page *user_page)
{
@@ -2051,6 +2050,7 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct kbase_ioctl_cs_cpu_queue_info,
kctx);
break;
+ /* This IOCTL will be kept for backward compatibility */
case KBASE_IOCTL_READ_USER_PAGE:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_READ_USER_PAGE, kbase_ioctl_read_user_page,
union kbase_ioctl_read_user_page, kctx);
@@ -2217,7 +2217,10 @@ KBASE_EXPORT_TEST_API(kbase_event_wakeup);
#if MALI_USE_CSF
int kbase_event_pending(struct kbase_context *ctx)
{
- WARN_ON_ONCE(!ctx);
+ KBASE_DEBUG_ASSERT(ctx);
+
+ if (unlikely(!ctx))
+ return -EPERM;
return (atomic_read(&ctx->event_count) != 0) ||
kbase_csf_event_error_pending(ctx) ||
@@ -2228,6 +2231,9 @@ int kbase_event_pending(struct kbase_context *ctx)
{
KBASE_DEBUG_ASSERT(ctx);
+ if (unlikely(!ctx))
+ return -EPERM;
+
return (atomic_read(&ctx->event_count) != 0) ||
(atomic_read(&ctx->event_closed) != 0);
}
@@ -4276,7 +4282,7 @@ void kbase_protected_mode_term(struct kbase_device *kbdev)
kfree(kbdev->protected_dev);
}
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
return 0;
@@ -4284,7 +4290,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
{
}
-#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#else /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
static int kbase_common_reg_map(struct kbase_device *kbdev)
{
int err = 0;
@@ -4320,7 +4326,7 @@ static void kbase_common_reg_unmap(struct kbase_device * const kbdev)
kbdev->reg_size = 0;
}
}
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
+#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
int registers_map(struct kbase_device * const kbdev)
{
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index beb2928..dc6feb9 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -242,6 +242,7 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
struct kbase_context *kctx;
+ kbdev->as[i].is_unresponsive = false;
#if MALI_USE_CSF
if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) {
kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu,
diff --git a/mali_kbase/mali_kbase_debug_mem_allocs.c b/mali_kbase/mali_kbase_debug_mem_allocs.c
index 598d8f5..418bb19 100644
--- a/mali_kbase/mali_kbase_debug_mem_allocs.c
+++ b/mali_kbase/mali_kbase_debug_mem_allocs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -26,7 +26,7 @@
#include "mali_kbase_debug_mem_allocs.h"
#include "mali_kbase.h"
-#include <string.h>
+#include <linux/string.h>
#include <linux/list.h>
#include <linux/file.h>
diff --git a/mali_kbase/mali_kbase_debugfs_helper.c b/mali_kbase/mali_kbase_debugfs_helper.c
index 4c1aa28..c846491 100644
--- a/mali_kbase/mali_kbase_debugfs_helper.c
+++ b/mali_kbase/mali_kbase_debugfs_helper.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -90,11 +90,10 @@ set_attr_from_string(char *const buf, void *const array, size_t const nelems,
int kbase_debugfs_string_validator(char *const buf)
{
- size_t index;
int err = 0;
char *ptr = buf;
- for (index = 0; *ptr; ++index) {
+ while (*ptr) {
unsigned long test_number;
size_t len;
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index e98ab45..6236f70 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -238,12 +238,25 @@ struct kbase_fault {
bool protected_mode;
};
+/** Maximum number of memory pages that should be allocated for the array
+ * of pointers to free PGDs.
+ *
+ * This number has been pre-calculated to deal with the maximum allocation
+ * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
+ * This should be enough for the vast majority of MMU operations.
+ * Any allocation larger than KBASE_MEM_ALLOC_MAX_SIZE must be broken down
+ * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
+ * bytes.
+ *
+ * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
+ */
+#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
+
+/* Maximum number of pointers to free PGDs */
+#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
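As a worked example, assuming a 4KiB PAGE_SIZE and 8-byte pointers, PAGE_SIZE / sizeof(struct page *) gives 512 entries per page, so MAX_FREE_PGDS works out to 512 * 9 = 4608 PGD pointers that can be queued for freeing within a single operation.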
+
/**
* struct kbase_mmu_table - object representing a set of GPU page tables
- * @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
- * to cache the entries of top (L0) & intermediate level
- * page tables (L1 & L2) to avoid repeated calls to
- * kmap_atomic() during the MMU teardown.
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
* page tables
* @pgd: Physical address of the page allocated for the top
@@ -255,14 +268,40 @@ struct kbase_fault {
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
* @kctx: If this set of MMU tables belongs to a context then
* this is a back-reference to the context, otherwise
- * it is NULL
+ * it is NULL.
+ * @scratch_mem: Scratch memory used for MMU operations, which are
+ * serialized by the @mmu_lock.
*/
struct kbase_mmu_table {
- u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
struct mutex mmu_lock;
phys_addr_t pgd;
u8 group_id;
struct kbase_context *kctx;
+ union {
+ /**
+ * @teardown_pages: Scratch memory used for backup copies of whole
+ * PGD pages when tearing down levels upon
+ * termination of the MMU table.
+ */
+ struct {
+ /**
+ * @levels: Array of PGD pages, large enough to copy one PGD
+ * for each level of the MMU table.
+ */
+ u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
+ } teardown_pages;
+ /**
+		 * @free_pgds: Scratch memory used for insertion, update and teardown
+ * operations to store a temporary list of PGDs to be freed
+ * at the end of the operation.
+ */
+ struct {
+ /** @pgds: Array of pointers to PGDs to free. */
+ struct page *pgds[MAX_FREE_PGDS];
+ /** @head_index: Index of first free element in the PGDs array. */
+ size_t head_index;
+ } free_pgds;
+ } scratch_mem;
};
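Sizing note, assuming 4KiB pages and MIDGARD_MMU_BOTTOMLEVEL == 3: teardown_pages holds three full PGD-sized pages (12KiB), while free_pgds holds MAX_PAGES_FOR_FREE_PGDS (9) pages of pointers plus an index, so the union makes scratch_mem roughly 36KiB per kbase_mmu_table; sharing it between operations is safe because mmu_lock serializes them.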
/**
@@ -286,6 +325,8 @@ struct kbase_reg_zone {
#include "jm/mali_kbase_jm_defs.h"
#endif
+#include "mali_kbase_hwaccess_time.h"
+
static inline int kbase_as_has_bus_fault(struct kbase_as *as,
struct kbase_fault *fault)
{
@@ -754,6 +795,8 @@ struct kbase_mem_migrate {
 * GPU address spaces assigned to them.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
* register used in the handling of Bus & Page faults.
+ * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
+ * supported and used where possible.
* @gpu_props: Object containing complete information about the
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
@@ -799,6 +842,7 @@ struct kbase_mem_migrate {
* GPU reset.
* @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used
* to calculate suitable timeouts for wait operations.
+ * @backend_time: Kbase backend time related attributes.
* @cache_clean_in_progress: Set when a cache clean has been started, and
* cleared when it has finished. This prevents multiple
* cache cleans being done simultaneously.
@@ -993,6 +1037,9 @@ struct kbase_mem_migrate {
* KCPU queue. These structures may outlive kbase module
* itself. Therefore, in such a case, a warning should be
* be produced.
+ * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of
+ *                                an MMU operation.
+ * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
*/
struct kbase_device {
u32 hw_quirks_sc;
@@ -1049,6 +1096,8 @@ struct kbase_device {
spinlock_t mmu_mask_change;
+ bool pagesize_2mb;
+
struct kbase_gpu_props gpu_props;
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
@@ -1102,6 +1151,10 @@ struct kbase_device {
u64 lowest_gpu_freq_khz;
+#if MALI_USE_CSF
+ struct kbase_backend_time backend_time;
+#endif
+
bool cache_clean_in_progress;
u32 cache_clean_queued;
wait_queue_head_t cache_clean_wait;
@@ -1283,6 +1336,8 @@ struct kbase_device {
#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
atomic_t live_fence_metadata;
#endif
+ u32 mmu_as_inactive_wait_time_ms;
+ struct kmem_cache *va_region_slab;
};
/**
@@ -1636,11 +1691,13 @@ struct kbase_sub_alloc {
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree in JM GPU or GPU command queue
* group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
- * to the memory consumed by the process.
+ * to the memory consumed by the process. A reference is taken
+ *              on this descriptor for Userspace-created contexts so that
+ * Kbase can safely access it to update the memory usage counters.
+ * The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
* tiler heaps of the kbase context.
@@ -1762,6 +1819,10 @@ struct kbase_sub_alloc {
* @limited_core_mask: The mask that is applied to the affinity in case of atoms
* marked with BASE_JD_REQ_LIMITED_CORE_MASK.
* @platform_data: Pointer to platform specific per-context data.
+ * @task: Pointer to the task structure of the main thread of the process
+ *        that created the Kbase context. It is set only for contexts
+ *        created by Userspace and not for contexts created internally
+ *        by Kbase.
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1849,8 +1910,7 @@ struct kbase_context {
atomic_t refcount;
- spinlock_t mm_update_lock;
- struct mm_struct __rcu *process_mm;
+ struct mm_struct *process_mm;
u64 gpu_va_end;
#if MALI_USE_CSF
u32 running_total_tiler_heap_nr_chunks;
@@ -1913,6 +1973,8 @@ struct kbase_context {
#if !MALI_USE_CSF
void *platform_data;
#endif
+
+ struct task_struct *task;
};
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -2015,5 +2077,4 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
-
#endif /* _KBASE_DEFS_H_ */
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index 25986f6..f4507ac 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,7 @@
#include <linux/list.h>
#include "mali_kbase_fence_defs.h"
#include "mali_kbase.h"
+#include "mali_kbase_refcount_defs.h"
#if MALI_USE_CSF
/* Maximum number of characters in DMA fence timeline name. */
@@ -49,11 +50,7 @@
* @timeline_name: String of timeline name for associated fence object.
*/
struct kbase_kcpu_dma_fence_meta {
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- atomic_t refcount;
-#else
- refcount_t refcount;
-#endif
+ kbase_refcount_t refcount;
struct kbase_device *kbdev;
int kctx_id;
char timeline_name[MAX_TIMELINE_NAME];
@@ -225,11 +222,7 @@ static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_f
static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata)
{
-#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
- if (atomic_dec_and_test(&metadata->refcount)) {
-#else
- if (refcount_dec_and_test(&metadata->refcount)) {
-#endif
+ if (kbase_refcount_dec_and_test(&metadata->refcount)) {
atomic_dec(&metadata->kbdev->live_fence_metadata);
kfree(metadata);
}
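The kbase_refcount_* helpers used here come from the new mali_kbase_refcount_defs.h header, which is not part of this excerpt; a plausible sketch of such an abstraction (an assumption, not the shipped header) is:

#include <linux/version.h>
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
#include <linux/atomic.h>
typedef atomic_t kbase_refcount_t;
#define kbase_refcount_set(r, n)	atomic_set(r, n)
#define kbase_refcount_read(r)		atomic_read(r)
#define kbase_refcount_inc(r)		atomic_inc(r)
#define kbase_refcount_dec_and_test(r)	atomic_dec_and_test(r)
#else
#include <linux/refcount.h>
typedef refcount_t kbase_refcount_t;
#define kbase_refcount_set(r, n)	refcount_set(r, n)
#define kbase_refcount_read(r)		refcount_read(r)
#define kbase_refcount_inc(r)		refcount_inc(r)
#define kbase_refcount_dec_and_test(r)	refcount_dec_and_test(r)
#endif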
diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c
index 16cccee..0eba889 100644
--- a/mali_kbase/mali_kbase_gwt.c
+++ b/mali_kbase/mali_kbase_gwt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -125,14 +125,17 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx)
return 0;
}
-
+#if (KERNEL_VERSION(5, 13, 0) <= LINUX_VERSION_CODE)
+static int list_cmp_function(void *priv, const struct list_head *a, const struct list_head *b)
+#else
static int list_cmp_function(void *priv, struct list_head *a,
struct list_head *b)
+#endif
{
- struct kbasep_gwt_list_element *elementA = container_of(a,
- struct kbasep_gwt_list_element, link);
- struct kbasep_gwt_list_element *elementB = container_of(b,
- struct kbasep_gwt_list_element, link);
+ const struct kbasep_gwt_list_element *elementA =
+ container_of(a, struct kbasep_gwt_list_element, link);
+ const struct kbasep_gwt_list_element *elementB =
+ container_of(b, struct kbasep_gwt_list_element, link);
CSTD_UNUSED(priv);
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index c658fb7..b07327a 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -232,6 +232,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_LTUX,
@@ -239,6 +240,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 },
{ GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 },
+ { GPU_ID2_VERSION_MAKE(1, 3, 0), base_hw_issues_tTUx_r1p3 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_TTIX,
@@ -303,21 +305,20 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
*/
issues = fallback_issues;
- dev_warn(kbdev->dev,
- "GPU hardware issue table may need updating:\n"
- "r%dp%d status %d is unknown; treating as r%dp%d status %d",
- (gpu_id & GPU_ID2_VERSION_MAJOR) >>
- GPU_ID2_VERSION_MAJOR_SHIFT,
- (gpu_id & GPU_ID2_VERSION_MINOR) >>
- GPU_ID2_VERSION_MINOR_SHIFT,
- (gpu_id & GPU_ID2_VERSION_STATUS) >>
- GPU_ID2_VERSION_STATUS_SHIFT,
- (fallback_version & GPU_ID2_VERSION_MAJOR) >>
- GPU_ID2_VERSION_MAJOR_SHIFT,
- (fallback_version & GPU_ID2_VERSION_MINOR) >>
- GPU_ID2_VERSION_MINOR_SHIFT,
- (fallback_version & GPU_ID2_VERSION_STATUS) >>
- GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev, "r%dp%d status %d not found in HW issues table;\n",
+ (gpu_id & GPU_ID2_VERSION_MAJOR) >> GPU_ID2_VERSION_MAJOR_SHIFT,
+ (gpu_id & GPU_ID2_VERSION_MINOR) >> GPU_ID2_VERSION_MINOR_SHIFT,
+ (gpu_id & GPU_ID2_VERSION_STATUS) >>
+ GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev, "falling back to closest match: r%dp%d status %d\n",
+ (fallback_version & GPU_ID2_VERSION_MAJOR) >>
+ GPU_ID2_VERSION_MAJOR_SHIFT,
+ (fallback_version & GPU_ID2_VERSION_MINOR) >>
+ GPU_ID2_VERSION_MINOR_SHIFT,
+ (fallback_version & GPU_ID2_VERSION_STATUS) >>
+ GPU_ID2_VERSION_STATUS_SHIFT);
+ dev_notice(kbdev->dev,
+ "Execution proceeding normally with fallback match\n");
gpu_id &= ~GPU_ID2_VERSION;
gpu_id |= fallback_version;
@@ -343,7 +344,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
issues = kbase_hw_get_issues_for_new_id(kbdev);
if (issues == NULL) {
dev_err(kbdev->dev,
- "Unknown GPU ID %x", gpu_id);
+ "HW product - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
@@ -407,7 +408,7 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
break;
default:
dev_err(kbdev->dev,
- "Unknown GPU ID %x", gpu_id);
+ "HW issues - Unknown GPU ID %x", gpu_id);
return -EINVAL;
}
}
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index 27e2cb7..ac2a26d 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2018-2021, 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,49 @@
#ifndef _KBASE_BACKEND_TIME_H_
#define _KBASE_BACKEND_TIME_H_
+#if MALI_USE_CSF
+/**
+ * struct kbase_backend_time - System timestamp attributes.
+ *
+ * @multiplier: Numerator of the converter's fraction.
+ * @divisor: Denominator of the converter's fraction.
+ * @offset: Converter's offset term.
+ *
+ * According to the Generic Timer specification, the system timer:
+ * - Increments at a fixed frequency
+ * - Starts operating from zero
+ *
+ * Hence CPU time is a linear function of System Time.
+ *
+ * CPU_ts = alpha * SYS_ts + beta
+ *
+ * Where
+ * - alpha = 10^9/SYS_ts_freq
+ * - beta is calculated by two timer samples taken at the same time:
+ * beta = CPU_ts_s - SYS_ts_s * alpha
+ *
+ * Since alpha is a rational number, we minimize the possible
+ * rounding error by simplifying the ratio. Thus alpha is stored
+ * as a simple `multiplier / divisor` ratio.
+ *
+ */
+struct kbase_backend_time {
+ u64 multiplier;
+ u64 divisor;
+ s64 offset;
+};
+
+/**
+ * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp.
+ *
+ * @kbdev: Kbase device pointer
+ * @gpu_ts: System timestamp value to convert.
+ *
+ * Return: The CPU timestamp.
+ */
+u64 __maybe_unused kbase_backend_time_convert_gpu_to_cpu(struct kbase_device *kbdev, u64 gpu_ts);
+#endif
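A minimal sketch of the conversion described above, assuming the simple linear form and ignoring overflow handling (the in-tree implementation in the time backend may differ in detail):

#include <linux/math64.h>	/* div64_u64() */

/* CPU_ts = (SYS_ts * multiplier) / divisor + offset */
static inline u64 sketch_gpu_to_cpu_ts(const struct kbase_backend_time *t, u64 gpu_ts)
{
	return div64_u64(gpu_ts * t->multiplier, t->divisor) + t->offset;
}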
+
/**
* kbase_backend_get_gpu_time() - Get current GPU time
* @kbdev: Device pointer
@@ -46,9 +89,6 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
u64 *system_time,
struct timespec64 *ts);
-
-#endif /* _KBASE_BACKEND_TIME_H_ */
-
/**
* kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled
* GPU frequency, using a choice from
@@ -70,3 +110,17 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
* Return: Snapshot of the GPU cycle count register.
*/
u64 kbase_backend_get_cycle_cnt(struct kbase_device *kbdev);
+
+/**
+ * kbase_backend_time_init() - Initialize system timestamp converter.
+ *
+ * @kbdev: Kbase device pointer
+ *
+ * This function should only be called after the GPU is powered up and
+ * L2 cache power-up has been initiated.
+ *
+ * Return: Zero on success, error code otherwise.
+ */
+int kbase_backend_time_init(struct kbase_device *kbdev);
+
+#endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index d623aca..6e803bd 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,6 +34,7 @@
#include "mali_kbase_jm.h"
#include "mali_kbase_hwaccess_jm.h"
+#include <mali_kbase_hwaccess_time.h>
#include <linux/priority_control_manager.h>
/*
@@ -531,6 +532,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->gpu_reset_ticks_dumping = DEFAULT_JS_RESET_TICKS_DUMPING;
jsdd->ctx_timeslice_ns = DEFAULT_JS_CTX_TIMESLICE_NS;
atomic_set(&jsdd->soft_job_timeout_ms, DEFAULT_JS_SOFT_JOB_TIMEOUT);
+ jsdd->js_free_wait_time_ms = kbase_get_timeout_ms(kbdev, JM_DEFAULT_JS_FREE_TIMEOUT);
dev_dbg(kbdev->dev, "JS Config Attribs: ");
dev_dbg(kbdev->dev, "\tscheduling_period_ns:%u",
@@ -555,6 +557,7 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
jsdd->ctx_timeslice_ns);
dev_dbg(kbdev->dev, "\tsoft_job_timeout:%i",
atomic_read(&jsdd->soft_job_timeout_ms));
+ dev_dbg(kbdev->dev, "\tjs_free_wait_time_ms:%u", jsdd->js_free_wait_time_ms);
if (!(jsdd->soft_stop_ticks < jsdd->hard_stop_ticks_ss &&
jsdd->hard_stop_ticks_ss < jsdd->gpu_reset_ticks_ss &&
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index b18b1e2..1c94e9c 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -44,6 +44,9 @@
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>
+#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
+#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
+
#if MALI_JIT_PRESSURE_LIMIT_BASE
/*
@@ -92,10 +95,8 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
#error "Unknown CPU VA width for this architecture"
#endif
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (kbase_ctx_compat_mode(kctx))
cpu_va_bits = 32;
-#endif
return cpu_va_bits;
}
@@ -130,18 +131,14 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
else {
u64 same_va_end;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif /* CONFIG_64BIT */
+ if (kbase_ctx_compat_mode(kctx)) {
same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
struct kbase_reg_zone *same_va_zone =
kbase_ctx_reg_zone_get(kctx,
KBASE_REG_ZONE_SAME_VA);
same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
}
-#endif /* CONFIG_64BIT */
if (gpu_pfn >= same_va_end)
rbtree = &kctx->reg_rbtree_custom;
@@ -383,6 +380,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
struct rb_node *rbnext;
struct kbase_va_region *next = NULL;
struct rb_root *reg_rbtree = NULL;
+ struct kbase_va_region *orig_reg = reg;
int merged_front = 0;
int merged_back = 0;
@@ -447,9 +445,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
struct kbase_va_region *free_reg;
- free_reg = kbase_alloc_free_region(reg_rbtree,
- reg->start_pfn, reg->nr_pages,
- reg->flags & KBASE_REG_ZONE_MASK);
+ free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
+ reg->flags & KBASE_REG_ZONE_MASK);
if (!free_reg) {
/* In case of failure, we cannot allocate a replacement
* free region, so we will be left with a 'gap' in the
@@ -480,6 +477,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
}
+ /* This operation is always safe because the function never frees
+ * the region. If the region has been merged to both front and back,
+ * then it's the previous region that is supposed to be freed.
+ */
+ orig_reg->start_pfn = 0;
+
out:
return;
}
@@ -490,6 +493,7 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
* kbase_insert_va_region_nolock - Insert a VA region to the list,
* replacing the existing one.
*
+ * @kbdev: The kbase device
* @new_reg: The new region to insert
* @at_reg: The region to replace
* @start_pfn: The Page Frame Number to insert at
@@ -497,8 +501,10 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
*
* Return: 0 on success, error code otherwise.
*/
-static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
- struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
+ struct kbase_va_region *new_reg,
+ struct kbase_va_region *at_reg, u64 start_pfn,
+ size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
int err = 0;
@@ -542,10 +548,9 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
else {
struct kbase_va_region *new_front_reg;
- new_front_reg = kbase_alloc_free_region(reg_rbtree,
- at_reg->start_pfn,
- start_pfn - at_reg->start_pfn,
- at_reg->flags & KBASE_REG_ZONE_MASK);
+ new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn,
+ at_reg->flags & KBASE_REG_ZONE_MASK);
if (new_front_reg) {
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
@@ -682,8 +687,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
goto exit;
}
- err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
- nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
if (err) {
dev_warn(dev, "Failed to insert va region");
err = -ENOMEM;
@@ -708,8 +712,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
nr_pages, align_offset, align_mask,
&start_pfn);
if (tmp) {
- err = kbase_insert_va_region_nolock(reg, tmp,
- start_pfn, nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
if (unlikely(err)) {
dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
start_pfn, nr_pages);
@@ -847,7 +850,7 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
if (rbnode) {
rb_erase(rbnode, rbtree);
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
- WARN_ON(reg->va_refcnt != 1);
+ WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
if (kbase_page_migration_enabled)
kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
/* Reset the start_pfn - as the rbtree is being
@@ -933,9 +936,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
#endif
/* all have SAME_VA */
- same_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
- same_va_pages, KBASE_REG_ZONE_SAME_VA);
+ same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
+ same_va_pages, KBASE_REG_ZONE_SAME_VA);
if (!same_va_reg) {
err = -ENOMEM;
@@ -944,10 +946,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
same_va_pages);
-#if IS_ENABLED(CONFIG_64BIT)
- /* 32-bit clients have custom VA zones */
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
err = -EINVAL;
goto fail_free_same_va;
@@ -959,10 +958,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
- custom_va_reg = kbase_alloc_free_region(
- &kctx->reg_rbtree_custom,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
+ KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
if (!custom_va_reg) {
err = -ENOMEM;
@@ -971,11 +969,9 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
KBASE_REG_ZONE_CUSTOM_VA_BASE,
custom_va_size);
-#if IS_ENABLED(CONFIG_64BIT)
} else {
custom_va_size = 0;
}
-#endif
#if MALI_USE_CSF
/* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
@@ -986,17 +982,15 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
*/
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (kbase_ctx_compat_mode(kctx)) {
exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
}
-#endif
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE);
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_base,
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
@@ -1010,8 +1004,8 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
exec_fixed_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_exec_fixed, exec_fixed_va_base,
- KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
+ kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
+ exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
KBASE_REG_ZONE_EXEC_FIXED_VA);
if (!exec_fixed_va_reg) {
@@ -1024,7 +1018,7 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
- fixed_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_fixed, fixed_va_base,
+ fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
kctx->gpu_va_end = fixed_va_end;
@@ -1163,7 +1157,6 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
return false;
}
-#if IS_ENABLED(CONFIG_64BIT)
static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
u64 jit_va_pages)
{
@@ -1212,9 +1205,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
* Create a custom VA zone at the end of the VA for allocations which
* JIT can use so it doesn't have to allocate VA from the kernel.
*/
- custom_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
- jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
+ jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
/*
* The context will be destroyed if we fail here so no point
@@ -1231,7 +1223,6 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
kbase_region_tracker_insert(custom_va_reg);
return 0;
}
-#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
int max_allocations, int trim_level, int group_id,
@@ -1272,10 +1263,8 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (!kbase_ctx_compat_mode(kctx))
err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
-#endif
/*
* Nothing to do for 32-bit clients, JIT uses the existing
* custom VA zone.
@@ -1351,17 +1340,14 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
/* 32-bit client: take from CUSTOM_VA zone */
target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
-#if IS_ENABLED(CONFIG_64BIT)
} else {
/* 64-bit client: take from SAME_VA zone */
target_zone_bits = KBASE_REG_ZONE_SAME_VA;
}
-#endif
+
target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
@@ -1389,10 +1375,8 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
/* Taken from the end of the target zone */
exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
- exec_va_start,
- exec_va_pages,
- KBASE_REG_ZONE_EXEC_VA);
+ exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
+ exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
if (!exec_va_reg) {
err = -ENOMEM;
goto exit_unlock;
@@ -1435,10 +1419,9 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
kbdev->csf.shared_reg_rbtree = RB_ROOT;
- shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree,
- shared_reg_start_pfn,
- shared_reg_size,
- KBASE_REG_ZONE_MCU_SHARED);
+ shared_reg =
+ kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
+ shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
if (!shared_reg)
return -ENOMEM;
@@ -1447,10 +1430,30 @@ int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
}
#endif
+static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
+{
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
+ kbdev->pagesize_2mb = true;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
+		dev_warn(
+			kbdev->dev,
+			"2MB page allocation is force-enabled although the current GPU HW does not meet the requirement for it.\n");
+ }
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+ kbdev->pagesize_2mb = false;
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+ /* Set it to the default based on which GPU is present */
+ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+}
+
int kbase_mem_init(struct kbase_device *kbdev)
{
int err = 0;
struct kbasep_mem_device *memdev;
+ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
#if IS_ENABLED(CONFIG_OF)
struct device_node *mgm_node = NULL;
#endif
@@ -1459,6 +1462,19 @@ int kbase_mem_init(struct kbase_device *kbdev)
memdev = &kbdev->memdev;
+ kbasep_mem_page_size_init(kbdev);
+
+ scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
+ kbdev->devname);
+
+ /* Initialize slab cache for kbase_va_regions */
+ kbdev->va_region_slab =
+ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
+ if (kbdev->va_region_slab == NULL) {
+ dev_err(kbdev->dev, "Failed to create va_region_slab\n");
+ return -ENOMEM;
+ }
+
kbase_mem_migrate_init(kbdev);
kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KCTX);
@@ -1550,6 +1566,9 @@ void kbase_mem_term(struct kbase_device *kbdev)
kbase_mem_migrate_term(kbdev);
+ kmem_cache_destroy(kbdev->va_region_slab);
+ kbdev->va_region_slab = NULL;
+
WARN_ON(kbdev->total_gpu_pages);
WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
@@ -1563,6 +1582,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
* kbase_alloc_free_region - Allocate a free region object.
*
+ * @kbdev: kbase device
* @rbtree: Backlink to the red-black tree of memory regions.
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
@@ -1575,8 +1595,8 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
*
* Return: pointer to the allocated region object on success, NULL otherwise.
*/
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone)
{
struct kbase_va_region *new_reg;
@@ -1588,13 +1608,13 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
- new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+ new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
if (!new_reg)
return NULL;
- new_reg->va_refcnt = 1;
- new_reg->no_user_free_refcnt = 0;
+ kbase_refcount_set(&new_reg->va_refcnt, 1);
+ atomic_set(&new_reg->no_user_free_count, 0);
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
new_reg->rbtree = rbtree;
@@ -1726,7 +1746,6 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
unsigned long gwt_mask = ~0;
int group_id;
struct kbase_mem_phy_alloc *alloc;
- bool ignore_page_migration = false;
#ifdef CONFIG_MALI_CINSTR_GWT
if (kctx->gwt_enabled)
@@ -1755,41 +1774,46 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_insert_pages(
+ err = kbase_mmu_insert_aliased_pages(
kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
alloc->imported.alias.aliased[i].alloc->pages +
alloc->imported.alias.aliased[i].offset,
alloc->imported.alias.aliased[i].length,
reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
- NULL, ignore_page_migration);
+ NULL);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
/* Note: mapping count is tracked at alias
* creation time
*/
} else {
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + i * stride,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_aliased_page(
+ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
- (reg->flags & mask & gwt_mask) | attr,
- group_id, mmu_sync_info);
+ (reg->flags & mask & gwt_mask) | attr, group_id,
+ mmu_sync_info);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
}
}
} else {
if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
- reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF)
- ignore_page_migration = true;
-
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr, group_id,
- mmu_sync_info, reg, ignore_page_migration);
+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+
+ err = kbase_mmu_insert_imported_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
+ } else {
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id,
+ mmu_sync_info, reg, true);
+ }
+
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(alloc);
@@ -1799,9 +1823,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
!WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
- /* For padded imported dma-buf memory, map the dummy aliasing
- * page from the end of the dma-buf pages, to the end of the
- * region using a read only mapping.
+ /* For padded imported dma-buf or user-buf memory, map the dummy
+ * aliasing page from the end of the imported pages, to the end of
+ * the region using a read only mapping.
*
* Only map when it's imported dma-buf memory that is currently
* mapped.
@@ -1809,22 +1833,32 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
* Assume reg->gpu_alloc->nents is the number of actual pages
* in the dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + reg->gpu_alloc->nents,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
reg->nr_pages - reg->gpu_alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK, mmu_sync_info);
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_insert;
}
return err;
-bad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
- reg->nr_pages, kctx->as_nr, ignore_page_migration);
+bad_aliased_insert:
+ while (i-- > 0) {
+ struct tagged_addr *phys_alloc = NULL;
+ u64 const stride = alloc->imported.alias.stride;
+
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset;
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
+ phys_alloc, alloc->imported.alias.aliased[i].length,
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
+ }
+bad_insert:
kbase_remove_va_region(kctx->kbdev, reg);
return err;
@@ -1870,26 +1904,49 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
kctx->kbdev, &kctx->mmu,
reg->start_pfn + (i * alloc->imported.alias.stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
- kctx->as_nr, false);
+ alloc->imported.alias.aliased[i].length, kctx->as_nr,
+ false);
if (WARN_ON_ONCE(err_loop))
err = err_loop;
}
}
break;
- case KBASE_MEM_TYPE_IMPORTED_UMM:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, reg->nr_pages, kctx->as_nr, true);
+ case KBASE_MEM_TYPE_IMPORTED_UMM: {
+ size_t nr_phys_pages = reg->nr_pages;
+ size_t nr_virt_pages = reg->nr_pages;
+ /* If the region has import padding and falls under the threshold for
+ * issuing a partial GPU cache flush, we want to reduce the number of
+ * physical pages that get flushed.
+		 *
+		 * This is symmetric with the case of mapping the memory, which first maps
+ * each imported physical page to a separate virtual page, and then
+ * maps the single aliasing sink page to each of the virtual padding
+ * pages.
+ */
+ if (reg->flags & KBASE_REG_IMPORT_PAD)
+ nr_phys_pages = alloc->nents + 1;
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_phys_pages, nr_virt_pages,
+ kctx->as_nr, true);
+ }
break;
- case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, kbase_reg_current_backed_size(reg),
- kctx->as_nr, true);
+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, true);
+ }
break;
- default:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, kbase_reg_current_backed_size(reg),
- kctx->as_nr, false);
+ default: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr, false);
+ }
break;
}
@@ -2214,7 +2271,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
- if (kbase_va_region_is_no_user_free(kctx, reg)) {
+ if (kbase_va_region_is_no_user_free(reg)) {
dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
return -EINVAL;
}
@@ -2435,7 +2492,7 @@ int kbase_update_region_flags(struct kbase_context *kctx,
if (flags & BASEP_MEM_NO_USER_FREE) {
kbase_gpu_vm_lock(kctx);
- kbase_va_region_no_user_free_get(kctx, reg);
+ kbase_va_region_no_user_free_inc(reg);
kbase_gpu_vm_unlock(kctx);
}
@@ -2489,15 +2546,14 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
tp = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
/* Check if we have enough pages requested so we can allocate a large
* page (512 * 4KB = 2MB )
*/
- if (nr_left >= (SZ_2M / SZ_4K)) {
+ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
- nr_lp * (SZ_2M / SZ_4K), tp, true);
+ nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2551,7 +2607,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
- 1);
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2592,12 +2648,11 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
}
}
}
-no_new_partial:
-#endif
+no_new_partial:
if (nr_left) {
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
- tp, false);
+ tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -2656,18 +2711,17 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
lockdep_assert_held(&pool->pool_lock);
-#if !defined(CONFIG_MALI_2MB_ALLOC)
- WARN_ON(pool->order);
-#endif
+ kctx = alloc->imported.native.kctx;
+ kbdev = kctx->kbdev;
+
+ if (!kbdev->pagesize_2mb)
+ WARN_ON(pool->order);
if (alloc->reg) {
if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
goto invalid_request;
}
- kctx = alloc->imported.native.kctx;
- kbdev = kctx->kbdev;
-
lockdep_assert_held(&kctx->mem_partials_lock);
if (nr_pages_requested == 0)
@@ -2686,8 +2740,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
tp = alloc->pages + alloc->nents;
new_pages = tp;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages_locked(pool,
@@ -2771,15 +2824,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
if (nr_left)
goto alloc_failed;
} else {
-#endif
res = kbase_mem_pool_alloc_pages_locked(pool,
nr_left,
tp);
if (res <= 0)
goto alloc_failed;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
KBASE_TLSTREAM_AUX_PAGESALLOC(
kbdev,
@@ -2800,8 +2850,7 @@ alloc_failed:
struct tagged_addr *start_free = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
kbase_mem_pool_free_pages_locked(
@@ -2819,15 +2868,12 @@ alloc_failed:
}
}
} else {
-#endif
kbase_mem_pool_free_pages_locked(pool,
nr_pages_to_free,
start_free,
false, /* not dirty */
true); /* return to pool */
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
}
kbase_process_page_usage_dec(kctx, nr_pages_requested);
@@ -3816,8 +3862,8 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(reg->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, reg);
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
@@ -4078,18 +4124,14 @@ static int kbase_jit_grow(struct kbase_context *kctx,
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pages_required >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
pages_required += ((SZ_2M / SZ_4K) - 1);
pages_required /= (SZ_2M / SZ_4K);
} else {
-#endif
pool = &kctx->mem_pools.small[kctx->jit_group_id];
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (reg->cpu_alloc != reg->gpu_alloc)
pages_required *= 2;
@@ -4110,7 +4152,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
- ret = kbase_mem_pool_grow(pool, pool_delta);
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -4374,14 +4416,14 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i])
- goto end;
+ if (kctx->kbdev->pagesize_2mb) {
+ /* Preallocate memory for the sub-allocation structs */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+ if (!prealloc_sas[i])
+ goto end;
+ }
}
-#endif
kbase_gpu_vm_lock(kctx);
mutex_lock(&kctx->jit_evict_lock);
@@ -4561,7 +4603,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
/* Similarly to tiler heap init, there is a short window of time
* where the (either recycled or newly allocated, in our case) region has
- * "no user free" refcount incremented but is still missing the DONT_NEED flag, and
+ * "no user free" count incremented but is still missing the DONT_NEED flag, and
* doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
* allocation is the least bad option that doesn't lead to a security issue down the
* line (it will eventually be cleaned up during context termination).
@@ -4570,9 +4612,9 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
* flags.
*/
kbase_gpu_vm_lock(kctx);
- if (unlikely(reg->no_user_free_refcnt > 1)) {
+ if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
kbase_gpu_vm_unlock(kctx);
- dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n");
+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
mutex_lock(&kctx->jit_evict_lock);
list_move(&reg->jit_node, &kctx->jit_pool_head);
@@ -4728,8 +4770,8 @@ bool kbase_jit_evict(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(reg->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, reg);
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
}
@@ -4757,8 +4799,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(walker->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, walker);
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4776,8 +4818,8 @@ void kbase_jit_term(struct kbase_context *kctx)
* by implementing "free on putting the last reference",
* but only for JIT regions.
*/
- WARN_ON(walker->no_user_free_refcnt > 1);
- kbase_va_region_no_user_free_put(kctx, walker);
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -5023,9 +5065,13 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
* region, otherwise the initial content of memory would be wrong.
*/
for (i = 0; i < pinned_pages; i++) {
- dma_addr_t dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
-
+ dma_addr_t dma_addr;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -5041,9 +5087,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
- kbase_reg_current_backed_size(reg), reg->flags & gwt_mask,
- kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err == 0)
return 0;
@@ -5064,8 +5111,12 @@ unwind:
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
/* The user buffer could already have been previously pinned before
@@ -5182,9 +5233,13 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
}
/* Notice: use the original DMA address to unmap the whole memory page. */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+#else
dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
-
+#endif
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -5308,6 +5363,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages,
kbase_reg_current_backed_size(reg),
+ kbase_reg_current_backed_size(reg),
kctx->as_nr, true);
}
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 83872a1..02e5509 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -38,6 +38,7 @@
/* Required for kbase_mem_evictable_unmake */
#include "mali_kbase_mem_linux.h"
#include "mali_kbase_mem_migrate.h"
+#include "mali_kbase_refcount_defs.h"
static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
int pages);
@@ -419,8 +420,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
* @jit_usage_id: The last just-in-time memory usage ID for this region.
* @jit_bin_id: The just-in-time memory bin this region came from.
* @va_refcnt: Number of users of this region. Protected by reg_lock.
- * @no_user_free_refcnt: Number of users that want to prevent the region from
- * being freed by userspace.
+ * @no_user_free_count: Number of contexts that want to prevent the region
+ * from being freed by userspace.
* @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
* an allocated region
* The object can be one of:
@@ -681,8 +682,8 @@ struct kbase_va_region {
size_t used_pages;
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
- int va_refcnt;
- int no_user_free_refcnt;
+ kbase_refcount_t va_refcnt;
+ atomic_t no_user_free_count;
};
/**
@@ -759,15 +760,12 @@ static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
static inline struct kbase_va_region *kbase_va_region_alloc_get(
struct kbase_context *kctx, struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
- WARN_ON(!region->va_refcnt);
- WARN_ON(region->va_refcnt == INT_MAX);
+ WARN_ON(!kbase_refcount_read(&region->va_refcnt));
+ WARN_ON(kbase_refcount_read(&region->va_refcnt) == INT_MAX);
- /* non-atomic as kctx->reg_lock is held */
dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
- region->va_refcnt, (void *)region);
- region->va_refcnt++;
+ kbase_refcount_read(&region->va_refcnt), (void *)region);
+ kbase_refcount_inc(&region->va_refcnt);
return region;
}
@@ -775,17 +773,14 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
static inline struct kbase_va_region *kbase_va_region_alloc_put(
struct kbase_context *kctx, struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
- WARN_ON(region->va_refcnt <= 0);
+ WARN_ON(kbase_refcount_read(&region->va_refcnt) <= 0);
WARN_ON(region->flags & KBASE_REG_FREE);
- /* non-atomic as kctx->reg_lock is held */
- region->va_refcnt--;
- dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
- region->va_refcnt, (void *)region);
- if (!region->va_refcnt)
+ if (kbase_refcount_dec_and_test(&region->va_refcnt))
kbase_region_refcnt_free(kctx->kbdev, region);
+ else
+ dev_dbg(kctx->kbdev->dev, "va_refcnt %d after put %pK\n",
+ kbase_refcount_read(&region->va_refcnt), (void *)region);
return NULL;
}
@@ -799,58 +794,44 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
* Hence, callers cannot rely on this check alone to determine if a region might be shrunk
* by any part of kbase. Instead they should use kbase_is_region_shrinkable().
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region.
*
* Return: true if userspace cannot free the region, false if userspace can free the region.
*/
-static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
- struct kbase_va_region *region)
+static inline bool kbase_va_region_is_no_user_free(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
- return region->no_user_free_refcnt > 0;
+ return atomic_read(&region->no_user_free_count) > 0;
}
/**
- * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region.
+ * kbase_va_region_no_user_free_inc - Increment "no user free" count for a region.
 * Calling this function will prevent the region from being shrunk by parts of kbase that
- * don't own the region (as long as the refcount stays above zero). Refer to
+ * don't own the region (as long as the count stays above zero). Refer to
* kbase_va_region_is_no_user_free() for more information.
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*
* Return: the pointer to the region passed as argument.
*/
-static inline struct kbase_va_region *
-kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region)
+static inline void kbase_va_region_no_user_free_inc(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
-
WARN_ON(kbase_is_region_shrinkable(region));
- WARN_ON(region->no_user_free_refcnt == INT_MAX);
+ WARN_ON(atomic_read(&region->no_user_free_count) == INT_MAX);
/* non-atomic as kctx->reg_lock is held */
- region->no_user_free_refcnt++;
-
- return region;
+ atomic_inc(&region->no_user_free_count);
}
/**
- * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region.
+ * kbase_va_region_no_user_free_dec - Decrement "no user free" count for a region.
*
- * @kctx: Pointer to kbase context.
* @region: Pointer to region (not shrinkable).
*/
-static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx,
- struct kbase_va_region *region)
+static inline void kbase_va_region_no_user_free_dec(struct kbase_va_region *region)
{
- lockdep_assert_held(&kctx->reg_lock);
+ WARN_ON(!kbase_va_region_is_no_user_free(region));
- WARN_ON(!kbase_va_region_is_no_user_free(kctx, region));
-
- /* non-atomic as kctx->reg_lock is held */
- region->no_user_free_refcnt--;
+ atomic_dec(&region->no_user_free_count);
}
/* Common functions */
@@ -1148,6 +1129,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* @pages: Pointer to array where the physical address of the allocated
* pages will be stored.
* @partial_allowed: If fewer pages allocated is allowed
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the pages are being allocated. It can be NULL if the pages
+ * won't be associated with any Kbase context.
*
* Like kbase_mem_pool_alloc() but optimized for allocating many pages.
*
@@ -1164,7 +1148,8 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
*/
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed);
+ struct tagged_addr *pages, bool partial_allowed,
+ struct task_struct *page_owner);
/**
* kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -1276,13 +1261,17 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
* kbase_mem_pool_grow - Grow the pool
* @pool: Memory pool to grow
* @nr_to_grow: Number of pages to add to the pool
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the memory pool is being grown. It can be NULL if the pages
+ * to be allocated won't be associated with any Kbase context.
*
* Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
* become larger than the maximum size specified.
*
 * Return: 0 on success, -ENOMEM if unable to allocate sufficient pages
*/
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
+ struct task_struct *page_owner);
/**
* kbase_mem_pool_trim - Grow or shrink the pool to a new size
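For reference, a minimal sketch of how a caller supplies the new page_owner argument, assuming a pool that backs a Kbase context; the helper name is illustrative and not part of the driver. kctx->task is the task that created the context (as used by the kbase_jit_allocate() hunk earlier in this patch), and NULL is passed for pools that serve no particular context:

static int grow_ctx_pool_sketch(struct kbase_context *kctx,
				struct kbase_mem_pool *pool, size_t delta)
{
	/* Pass the creating task so a kernel worker can stop allocating if
	 * that process is exiting or being OoM-killed; device-internal pools
	 * would pass NULL here instead.
	 */
	return kbase_mem_pool_grow(pool, delta, kctx->task);
}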
@@ -1398,8 +1387,8 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(
struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
u64 gpu_addr);
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone);
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
+ u64 start_pfn, size_t nr_pages, int zone);
void kbase_free_alloced_region(struct kbase_va_region *reg);
int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
u64 addr, size_t nr_pages, size_t align);
@@ -1410,6 +1399,32 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
bool kbase_check_alloc_flags(unsigned long flags);
bool kbase_check_import_flags(unsigned long flags);
+static inline bool kbase_import_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
+{
+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+ dev_dbg(
+ kbdev->dev,
+ "Import attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+ (unsigned long long)va_pages);
+ return false;
+ }
+
+ return true;
+}
+
+static inline bool kbase_alias_size_is_valid(struct kbase_device *kbdev, u64 va_pages)
+{
+ if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
+ dev_dbg(
+ kbdev->dev,
+ "Alias attempted with va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
+ (unsigned long long)va_pages);
+ return false;
+ }
+
+ return true;
+}
+
/**
* kbase_check_alloc_sizes - check user space sizes parameters for an
* allocation
@@ -1737,7 +1752,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
*
* @prealloc_sa: Information about the partial allocation if the amount of memory requested
* is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be
- * allocated by the caller iff CONFIG_MALI_2MB_ALLOC is enabled.
+ * allocated by the caller if kbdev->pagesize_2mb is enabled.
*
* Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new
* pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be
@@ -1765,7 +1780,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
* This ensures that the pool can be grown to the required size and that the allocation can
* complete without another thread using the newly grown pages.
*
- * If CONFIG_MALI_2MB_ALLOC is defined and the allocation is >= 2MB, then @pool must be one of the
+ * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the
* pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the
* mempools from alloc->imported.native.kctx->mem_pools.small[].
*
@@ -2494,8 +2509,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
* kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
* @kctx: Pointer to kbase context
*
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
* from the forked child process using the mali device file fd inherited from
* the parent process.
*
@@ -2503,13 +2517,23 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
*/
static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
{
- bool allow_alloc = true;
-
- rcu_read_lock();
- allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
- rcu_read_unlock();
+ return (kctx->process_mm == current->mm);
+}
- return allow_alloc;
+/**
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+ /* This merely takes a reference on the memory descriptor structure
+ * i.e. the mm_struct of the current process, and not on its address space,
+ * so it won't block the freeing of the address space on process exit.
+ */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+ atomic_inc(&current->mm->mm_count);
+#else
+ mmgrab(current->mm);
+#endif
}
/**
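A hedged illustration of how the simplified process tracking fits together: kbase_mem_mmgrab() pins the mm_struct of the creating process, and kbase_mem_allow_alloc() later rejects ioctls issued from a forked child, whose current->mm differs from kctx->process_mm. The helper names below are illustrative; where the driver actually records process_mm (and later drops the reference, e.g. with mmdrop()) is not shown in this hunk.

static void record_owner_mm_sketch(struct kbase_context *kctx)
{
	/* Take a reference on the memory descriptor only, not on the address
	 * space, so process exit is not blocked.
	 */
	kbase_mem_mmgrab();
	kctx->process_mm = current->mm;
}

static bool may_alloc_sketch(struct kbase_context *kctx)
{
	/* A forked child inherits the device fd but runs with a different mm,
	 * so this comparison fails for it.
	 */
	return kbase_mem_allow_alloc(kctx);
}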
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index e577452..7d30790 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -37,7 +37,7 @@
#include <linux/memory_group_manager.h>
#include <linux/math64.h>
#include <linux/migrate.h>
-
+#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -385,8 +385,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
zone = KBASE_REG_ZONE_CUSTOM_VA;
}
- reg = kbase_alloc_free_region(rbtree, PFN_DOWN(*gpu_va),
- va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone);
if (!reg) {
dev_err(dev, "Failed to allocate free region");
@@ -481,22 +480,22 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
} else /* we control the VA */ {
size_t align = 1;
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* If there's enough (> 33 bits) of GPU VA space, align to 2MB
- * boundaries. The similar condition is used for mapping from
- * the SAME_VA zone inside kbase_context_get_unmapped_area().
- */
- if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
- if (va_pages >= (SZ_2M / SZ_4K))
- align = (SZ_2M / SZ_4K);
- }
- if (*gpu_va)
- align = 1;
+ if (kctx->kbdev->pagesize_2mb) {
+ /* If there's enough (> 33 bits) of GPU VA space, align to 2MB
+ * boundaries. The similar condition is used for mapping from
+ * the SAME_VA zone inside kbase_context_get_unmapped_area().
+ */
+ if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
+ if (va_pages >= (SZ_2M / SZ_4K))
+ align = (SZ_2M / SZ_4K);
+ }
+ if (*gpu_va)
+ align = 1;
#if !MALI_USE_CSF
- if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
- align = 1;
+ if (reg->flags & KBASE_REG_TILER_ALIGN_TOP)
+ align = 1;
#endif /* !MALI_USE_CSF */
-#endif /* CONFIG_MALI_2MB_ALLOC */
+ }
if (kbase_gpu_mmap(kctx, reg, *gpu_va, va_pages, align,
mmu_sync_info) != 0) {
dev_warn(dev, "Failed to map memory on GPU");
@@ -999,7 +998,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in
* & GPU queue ringbuffer and none of them needs to be explicitly marked
* as evictable by Userspace.
*/
- if (kbase_va_region_is_no_user_free(kctx, reg))
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
/* Is the region being transitioning between not needed and needed? */
@@ -1319,10 +1318,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg), reg->flags & gwt_mask,
- kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true);
+ err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
+ mmu_sync_info, NULL);
if (err)
goto bad_insert;
@@ -1335,11 +1335,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
* Assume alloc->nents is the number of actual pages in the
* dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + alloc->nents,
- kctx->aliasing_sink_page, reg->nr_pages - alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK, mmu_sync_info);
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + alloc->nents, kctx->aliasing_sink_page,
+ reg->nr_pages - alloc->nents,
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_pad_insert;
}
@@ -1348,7 +1348,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
bad_pad_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
- alloc->nents, kctx->as_nr, true);
+ alloc->nents, alloc->nents, kctx->as_nr, true);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1377,7 +1377,8 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
int err;
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, reg->nr_pages, kctx->as_nr, true);
+ alloc->pages, reg->nr_pages, reg->nr_pages,
+ kctx->as_nr, true);
WARN_ON(err);
}
@@ -1449,6 +1450,9 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
return NULL;
}
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ return NULL;
+
/* ignore SAME_VA */
*flags &= ~BASE_MEM_SAME_VA;
@@ -1469,23 +1473,21 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
need_sync = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same,
- 0, *va_pages, KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *va_pages, KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg) {
@@ -1618,21 +1620,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
/* 64-bit address range is the max */
goto bad_size;
+ if (!kbase_import_size_is_valid(kctx->kbdev, *va_pages))
+ goto bad_size;
+
/* SAME_VA generally not supported with imported memory (no known use cases) */
*flags &= ~BASE_MEM_SAME_VA;
if (*flags & BASE_MEM_IMPORT_SHARED)
shared_zone = true;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/*
* 64-bit tasks require us to reserve VA on the CPU that we use
* on the GPU.
*/
shared_zone = true;
}
-#endif
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
@@ -1641,7 +1644,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
} else
rbtree = &kctx->reg_rbtree_custom;
- reg = kbase_alloc_free_region(rbtree, 0, *va_pages, zone);
+ reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone);
if (!reg)
goto no_region;
@@ -1667,11 +1670,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
user_buf->address = address;
user_buf->nr_pages = *va_pages;
user_buf->mm = current->mm;
-#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
- atomic_inc(&current->mm->mm_count);
-#else
- mmgrab(current->mm);
-#endif
+ kbase_mem_mmgrab();
if (reg->gpu_alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
user_buf->pages = vmalloc(*va_pages * sizeof(struct page *));
else
@@ -1746,10 +1745,13 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
* region, otherwise the initial content of memory would be wrong.
*/
for (i = 0; i < faulted_pages; i++) {
- dma_addr_t dma_addr =
- dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC);
-
+ dma_addr_t dma_addr;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
@@ -1776,8 +1778,12 @@ unwind_dma_map:
dma_addr_t dma_addr = user_buf->dma_addrs[i];
dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
fault_mismatch:
if (pages) {
@@ -1853,22 +1859,19 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* calculate the number of pages this alias will cover */
*num_pages = nents * stride;
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_alias_size_is_valid(kctx->kbdev, *num_pages))
+ goto bad_size;
+
+ if (!kbase_ctx_compat_mode(kctx)) {
/* 64-bit tasks must MMAP anyway, but not expose this address to
* clients
*/
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0,
- *num_pages,
- KBASE_REG_ZONE_SAME_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages,
+ KBASE_REG_ZONE_SAME_VA);
} else {
-#else
- if (1) {
-#endif
- reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom,
- 0, *num_pages,
- KBASE_REG_ZONE_CUSTOM_VA);
+ reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages,
+ KBASE_REG_ZONE_CUSTOM_VA);
}
if (!reg)
@@ -1919,7 +1922,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
goto bad_handle; /* Not found/already free */
if (kbase_is_region_shrinkable(aliasing_reg))
goto bad_handle; /* Ephemeral region */
- if (kbase_va_region_is_no_user_free(kctx, aliasing_reg))
+ if (kbase_va_region_is_no_user_free(aliasing_reg))
goto bad_handle; /* JIT regions can't be
* aliased. NO_USER_FREE flag
* covers the entire lifetime
@@ -1974,8 +1977,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
}
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ if (!kbase_ctx_compat_mode(kctx)) {
/* Bind to a cookie */
if (bitmap_empty(kctx->cookies, BITS_PER_LONG)) {
dev_err(kctx->kbdev->dev, "No cookies available for allocation!");
@@ -1990,10 +1992,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* relocate to correct base */
gpu_va += PFN_DOWN(BASE_MEM_COOKIE_BASE);
gpu_va <<= PAGE_SHIFT;
- } else /* we control the VA */ {
-#else
- if (1) {
-#endif
+ } else {
+ /* we control the VA */
if (kbase_gpu_mmap(kctx, reg, 0, *num_pages, 1,
mmu_sync_info) != 0) {
dev_warn(kctx->kbdev->dev, "Failed to map memory on GPU");
@@ -2010,9 +2010,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
return gpu_va;
-#if IS_ENABLED(CONFIG_64BIT)
no_cookie:
-#endif
no_mmap:
bad_handle:
/* Marking the source allocs as not being mapped on the GPU and putting
@@ -2227,7 +2225,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
int ret = 0;
ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
- alloc->pages + new_pages, delta, kctx->as_nr, false);
+ alloc->pages + new_pages, delta, delta, kctx->as_nr, false);
return ret;
}
@@ -2295,7 +2293,7 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages)
if (kbase_is_region_shrinkable(reg))
goto out_unlock;
- if (kbase_va_region_is_no_user_free(kctx, reg))
+ if (kbase_va_region_is_no_user_free(reg))
goto out_unlock;
#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -2398,18 +2396,19 @@ int kbase_mem_shrink(struct kbase_context *const kctx,
kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
if (reg->cpu_alloc != reg->gpu_alloc)
kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (kbase_reg_current_backed_size(reg) > new_pages) {
- old_pages = new_pages;
- new_pages = kbase_reg_current_backed_size(reg);
-
- /* Update GPU mapping. */
- err = kbase_mem_grow_gpu_mapping(kctx, reg,
- new_pages, old_pages, CALLER_MMU_ASYNC);
+
+ if (kctx->kbdev->pagesize_2mb) {
+ if (kbase_reg_current_backed_size(reg) > new_pages) {
+ old_pages = new_pages;
+ new_pages = kbase_reg_current_backed_size(reg);
+
+ /* Update GPU mapping. */
+ err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages,
+ CALLER_MMU_ASYNC);
+ }
+ } else {
+ WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
}
-#else
- WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
-#endif
}
return err;
@@ -2707,8 +2706,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
goto out;
}
- new_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 0, nr_pages,
- KBASE_REG_ZONE_SAME_VA);
+ new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages,
+ KBASE_REG_ZONE_SAME_VA);
if (!new_reg) {
err = -ENOMEM;
WARN_ON(1);
@@ -3378,79 +3377,29 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = kctx->process_mm;
- rcu_read_lock();
- mm = rcu_dereference(kctx->process_mm);
- if (mm) {
- atomic_add(pages, &kctx->nonmapped_pages);
-#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
- spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
- spin_unlock(&mm->page_table_lock);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
- int pages;
- struct mm_struct *mm;
-
- spin_lock(&kctx->mm_update_lock);
- mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
- if (!mm) {
- spin_unlock(&kctx->mm_update_lock);
+ if (unlikely(!mm))
return;
- }
- rcu_assign_pointer(kctx->process_mm, NULL);
- spin_unlock(&kctx->mm_update_lock);
- synchronize_rcu();
-
- pages = atomic_xchg(&kctx->nonmapped_pages, 0);
+ atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
- struct kbase_context *kctx;
-
- kctx = vma->vm_private_data;
- kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
- .close = kbase_special_vm_close,
-};
-
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
- /* check that this is the only tracking page */
- spin_lock(&kctx->mm_update_lock);
- if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
- spin_unlock(&kctx->mm_update_lock);
- return -EFAULT;
- }
-
- rcu_assign_pointer(kctx->process_mm, current->mm);
-
- spin_unlock(&kctx->mm_update_lock);
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
- vma->vm_ops = &kbase_vm_special_ops;
- vma->vm_private_data = kctx;
return 0;
}
@@ -3723,23 +3672,27 @@ static void kbase_csf_user_reg_vm_open(struct vm_area_struct *vma)
static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx = vma->vm_private_data;
+ struct kbase_device *kbdev;
- if (!kctx) {
+ if (unlikely(!kctx)) {
pr_debug("Close function called for the unexpected mapping");
return;
}
- if (unlikely(!kctx->csf.user_reg_vma))
- dev_warn(kctx->kbdev->dev, "user_reg_vma pointer unexpectedly NULL");
+ kbdev = kctx->kbdev;
- kctx->csf.user_reg_vma = NULL;
+ if (unlikely(!kctx->csf.user_reg.vma))
+ dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d",
+ kctx->tgid, kctx->id);
- mutex_lock(&kctx->kbdev->csf.reg_lock);
- if (unlikely(kctx->kbdev->csf.nr_user_page_mapped == 0))
- dev_warn(kctx->kbdev->dev, "Unexpected value for the USER page mapping counter");
- else
- kctx->kbdev->csf.nr_user_page_mapped--;
- mutex_unlock(&kctx->kbdev->csf.reg_lock);
+ mutex_lock(&kbdev->csf.reg_lock);
+ list_del_init(&kctx->csf.user_reg.link);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kctx->csf.user_reg.vma = NULL;
+
+ /* Now that the VMA is closed, drop the reference on the mali device file */
+ fput(kctx->filp);
}
/**
@@ -3784,10 +3737,11 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
unsigned long flags;
/* Few sanity checks up front */
- if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg_vma) ||
- (vma->vm_pgoff != PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE))) {
- pr_warn("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
- current->comm, current->tgid, current->pid);
+
+ if (!kctx || (nr_pages != 1) || (vma != kctx->csf.user_reg.vma) ||
+ (vma->vm_pgoff != kctx->csf.user_reg.file_offset)) {
+ pr_err("Unexpected CPU page fault on USER page mapping for process %s tgid %d pid %d\n",
+ current->comm, current->tgid, current->pid);
return VM_FAULT_SIGBUS;
}
@@ -3796,22 +3750,22 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
mutex_lock(&kbdev->csf.reg_lock);
+
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- /* Don't map in the actual register page if GPU is powered down.
- * Always map in the dummy page in no mali builds.
+ /* The dummy page will be mapped while the GPU is powered off.
+ *
+ * In no mali builds, the dummy page is always mapped.
*/
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
-#else
- if (!kbdev->pm.backend.gpu_powered)
- pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
-#endif
+ if (IS_ENABLED(CONFIG_MALI_NO_MALI) || !kbdev->pm.backend.gpu_powered)
+ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.user_reg.dummy_page));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ list_move_tail(&kctx->csf.user_reg.link, &kbdev->csf.user_reg.list);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_FW, vma,
vma->vm_start, pfn,
vma->vm_page_prot);
+
mutex_unlock(&kbdev->csf.reg_lock);
return ret;
@@ -3824,20 +3778,6 @@ static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
.fault = kbase_csf_user_reg_vm_fault
};
-/**
- * kbase_csf_cpu_mmap_user_reg_page - Memory map method for USER page.
- *
- * @kctx: Pointer of the kernel context.
- * @vma: Pointer to the struct containing the information about
- * the userspace mapping of USER page.
- *
- * Return: 0 on success, error code otherwise.
- *
- * Note:
- * New Base will request Kbase to read the LATEST_FLUSH of USER page on its behalf.
- * But this function needs to be kept for backward-compatibility as old Base (<=1.12)
- * will try to mmap USER page for direct access when it creates a base context.
- */
static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct vm_area_struct *vma)
{
@@ -3845,7 +3785,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
struct kbase_device *kbdev = kctx->kbdev;
/* Few sanity checks */
- if (kctx->csf.user_reg_vma)
+ if (kctx->csf.user_reg.vma)
return -EBUSY;
if (nr_pages != 1)
@@ -3864,19 +3804,21 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
*/
vma->vm_flags |= VM_PFNMAP;
- kctx->csf.user_reg_vma = vma;
+ kctx->csf.user_reg.vma = vma;
mutex_lock(&kbdev->csf.reg_lock);
- kbdev->csf.nr_user_page_mapped++;
-
- if (!kbdev->csf.mali_file_inode)
- kbdev->csf.mali_file_inode = kctx->filp->f_inode;
-
- if (unlikely(kbdev->csf.mali_file_inode != kctx->filp->f_inode))
- dev_warn(kbdev->dev, "Device file inode pointer not same for all contexts");
-
+ kctx->csf.user_reg.file_offset = kbdev->csf.user_reg.file_offset++;
mutex_unlock(&kbdev->csf.reg_lock);
+ /* Make VMA point to the special internal file, but don't drop the
+ * reference on the mali device file (that would be done later when the
+ * VMA is closed).
+ */
+ vma->vm_file = kctx->kbdev->csf.user_reg.filp;
+ get_file(vma->vm_file);
+
+ /* Also adjust the vm_pgoff */
+ vma->vm_pgoff = kctx->csf.user_reg.file_offset;
vma->vm_ops = &kbase_csf_user_reg_vm_ops;
vma->vm_private_data = kctx;
diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c
index 9c4b0d9..1dc76d0 100644
--- a/mali_kbase/mali_kbase_mem_migrate.c
+++ b/mali_kbase/mali_kbase_mem_migrate.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -60,6 +60,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a
lock_page(p);
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
__SetPageMovable(p, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
#else
/* In some corner cases, the driver may attempt to allocate memory pages
* even before the device file is open and the mapping for address space
@@ -78,8 +79,10 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a
* is enabled and because the pages may always return to memory pools and
* gain the movable property later on in their life cycle.
*/
- if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping)
+ if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping) {
__SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
#endif
unlock_page(p);
@@ -112,6 +115,7 @@ static void kbase_free_pages_worker(struct work_struct *work)
container_of(work, struct kbase_mem_migrate, free_pages_work);
struct kbase_device *kbdev = container_of(mem_migrate, struct kbase_device, mem_migrate);
struct page *p, *tmp;
+ struct kbase_page_metadata *page_md;
LIST_HEAD(free_list);
spin_lock(&mem_migrate->free_pages_lock);
@@ -123,8 +127,11 @@ static void kbase_free_pages_worker(struct work_struct *work)
list_del_init(&p->lru);
lock_page(p);
- if (PageMovable(p))
+ page_md = kbase_page_private(p);
+ if (IS_PAGE_MOVABLE(page_md->status)) {
__ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ }
unlock_page(p);
kbase_free_page_metadata(kbdev, p, &group_id);
@@ -189,9 +196,12 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
__SetPageMovable(new_page, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
#else
- if (kbdev->mem_migrate.inode->i_mapping)
+ if (kbdev->mem_migrate.inode->i_mapping) {
__SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
#endif
SetPagePrivate(new_page);
get_page(new_page);
@@ -257,9 +267,12 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa
/* Set PG_movable to the new page. */
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
__SetPageMovable(new_page, &movable_ops);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
#else
- if (kctx->kbdev->mem_migrate.inode->i_mapping)
+ if (kctx->kbdev->mem_migrate.inode->i_mapping) {
__SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping);
+ page_md->status = PAGE_MOVABLE_SET(page_md->status);
+ }
#endif
} else
dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -288,7 +301,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode)
CSTD_UNUSED(mode);
- if (!PageMovable(p) || !page_md)
+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
return false;
if (!spin_trylock(&page_md->migrate_lock))
@@ -327,6 +340,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode)
case NOT_MOVABLE:
/* Opportunistically clear the movable property for these pages */
__ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
break;
default:
/* State should always fall in one of the previous cases!
@@ -401,7 +415,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
#endif
CSTD_UNUSED(mode);
- if (!PageMovable(old_page) || !page_md)
+ if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
return -EINVAL;
if (!spin_trylock(&page_md->migrate_lock))
@@ -469,8 +483,10 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
* error is returned are called putback on, which may not be what we
* expect.
*/
- if (err < 0 && err != -EAGAIN)
+ if (err < 0 && err != -EAGAIN) {
__ClearPageMovable(old_page);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+ }
return err;
}
@@ -542,6 +558,7 @@ static void kbase_page_putback(struct page *p)
if (status_mem_pool || status_free_isolated_in_progress ||
status_free_pt_isolated_in_progress) {
__ClearPageMovable(p);
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
if (!WARN_ON_ONCE(!kbdev)) {
struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h
index 30d0803..76bbc99 100644
--- a/mali_kbase/mali_kbase_mem_migrate.h
+++ b/mali_kbase/mali_kbase_mem_migrate.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,13 +23,22 @@
* DOC: Base kernel page migration implementation.
*/
-#define PAGE_STATUS_MASK ((u8)0x7F)
+#define PAGE_STATUS_MASK ((u8)0x3F)
#define PAGE_STATUS_GET(status) (status & PAGE_STATUS_MASK)
#define PAGE_STATUS_SET(status, value) ((status & ~PAGE_STATUS_MASK) | (value & PAGE_STATUS_MASK))
+
#define PAGE_ISOLATE_SHIFT (7)
+#define PAGE_ISOLATE_MASK ((u8)1 << PAGE_ISOLATE_SHIFT)
#define PAGE_ISOLATE_SET(status, value) \
- ((status & PAGE_STATUS_MASK) | (value << PAGE_ISOLATE_SHIFT))
-#define IS_PAGE_ISOLATED(status) ((bool)(status & ~PAGE_STATUS_MASK))
+ ((status & ~PAGE_ISOLATE_MASK) | (value << PAGE_ISOLATE_SHIFT))
+#define IS_PAGE_ISOLATED(status) ((bool)(status & PAGE_ISOLATE_MASK))
+
+#define PAGE_MOVABLE_SHIFT (6)
+#define PAGE_MOVABLE_MASK ((u8)1 << PAGE_MOVABLE_SHIFT)
+#define PAGE_MOVABLE_CLEAR(status) ((status) & ~PAGE_MOVABLE_MASK)
+#define PAGE_MOVABLE_SET(status) (status | PAGE_MOVABLE_MASK)
+
+#define IS_PAGE_MOVABLE(status) ((bool)(status & PAGE_MOVABLE_MASK))
/* Global integer used to determine if module parameter value has been
* provided and if page migration feature is enabled.
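With PAGE_STATUS_MASK narrowed from 0x7F to 0x3F, the 8-bit page metadata status field now splits into a status code in bits 0-5, a MOVABLE flag in bit 6 and an ISOLATE flag in bit 7. A minimal sketch exercising the new accessors (the function itself is illustrative only):

static void page_status_bits_sketch(void)
{
	u8 status = 0;

	/* Mark the page movable, as done right after __SetPageMovable(). */
	status = PAGE_MOVABLE_SET(status);
	WARN_ON(!IS_PAGE_MOVABLE(status));
	WARN_ON(IS_PAGE_ISOLATED(status));

	/* Isolation (bit 7) is tracked independently of movability (bit 6). */
	status = PAGE_ISOLATE_SET(status, 1);
	WARN_ON(!IS_PAGE_ISOLATED(status));

	/* Clear movability again, e.g. around __ClearPageMovable(). */
	status = PAGE_MOVABLE_CLEAR(status);
	WARN_ON(IS_PAGE_MOVABLE(status));
}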
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index 75569cc..fa8f34d 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -28,6 +28,11 @@
#include <linux/shrinker.h>
#include <linux/atomic.h>
#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
#define pool_dbg(pool, format, ...) \
dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \
@@ -39,6 +44,47 @@
#define NOT_DIRTY false
#define NOT_RECLAIMED false
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool: Pointer to the memory pool.
+ * @page_owner: Pointer to the task/process that created the Kbase context
+ * for which a page needs to be allocated. It can be NULL if
+ * the page won't be associated with any Kbase context.
+ * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks if the current thread is a kernel thread and can make a
+ * request to the kernel to allocate a physical page. If the kernel thread is allocating
+ * a page for the Kbase context and the process that created the context is exiting
+ * or is being killed, then there is no point in doing a page allocation.
+ *
+ * The check done by the function is particularly helpful when the system is running
+ * low on memory. When a page is allocated from the context of a kernel thread, the
+ * OoM killer doesn't consider the kernel thread for killing and the kernel keeps
+ * retrying the allocation as long as the OoM killer is able to kill other processes.
+ * The check allows the kernel thread to quickly exit the page allocation loop once the
+ * OoM killer has initiated the killing of @page_owner, thereby unblocking the context
+ * termination for @page_owner and the freeing of GPU memory allocated by it. This helps
+ * prevent a kernel panic and also limits the number of innocent processes
+ * that get killed.
+ *
+ * Return: true if the page can be allocated otherwise false.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+ const bool alloc_from_kthread)
+{
+ if (likely(!alloc_from_kthread || !page_owner))
+ return true;
+
+ if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+ dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting",
+ __func__, page_owner->comm, task_pid_nr(page_owner));
+ return false;
+ }
+
+ return true;
+}
+
static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
{
ssize_t max_size = kbase_mem_pool_max_size(pool);
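The check is intended to be used right before falling back to the kernel allocator, as the updated kbase_mem_pool_grow() and kbase_mem_pool_alloc_pages() below do. A condensed sketch of that pattern (the function name is illustrative and the pool bookkeeping is elided):

static int grow_loop_sketch(struct kbase_mem_pool *pool,
			    struct task_struct *page_owner, size_t nr_to_grow)
{
	const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
	size_t i;

	for (i = 0; i < nr_to_grow; i++) {
		struct page *p;

		/* Bail out early if the owning process is already dying, rather
		 * than fighting the OoM killer for pages that will be freed anyway.
		 */
		if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
			return -ENOMEM;

		p = kbase_mem_alloc_page(pool);
		if (!p)
			return -ENOMEM;

		/* ...add p to the pool, as kbase_mem_pool_grow() does... */
	}

	return 0;
}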
@@ -342,10 +388,12 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
return nr_freed;
}
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow)
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
+ struct task_struct *page_owner)
{
struct page *p;
size_t i;
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
kbase_mem_pool_lock(pool);
@@ -360,6 +408,9 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow)
}
kbase_mem_pool_unlock(pool);
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ return -ENOMEM;
+
p = kbase_mem_alloc_page(pool);
if (!p) {
kbase_mem_pool_lock(pool);
@@ -392,7 +443,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
if (new_size < cur_size)
kbase_mem_pool_shrink(pool, cur_size - new_size);
else if (new_size > cur_size)
- err = kbase_mem_pool_grow(pool, new_size - cur_size);
+ err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
if (err) {
size_t grown_size = kbase_mem_pool_size(pool);
@@ -656,13 +707,15 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
}
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed)
+ struct tagged_addr *pages, bool partial_allowed,
+ struct task_struct *page_owner)
{
struct page *p;
size_t nr_from_pool;
size_t i = 0;
int err = -ENOMEM;
size_t nr_pages_internal;
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
nr_pages_internal = nr_4k_pages / (1u << (pool->order));
@@ -697,7 +750,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
if (i != nr_4k_pages && pool->next_pool) {
/* Allocate via next pool */
err = kbase_mem_pool_alloc_pages(pool->next_pool, nr_4k_pages - i, pages + i,
- partial_allowed);
+ partial_allowed, page_owner);
if (err < 0)
goto err_rollback;
@@ -706,6 +759,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
} else {
/* Get any remaining pages from kernel */
while (i != nr_4k_pages) {
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ goto err_rollback;
+
p = kbase_mem_alloc_page(pool);
if (!p) {
if (partial_allowed)
diff --git a/mali_kbase/mali_kbase_refcount_defs.h b/mali_kbase/mali_kbase_refcount_defs.h
new file mode 100644
index 0000000..c517a2d
--- /dev/null
+++ b/mali_kbase/mali_kbase_refcount_defs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_REFCOUNT_DEFS_H_
+#define _KBASE_REFCOUNT_DEFS_H_
+
+/*
+ * The Refcount API is available from 4.11 onwards
+ * This file hides the resulting compatibility issues from the rest of the driver
+ */
+
+#include <linux/version.h>
+#include <linux/types.h>
+
+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
+
+#define kbase_refcount_t atomic_t
+#define kbase_refcount_read(x) atomic_read(x)
+#define kbase_refcount_set(x, v) atomic_set(x, v)
+#define kbase_refcount_dec_and_test(x) atomic_dec_and_test(x)
+#define kbase_refcount_dec(x) atomic_dec(x)
+#define kbase_refcount_inc_not_zero(x) atomic_inc_not_zero(x)
+#define kbase_refcount_inc(x) atomic_inc(x)
+
+#else
+
+#include <linux/refcount.h>
+
+#define kbase_refcount_t refcount_t
+#define kbase_refcount_read(x) refcount_read(x)
+#define kbase_refcount_set(x, v) refcount_set(x, v)
+#define kbase_refcount_dec_and_test(x) refcount_dec_and_test(x)
+#define kbase_refcount_dec(x) refcount_dec(x)
+#define kbase_refcount_inc_not_zero(x) refcount_inc_not_zero(x)
+#define kbase_refcount_inc(x) refcount_inc(x)
+
+#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */
+
+#endif /* _KBASE_REFCOUNT_DEFS_H_ */
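A brief usage sketch of the wrapper, mirroring the get/put pattern that kbase_va_region_alloc_get()/kbase_va_region_alloc_put() adopt in mali_kbase_mem.h; the structure and functions here are illustrative, not driver code:

struct refcounted_obj_sketch {
	kbase_refcount_t refcnt;
};

static void obj_init_sketch(struct refcounted_obj_sketch *obj)
{
	kbase_refcount_set(&obj->refcnt, 1);
}

static struct refcounted_obj_sketch *obj_get_sketch(struct refcounted_obj_sketch *obj)
{
	kbase_refcount_inc(&obj->refcnt);
	return obj;
}

static void obj_put_sketch(struct refcounted_obj_sketch *obj)
{
	/* Resolves to atomic_dec_and_test() before 4.11 and to
	 * refcount_dec_and_test() afterwards; free only on the final put.
	 */
	if (kbase_refcount_dec_and_test(&obj->refcnt))
		kfree(obj);
}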
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index b64bbc1..f494a8f 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -943,6 +943,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
int ret;
u32 i;
+ if (!kbase_mem_allow_alloc(kctx)) {
+ dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+ current->comm, current->pid, kctx->tgid, kctx->id);
+ ret = -EINVAL;
+ goto fail;
+ }
+
/* For backwards compatibility, and to prevent reading more than 1 jit
* info struct on jit version 1
*/
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index 4a09265..4cac787 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -150,17 +150,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
"true" : "false";
int as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA %pK\n"
"PA_VALID: %s\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"access type 0x%X: %s\n"
"source id 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
addr_valid,
status,
exception_type, kbase_gpu_exception_name(exception_type),
@@ -557,6 +558,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
kbdev->as[i].gf_data.addr = 0ULL;
+ kbdev->as[i].is_unresponsive = false;
kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i);
if (!kbdev->as[i].pf_wq)
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index 83605c3..d716ce0 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
u32 const exception_data = (status >> 8) & 0xFFFFFF;
int const as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA %pK\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
status,
exception_type, kbase_gpu_exception_name(exception_type),
exception_data,
@@ -428,6 +429,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
+ kbdev->as[i].is_unresponsive = false;
kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i);
if (!kbdev->as[i].pf_wq)
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 41876ff..ea58381 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -144,35 +144,21 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz
enum kbase_mmu_op_type op)
{
u32 flush_op;
- int ret;
-
- if (WARN_ON(kbdev == NULL))
- return;
lockdep_assert_held(&kbdev->hwaccess_lock);
/* Translate operation to command */
- if (op == KBASE_MMU_OP_FLUSH_PT) {
+ if (op == KBASE_MMU_OP_FLUSH_PT)
flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2;
- } else if (op == KBASE_MMU_OP_FLUSH_MEM) {
+ else if (op == KBASE_MMU_OP_FLUSH_MEM)
flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC;
- } else {
+ else {
dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op);
return;
}
- ret = kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op);
-
- if (ret) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover
- */
- dev_err(kbdev->dev,
- "Flush for physical address range did not complete. Issuing GPU soft-reset to recover");
-
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
- }
+ if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op))
+ dev_err(kbdev->dev, "Flush for physical address range did not complete");
}
#endif
@@ -190,21 +176,15 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz
static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
as_nr = kctx ? kctx->as_nr : as_nr;
- err = kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param);
- }
-
- if (err) {
- dev_err(kbdev->dev,
- "Invalidate after GPU page table update did not complete. Issuing GPU soft-reset to recover");
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
+ if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev,
+ "Invalidate after GPU page table update did not complete");
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -215,25 +195,14 @@ static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kct
static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
/* AS transaction begin */
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (kbdev->pm.backend.gpu_powered)
- err = kbase_mmu_hw_do_flush_locked(kbdev, as, op_param);
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover.
- */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
-
- if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu_locked(kbdev);
- }
+ if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -308,7 +277,6 @@ static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_contex
static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
int as_nr, const struct kbase_mmu_hw_op_param *op_param)
{
- int err = 0;
unsigned long flags;
/* AS transaction begin */
@@ -317,19 +285,8 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct
if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
as_nr = kctx ? kctx->as_nr : as_nr;
- err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr],
- op_param);
- }
-
- if (err) {
- /* Flush failed to complete, assume the GPU has hung and
- * perform a reset to recover.
- */
- dev_err(kbdev->dev,
- "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
-
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
+ if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param))
+ dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -405,13 +362,11 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
* @level: The level of MMU page table.
* @flush_op: The type of MMU flush operation to perform.
* @dirty_pgds: Flags to track every level where a PGD has been updated.
- * @free_pgds_list: Linked list of the page directory pages to free.
*/
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
u64 vpfn, int level,
- enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
- struct list_head *free_pgds_list);
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds);
static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
@@ -485,14 +440,17 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl
phys_addr_t pgd)
{
struct page *p;
+ bool page_is_isolated = false;
lockdep_assert_held(&mmut->mmu_lock);
p = pfn_to_page(PFN_DOWN(pgd));
+ page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
- kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
-
- kbase_mmu_account_freed_pgd(kbdev, mmut);
+ if (likely(!page_is_isolated)) {
+ kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
+ kbase_mmu_account_freed_pgd(kbdev, mmut);
+ }
}
/**
@@ -500,41 +458,42 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl
*
* @kbdev: Device pointer.
* @mmut: GPU MMU page table.
- * @free_pgds_list: Linked list of the page directory pages to free.
*
* This function will call kbase_mmu_free_pgd() on each page directory page
- * present in the @free_pgds_list.
+ * present in the list of free PGDs inside @mmut.
*
* The function is supposed to be called after the GPU cache and MMU TLB has
* been invalidated post the teardown loop.
+ *
+ * The mmu_lock shall be held prior to calling the function.
*/
-static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- struct list_head *free_pgds_list)
+static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- struct page *page, *next_page;
+ size_t i;
- mutex_lock(&mmut->mmu_lock);
+ lockdep_assert_held(&mmut->mmu_lock);
- list_for_each_entry_safe(page, next_page, free_pgds_list, lru) {
- list_del_init(&page->lru);
- kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(page));
- }
+ for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
+ kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i]));
- mutex_unlock(&mmut->mmu_lock);
+ mmut->scratch_mem.free_pgds.head_index = 0;
}
-static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev,
- struct kbase_mmu_table *mmut,
- struct page *p, struct list_head *free_pgds_list)
+static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p)
{
- bool page_is_isolated = false;
-
lockdep_assert_held(&mmut->mmu_lock);
- page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
+ if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
+ return;
+
+ mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p;
+}
+
+static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
+{
+ lockdep_assert_held(&mmut->mmu_lock);
- if (likely(!page_is_isolated))
- list_add(&p->lru, free_pgds_list);
+ mmut->scratch_mem.free_pgds.head_index = 0;
}
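
The hunks above replace the free_pgds_list that callers used to pass around (a struct list_head of pages) with a small fixed-capacity array held in mmut->scratch_mem.free_pgds and protected by mmu_lock: teardown paths push PGD pages into it, and kbase_mmu_free_pgds_list() drains and resets it after the TLB invalidation. A minimal stand-alone C sketch of that lifecycle follows; the MAX_FREE_PGDS value, the struct layout and the function names below are illustrative assumptions, not the real kbase definitions.

#include <stdio.h>
#include <stddef.h>

#define MAX_FREE_PGDS 16 /* assumed capacity; the real bound lives in kbase headers */

struct free_pgds_model {
	unsigned long pgds[MAX_FREE_PGDS]; /* stands in for struct page * */
	size_t head_index;                 /* next slot to fill */
};

/* mirrors kbase_mmu_add_to_free_pgds_list(): drop the page if the array is full */
static void add_to_free_pgds(struct free_pgds_model *f, unsigned long pgd)
{
	if (f->head_index > MAX_FREE_PGDS - 1)
		return;
	f->pgds[f->head_index++] = pgd;
}

/* mirrors kbase_mmu_free_pgds_list(): free everything collected, then reset */
static void free_pgds(struct free_pgds_model *f)
{
	size_t i;

	for (i = 0; i < f->head_index; i++)
		printf("free PGD at %#lx\n", f->pgds[i]);
	f->head_index = 0;
}

int main(void)
{
	struct free_pgds_model f = { .head_index = 0 };

	add_to_free_pgds(&f, 0x1000);
	add_to_free_pgds(&f, 0x2000);
	free_pgds(&f); /* the driver holds mmu_lock around both steps */
	return 0;
}
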
/**
@@ -627,6 +586,7 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
struct kbase_mmu_hw_op_param op_param;
+ int ret = 0;
mutex_lock(&kbdev->mmu_hw_mutex);
@@ -645,16 +605,20 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
op_param.flush_skip_levels =
pgd_level_to_skip_flush(dirty_pgds);
- kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
+ ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
} else {
mmu_hw_operation_begin(kbdev);
- kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
+ ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
mmu_hw_operation_end(kbdev);
}
mutex_unlock(&kbdev->mmu_hw_mutex);
+ if (ret)
+ dev_err(kbdev->dev,
+ "Flush for GPU page fault due to write access did not complete");
+
kbase_mmu_hw_enable_fault(kbdev, faulting_as,
KBASE_MMU_FAULT_TYPE_PAGE);
}
@@ -869,17 +833,13 @@ static bool page_fault_try_alloc(struct kbase_context *kctx,
return false;
}
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (new_pages >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) {
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
*grow_2mb_pool = true;
} else {
-#endif
root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
*grow_2mb_pool = false;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (region->gpu_alloc != region->cpu_alloc)
new_pages *= 2;
@@ -1128,22 +1088,22 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
}
page_fault_retry:
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- if (!prealloc_sas[i]) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
-
+ if (kbdev->pagesize_2mb) {
+ /* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
if (!prealloc_sas[i]) {
- kbase_mmu_report_fault_and_kill(
- kctx, faulting_as,
- "Failed pre-allocating memory for sub-allocations' metadata",
- fault);
- goto fault_done;
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+
+ if (!prealloc_sas[i]) {
+ kbase_mmu_report_fault_and_kill(
+ kctx, faulting_as,
+ "Failed pre-allocating memory for sub-allocations' metadata",
+ fault);
+ goto fault_done;
+ }
}
}
}
-#endif /* CONFIG_MALI_2MB_ALLOC */
/* so we have a translation fault,
* let's see if it is for growable memory
@@ -1457,8 +1417,7 @@ page_fault_retry:
* Otherwise fail the allocation.
*/
if (pages_to_grow > 0) {
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (grow_2mb_pool) {
+ if (kbdev->pagesize_2mb && grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
&kctx->mem_pools.large[
@@ -1469,18 +1428,15 @@ page_fault_retry:
>> lp_mem_pool->order;
ret = kbase_mem_pool_grow(lp_mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
} else {
-#endif
struct kbase_mem_pool *const mem_pool =
&kctx->mem_pools.small[
region->gpu_alloc->group_id];
ret = kbase_mem_pool_grow(mem_pool,
- pages_to_grow);
-#ifdef CONFIG_MALI_2MB_ALLOC
+ pages_to_grow, kctx->task);
}
-#endif
}
if (ret < 0) {
/* failed to extend, handle as a normal PF */
@@ -1570,15 +1526,24 @@ alloc_free:
return KBASE_MMU_INVALID_PGD_ADDRESS;
}
-/* Given PGD PFN for level N, return PGD PFN for level N+1, allocating the
- * new table from the pool if needed and possible
+/**
+ * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @pgd: Physical address of level N page directory.
+ * @vpfn: The virtual page frame number.
+ * @level: The level of MMU page table (N).
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - level N+1 PGD does not exist
+ * * -EINVAL - kmap() failed for level N PGD PFN
*/
static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- phys_addr_t *pgd, u64 vpfn, int level, bool *newly_created_pgd,
- u64 *dirty_pgds)
+ phys_addr_t *pgd, u64 vpfn, int level)
{
u64 *page;
- u64 pgd_vpfn = vpfn;
phys_addr_t target_pgd;
struct page *p;
@@ -1594,67 +1559,15 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
p = pfn_to_page(PFN_DOWN(*pgd));
page = kmap(p);
if (page == NULL) {
- dev_warn(kbdev->dev, "%s: kmap failure", __func__);
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
return -EINVAL;
}
if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
- unsigned int current_valid_entries;
- u64 managed_pte;
-
- target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
- dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__);
- kunmap(p);
- return -ENOMEM;
- }
-
- current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(page);
- kbdev->mmu_mode->entry_set_pte(&managed_pte, target_pgd);
- page[vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
- kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
- kbdev->mmu_mode->set_num_valid_entries(page, current_valid_entries + 1);
-
- /* Rely on the caller to update the address space flags. */
- if (newly_created_pgd && !*newly_created_pgd) {
- *newly_created_pgd = true;
- if (dirty_pgds)
- *dirty_pgds |= 1ULL << level;
- }
-
- /* A new valid entry is added to an existing PGD. Perform the
- * invalidate operation for GPU cache as it could be having a
- * cacheline that contains the entry (in an invalid form).
- * Even if the parent PGD was newly created, invalidation of
- * GPU cache is still needed. For explanation, please refer
- * the comment in kbase_mmu_insert_pages_no_flush().
- */
- kbase_mmu_sync_pgd(kbdev, mmut->kctx,
- *pgd + (vpfn * sizeof(u64)),
- kbase_dma_addr(p) + (vpfn * sizeof(u64)),
- sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
-
- /* Update the new target_pgd page to its stable state */
- if (kbase_page_migration_enabled) {
- struct kbase_page_metadata *page_md =
- kbase_page_private(phys_to_page(target_pgd));
-
- spin_lock(&page_md->migrate_lock);
-
- WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
- IS_PAGE_ISOLATED(page_md->status));
-
- if (mmut->kctx) {
- page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
- page_md->data.pt_mapped.mmut = mmut;
- page_md->data.pt_mapped.pgd_vpfn_level =
- PGD_VPFN_LEVEL_SET(pgd_vpfn, level);
- } else {
- page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
- }
-
- spin_unlock(&page_md->migrate_lock);
- }
+ dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
+ vpfn);
+ kunmap(p);
+ return -EFAULT;
} else {
target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
@@ -1667,12 +1580,69 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
return 0;
}
+/**
+ * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @vpfn: The virtual page frame number.
+ * @in_level: The level of MMU page table (N).
+ * @out_level: Set to the level of the lowest valid PGD found on success.
+ * Invalid on error.
+ * @out_pgd: Set to the lowest valid PGD found on success.
+ * Invalid on error.
+ *
+ * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or
+ * closest to in_level
+ *
+ * Terminology:
+ * Level-0 = Top-level = highest
+ * Level-3 = Bottom-level = lowest
+ *
+ * Return:
+ * * 0 - OK
+ * * -EINVAL - kmap() failed during page table walk.
+ */
+static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd)
+{
+ phys_addr_t pgd;
+ int l;
+ int err = 0;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+ pgd = mmut->pgd;
+
+ for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) {
+ err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
+
+ /* Handle failure condition */
+ if (err) {
+ dev_dbg(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
+ break;
+ }
+ }
+
+ *out_pgd = pgd;
+ *out_level = l;
+
+ /* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid.
+ * This implies that we have found the lowest valid pgd. Reset the error code.
+ */
+ if (err == -EFAULT)
+ err = 0;
+
+ return err;
+}
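
mmu_get_lowest_valid_pgd() above walks from the top level towards in_level and treats the first missing entry as its stopping point: the PGD it last reached is the one that new child PGDs will later be linked into, which is why the -EFAULT from mmu_get_next_pgd() is converted back into success. The stand-alone sketch below models that walk with a toy 4-level table and the same 9-bits-per-level index derivation; the data structures and the example VPFN are illustrative only.

#include <stdio.h>
#include <stdint.h>

#define LEVELS 4
#define ENTRIES 512
#define NO_CHILD UINT64_MAX

/* child[level][index] is the id of the child table, or NO_CHILD */
static uint64_t child[LEVELS][ENTRIES];

/* same 9-bits-per-level slicing the driver uses: level 3 -> bits [8:0], level 0 -> bits [35:27] */
static unsigned int level_index(uint64_t vpfn, int level)
{
	return (unsigned int)((vpfn >> ((3 - level) * 9)) & 0x1FF);
}

/* walk from level 0 down towards in_level; stop at the first missing entry */
static int lowest_valid_level(uint64_t vpfn, int in_level)
{
	int l;

	for (l = 0; l < in_level; l++) {
		if (child[l][level_index(vpfn, l)] == NO_CHILD)
			break; /* the driver maps this to -EFAULT, then back to success */
	}
	return l; /* level of the lowest PGD that already exists */
}

int main(void)
{
	uint64_t vpfn = 0x12345;
	int l, i;

	for (l = 0; l < LEVELS; l++)
		for (i = 0; i < ENTRIES; i++)
			child[l][i] = NO_CHILD;

	/* only the level-0 entry covering vpfn points at an existing level-1 PGD */
	child[0][level_index(vpfn, 0)] = 1;

	printf("lowest valid PGD is at level %d\n", lowest_valid_level(vpfn, 3)); /* prints 1 */
	return 0;
}
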
+
/*
- * Returns the PGD for the specified level of translation
+ * On success, sets out_pgd to the PGD for the specified level of translation
+ * Returns -EFAULT if a valid PGD is not found
*/
static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- int level, phys_addr_t *out_pgd, bool *newly_created_pgd,
- u64 *dirty_pgds)
+ int level, phys_addr_t *out_pgd)
{
phys_addr_t pgd;
int l;
@@ -1681,12 +1651,12 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab
pgd = mmut->pgd;
for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
- int err =
- mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds);
+ int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
/* Handle failure condition */
if (err) {
- dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__,
- l);
+ dev_err(kbdev->dev,
+ "%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
+ __func__, l + 1);
return err;
}
}
@@ -1696,17 +1666,9 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab
return 0;
}
-static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- phys_addr_t *out_pgd, bool *newly_created_pgd, u64 *dirty_pgds)
-{
- return mmu_get_pgd_at_level(kbdev, mmut, vpfn, MIDGARD_MMU_BOTTOMLEVEL, out_pgd,
- newly_created_pgd, dirty_pgds);
-}
-
static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 from_vpfn,
u64 to_vpfn, u64 *dirty_pgds,
- struct list_head *free_pgds_list,
struct tagged_addr *phys, bool ignore_page_migration)
{
u64 vpfn = from_vpfn;
@@ -1719,6 +1681,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
lockdep_assert_held(&mmut->mmu_lock);
mmu_mode = kbdev->mmu_mode;
+ kbase_mmu_reset_free_pgds_list(mmut);
while (vpfn < to_vpfn) {
unsigned int idx = vpfn & 0x1FF;
@@ -1779,11 +1742,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
if (!num_of_valid_entries) {
kunmap(p);
- kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- KBASE_MMU_OP_NONE, dirty_pgds,
- free_pgds_list);
+ KBASE_MMU_OP_NONE, dirty_pgds);
vpfn += count;
continue;
}
@@ -1863,34 +1825,209 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
}
-/*
- * Map the single page 'phys' 'nr' of times, starting at GPU PFN 'vpfn'
+/**
+ * update_parent_pgds() - Updates the page table from bottom level towards
+ * the top level to insert a new ATE
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @cur_level: The level of MMU page table where the ATE needs to be added.
+ * The bottom PGD level.
+ * @insert_level: The level of MMU page table where the chain of newly allocated
+ * PGDs needs to be linked-in/inserted.
+ * The top-most PGD level to be updated.
+ * @insert_vpfn: The virtual page frame number for the ATE.
+ * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
+ * the physical addresses of newly allocated PGDs from index
+ * insert_level+1 to cur_level, and an existing PGD at index
+ * insert_level.
+ *
+ * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
+ * at insert_level, which already exists in the MMU Page Tables. Migration status is also
+ * updated for all the newly allocated PGD pages.
+ *
+ * Return:
+ * * 0 - OK
+ * * -EFAULT - a PGD in @pgds_to_insert is invalid
+ * * -EINVAL - kmap() failed for a parent PGD page
+ */
+static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ int cur_level, int insert_level, u64 insert_vpfn,
+ phys_addr_t *pgds_to_insert)
+{
+ int pgd_index;
+ int err = 0;
+
+ /* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1)
+ * Loop runs from the bottom-most to the top-most level so that all entries in the chain
+ * are valid when they are inserted into the MMU Page table via the insert_level PGD.
+ */
+ for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) {
+ int parent_index = pgd_index - 1;
+ phys_addr_t parent_pgd = pgds_to_insert[parent_index];
+ unsigned int current_valid_entries;
+ u64 pte;
+ phys_addr_t target_pgd = pgds_to_insert[pgd_index];
+ u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
+ struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
+ u64 *parent_page_va;
+
+ if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) {
+ err = -EFAULT;
+ goto failure_recovery;
+ }
+
+ parent_page_va = kmap(parent_page);
+ if (unlikely(parent_page_va == NULL)) {
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
+ err = -EINVAL;
+ goto failure_recovery;
+ }
+
+ current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
+
+ kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
+ parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
+ kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
+ kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
+ kunmap(parent_page);
+
+ if (parent_index != insert_level) {
+ /* Newly allocated PGDs */
+ kbase_mmu_sync_pgd_cpu(
+ kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64));
+ } else {
+ /* A new valid entry is added to an existing PGD. Perform the
+ * invalidate operation for GPU cache as it could be having a
+ * cacheline that contains the entry (in an invalid form).
+ */
+ kbase_mmu_sync_pgd(
+ kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
+ kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
+ sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
+ }
+
+ /* Update the new target_pgd page to its stable state */
+ if (kbase_page_migration_enabled) {
+ struct kbase_page_metadata *page_md =
+ kbase_page_private(phys_to_page(target_pgd));
+
+ spin_lock(&page_md->migrate_lock);
+
+ WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
+ IS_PAGE_ISOLATED(page_md->status));
+
+ if (mmut->kctx) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
+ page_md->data.pt_mapped.mmut = mmut;
+ page_md->data.pt_mapped.pgd_vpfn_level =
+ PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index);
+ } else {
+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
+ }
+
+ spin_unlock(&page_md->migrate_lock);
+ }
+ }
+
+ return 0;
+
+failure_recovery:
+ /* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */
+ for (; pgd_index < cur_level; pgd_index++) {
+ phys_addr_t pgd = pgds_to_insert[pgd_index];
+ struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
+ u64 *pgd_page_va = kmap(pgd_page);
+ u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
+ kunmap(pgd_page);
+ }
+
+ return err;
+}
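
update_parent_pgds() above writes the chain of newly allocated PGDs from the bottom level upwards, so the single write into the pre-existing PGD at insert_level publishes a chain that is already complete, and on failure it invalidates exactly the entries it had written. A small model of that ordering, with plain printfs standing in for the real page-table writes and cache maintenance:

#include <stdio.h>

#define BOTTOM_LEVEL 3

/* write the chain bottom-up; fail_at_level < 0 models the success path */
static int link_new_pgds(int cur_level, int insert_level, int fail_at_level)
{
	int level;

	for (level = cur_level; level > insert_level; level--) {
		int parent = level - 1;

		if (level == fail_at_level) {
			/* undo the entries written by the iterations already completed */
			for (; level < cur_level; level++)
				printf("invalidate entry written at level %d\n", level);
			return -1;
		}
		printf("write entry for level-%d PGD into level-%d PGD%s\n", level, parent,
		       parent == insert_level ? " (pre-existing, needs GPU cache flush)" : "");
	}
	return 0;
}

int main(void)
{
	/* existing PGD at level 1, new PGDs allocated for levels 2 and 3 */
	link_new_pgds(BOTTOM_LEVEL, 1, -1); /* success path */
	link_new_pgds(BOTTOM_LEVEL, 1, 2);  /* failure while linking the level-2 PGD */
	return 0;
}
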
+
+/**
+ * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to
+ * level_high (inclusive)
+ *
+ * @kbdev: Device pointer.
+ * @mmut: GPU MMU page table.
+ * @level_low: The lower bound for the levels for which the PGD allocs are required
+ * @level_high: The higher bound for the levels for which the PGD allocs are required
+ * @new_pgds: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
+ * newly allocated PGD addresses to.
+ *
+ * Numerically, level_low <= level_high, not to be confused with top level and
+ * bottom level concepts for MMU PGDs. They are only used as low and high bounds
+ * in an incrementing for-loop.
+ *
+ * Return:
+ * * 0 - OK
+ * * -ENOMEM - allocation failed for a PGD.
*/
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int const group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ phys_addr_t *new_pgds, int level_low, int level_high)
+{
+ int err = 0;
+ int i;
+
+ lockdep_assert_held(&mmut->mmu_lock);
+
+ for (i = level_low; i <= level_high; i++) {
+ do {
+ new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ break;
+
+ mutex_unlock(&mmut->mmu_lock);
+ err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
+ level_high, NULL);
+ mutex_lock(&mmut->mmu_lock);
+ if (err) {
+ dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
+ __func__, err);
+
+ /* Free all PGDs allocated in previous successful iterations
+ * from (i-1) to level_low
+ */
+ for (i = (i - 1); i >= level_low; i--) {
+ if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
+ }
+
+ return err;
+ }
+ } while (1);
+ }
+
+ return 0;
+}
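
mmu_insert_alloc_pgds() above retries each PGD allocation after dropping mmu_lock, growing the small-page memory pool and retaking the lock, and frees any PGDs it already allocated if the grow fails. The retry-and-cleanup pattern is sketched below against a toy allocator; the pool size, the addresses and the helper names are assumptions made for the sketch.

#include <stdio.h>

#define INVALID_PGD 0UL

static unsigned long pool[8];
static int pool_count; /* pages currently available in the pool */

static unsigned long alloc_pgd(void)
{
	if (pool_count == 0)
		return INVALID_PGD;
	return pool[--pool_count];
}

static int grow_pool(int nr)
{
	int i;

	if (nr > 8)
		return -1; /* model a failed grow */
	for (i = 0; i < nr; i++)
		pool[i] = 0x1000UL * (unsigned long)(i + 1);
	pool_count = nr;
	return 0;
}

/* allocate one PGD per level in [level_low, level_high]; clean up on failure */
static int alloc_pgds(unsigned long *new_pgds, int level_low, int level_high)
{
	int i;

	for (i = level_low; i <= level_high; i++) {
		do {
			new_pgds[i] = alloc_pgd();
			if (new_pgds[i] != INVALID_PGD)
				break;
			/* the driver drops mmu_lock here, grows the pool, then retakes the lock */
			if (grow_pool(level_high)) {
				for (i = i - 1; i >= level_low; i--)
					if (new_pgds[i] != INVALID_PGD)
						printf("freeing PGD %#lx\n", new_pgds[i]);
				return -1;
			}
		} while (1);
	}
	return 0;
}

int main(void)
{
	unsigned long new_pgds[4] = { 0 };

	printf("result: %d\n", alloc_pgds(new_pgds, 2, 3));
	return 0;
}
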
+
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
- /* In case the insert_single_page only partially completes
- * we need to be able to recover
- */
- bool recover_required = false;
- u64 start_vpfn = vpfn;
- size_t recover_count = 0;
+ u64 insert_vpfn = start_vpfn;
size_t remain = nr;
int err;
struct kbase_device *kbdev;
- enum kbase_mmu_op_type flush_op;
u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ enum kbase_mmu_op_type flush_op;
+ struct kbase_mmu_table *mmut = &kctx->mmu;
+ int l, cur_level, insert_level;
if (WARN_ON(kctx == NULL))
return -EINVAL;
/* 64-bit address range is the max */
- KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
+ KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
kbdev = kctx->kbdev;
@@ -1901,7 +2038,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
/* If page migration is enabled, pages involved in multiple GPU mappings
* are always treated as not movable.
*/
- if (kbase_page_migration_enabled) {
+ if (kbase_page_migration_enabled && !ignore_page_migration) {
struct page *phys_page = as_page(phys);
struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
@@ -1912,12 +2049,11 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
}
}
- mutex_lock(&kctx->mmu.mmu_lock);
+ mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
- unsigned int index = vpfn & 0x1FF;
- unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
+ unsigned int vindex = insert_vpfn & 0x1FF;
+ unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
register unsigned int num_of_valid_entries;
bool newly_created_pgd = false;
@@ -1925,64 +2061,61 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
if (count > remain)
count = remain;
+ cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_bottom_pgd() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu, vpfn, &pgd, &newly_created_pgd,
- &dirty_pgds);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- mutex_unlock(&kctx->mmu.mmu_lock);
- err = kbase_mem_pool_grow(
- &kbdev->mem_pools.small[
- kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
- mutex_lock(&kctx->mmu.mmu_lock);
- } while (!err);
+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
+
if (err) {
- dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn,
- start_vpfn + recover_count,
- &dirty_pgds, &free_pgds_list,
- NULL, true);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure", __func__);
- if (recover_required) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn,
- start_vpfn + recover_count,
- &dirty_pgds, &free_pgds_list,
- NULL, true);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
kbdev->mmu_mode->get_num_valid_entries(pgd_page);
for (i = 0; i < count; i++) {
- unsigned int ofs = index + i;
+ unsigned int ofs = vindex + i;
/* Fail if the current page is a valid ATE entry */
KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
@@ -1994,50 +2127,87 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
kbdev->mmu_mode->set_num_valid_entries(
pgd_page, num_of_valid_entries + count);
- vpfn += count;
- remain -= count;
-
- if (count > 0 && !newly_created_pgd)
- dirty_pgds |= 1ULL << MIDGARD_MMU_BOTTOMLEVEL;
+ dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL);
/* MMU cache flush operation here will depend on whether bottom level
* PGD is newly created or not.
*
- * If bottom level PGD is newly created then no cache maintenance is
+ * If bottom level PGD is newly created then no GPU cache maintenance is
* required as the PGD will not exist in GPU cache. Otherwise GPU cache
* maintenance is required for existing PGD.
*/
flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
- kbase_mmu_sync_pgd(kbdev, kctx, pgd + (index * sizeof(u64)),
- kbase_dma_addr(p) + (index * sizeof(u64)), count * sizeof(u64),
+ kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
+ kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
flush_op);
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kunmap(p);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
+ insert_vpfn += count;
+ remain -= count;
kunmap(p);
- /* We have started modifying the page table.
- * If further pages need inserting and fail we need to undo what
- * has already taken place
- */
- recover_required = true;
- recover_count += count;
}
- mutex_unlock(&kctx->mmu.mmu_lock);
- mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info, false);
+ mutex_unlock(&mmut->mmu_lock);
+
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ false);
return 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+
fail_unlock:
- mutex_unlock(&kctx->mmu.mmu_lock);
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds,
+ NULL, true);
+ }
- mmu_flush_invalidate_insert_pages(kbdev, &kctx->mmu, start_vpfn, nr, dirty_pgds,
- mmu_sync_info, true);
- kbase_mmu_free_pgds_list(kbdev, &kctx->mmu, &free_pgds_list);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
+ true);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
+
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
+{
+ /* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
+ return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
+ false);
+}
+
static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
struct kbase_va_region *reg,
struct kbase_mmu_table *mmut, const u64 vpfn)
@@ -2139,7 +2309,9 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
size_t remain = nr;
int err;
struct kbase_mmu_mode const *mmu_mode;
- LIST_HEAD(free_pgds_list);
+ unsigned int i;
+ phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ int l, cur_level, insert_level;
/* Note that 0 is a valid start_vpfn */
/* 64-bit address range is the max */
@@ -2154,13 +2326,12 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
mutex_lock(&mmut->mmu_lock);
while (remain) {
- unsigned int i;
unsigned int vindex = insert_vpfn & 0x1FF;
unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
struct page *p;
- int cur_level;
register unsigned int num_of_valid_entries;
bool newly_created_pgd = false;
+ enum kbase_mmu_op_type flush_op;
if (count > remain)
count = remain;
@@ -2170,57 +2341,53 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
else
cur_level = MIDGARD_MMU_BOTTOMLEVEL;
+ insert_level = cur_level;
+
/*
- * Repeatedly calling mmu_get_pgd_at_level() is clearly
+ * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
* suboptimal. We don't have to re-parse the whole tree
* each time (just cache the l0-l2 sequence).
* On the other hand, it's only a gain when we map more than
* 256 pages at once (on average). Do we really care?
*/
- do {
- err = mmu_get_pgd_at_level(kbdev, mmut, insert_vpfn, cur_level, &pgd,
- &newly_created_pgd, dirty_pgds);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- mutex_unlock(&mmut->mmu_lock);
- err = kbase_mem_pool_grow(
- &kbdev->mem_pools.small[mmut->group_id],
- cur_level);
- mutex_lock(&mmut->mmu_lock);
- } while (!err);
+ /* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpfn */
+ err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
+ &pgd);
if (err) {
- dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn,
- insert_vpfn, dirty_pgds,
- &free_pgds_list, phys,
- ignore_page_migration);
- }
+ dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
+ __func__, err);
goto fail_unlock;
}
+ /* No valid pgd at cur_level */
+ if (insert_level != cur_level) {
+ /* Allocate new pgds for all missing levels from the required level
+ * down to the lowest valid pgd at insert_level
+ */
+ err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
+ cur_level);
+ if (err)
+ goto fail_unlock;
+
+ newly_created_pgd = true;
+
+ new_pgds[insert_level] = pgd;
+
+ /* If we didn't find an existing valid pgd at cur_level,
+ * we've now allocated one. The ATE in the next step should
+ * be inserted in this newly allocated pgd.
+ */
+ pgd = new_pgds[cur_level];
+ }
+
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "%s: kmap failure", __func__);
- if (insert_vpfn != start_vpfn) {
- /* Invalidate the pages we have partially
- * completed
- */
- mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn,
- insert_vpfn, dirty_pgds,
- &free_pgds_list, phys,
- ignore_page_migration);
- }
+ dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
- goto fail_unlock;
+
+ goto fail_unlock_free_pgds;
}
num_of_valid_entries =
@@ -2262,34 +2429,39 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
- if (dirty_pgds && !newly_created_pgd)
- *dirty_pgds |= 1ULL << cur_level;
-
- phys += count;
- insert_vpfn += count;
- remain -= count;
+ if (dirty_pgds)
+ *dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level);
- /* Even if mmu_get_pgd_at_level() allocated a new bottom level
- * table page, the invalidation of L2 cache is still needed for
- * for the valid entries written in that page. This is because a
- * race can happen as soon as the entry of parent level table is
- * updated to point to the page of bottom level table.
- * GPU can try to access within the the same virtual range that
- * is being mapped, before the valid entries of bottom level table
- * page are flushed to the memory from the CPU's cache. And if that
- * happens then the invalid entries from memory could get fetched
- * into the L2 cache and so those entries won't be affected by the
- * MMU TLB invalidation done by sending the UNLOCK command.
- * If the memory is growable then this could result in unexpected
- * page faults happening repeatedly, until the invalid entry is
- * evicted from the L2 cache, as Driver would consider the page
- * faults for mapped memory as duplicate and won't take any action
- * effectively.
+ /* MMU cache flush operation here will depend on whether bottom level
+ * PGD is newly created or not.
+ *
+ * If bottom level PGD is newly created then no GPU cache maintenance is
+ * required as the PGD will not exist in GPU cache. Otherwise GPU cache
+ * maintenance is required for existing PGD.
*/
+ flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
+
kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
- KBASE_MMU_OP_FLUSH_PT);
+ flush_op);
+
+ if (newly_created_pgd) {
+ err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
+ new_pgds);
+ if (err) {
+ dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
+ __func__, err);
+ kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
+
+ kunmap(p);
+ goto fail_unlock_free_pgds;
+ }
+ }
+
+ phys += count;
+ insert_vpfn += count;
+ remain -= count;
kunmap(p);
}
@@ -2297,12 +2469,22 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
return 0;
+fail_unlock_free_pgds:
+ /* Free the pgds allocated by us from insert_level+1 to bottom level */
+ for (l = cur_level; l > insert_level; l--)
+ kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
+
fail_unlock:
- mutex_unlock(&mmut->mmu_lock);
+ if (insert_vpfn != start_vpfn) {
+ /* Invalidate the pages we have partially completed */
+ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds,
+ phys, ignore_page_migration);
+ }
mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
- kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
@@ -2318,7 +2500,6 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
{
int err;
u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
/* Early out if there is nothing to do */
if (nr == 0)
@@ -2336,58 +2517,56 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
-/**
- * kbase_mmu_flush_noretain() - Flush and invalidate the GPU caches
- * without retaining the kbase context.
- * @kctx: The KBase context.
- * @vpfn: The virtual page frame number to start the flush on.
- * @nr: The number of pages to flush.
- *
- * As per kbase_mmu_flush_invalidate but doesn't retain the kctx or do any
- * other locking.
- */
-static void kbase_mmu_flush_noretain(struct kbase_context *kctx, u64 vpfn, size_t nr)
+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
{
- struct kbase_device *kbdev = kctx->kbdev;
int err;
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
+ u64 dirty_pgds = 0;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ /* Imported allocations don't have metadata and therefore always ignore the
+ * page migration logic.
*/
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
- struct kbase_mmu_hw_op_param op_param;
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds, reg, true);
+ if (err)
+ return err;
- lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
+
+ return 0;
+}
+
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
+{
+ int err;
+ u64 dirty_pgds = 0;
/* Early out if there is nothing to do */
if (nr == 0)
- return;
+ return 0;
- /* flush L2 and unlock the VA (resumes the MMU) */
- op_param.vpfn = vpfn;
- op_param.nr = nr;
- op_param.op = KBASE_MMU_OP_FLUSH_MEM;
- op_param.kctx_id = kctx->id;
- op_param.mmu_sync_info = mmu_sync_info;
- if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- /* Value used to prevent skipping of any levels when flushing */
- op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
- err = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[kctx->as_nr],
- &op_param);
- } else {
- err = kbase_mmu_hw_do_flush_locked(kbdev, &kbdev->as[kctx->as_nr],
- &op_param);
- }
+ /* Memory aliases are always built on top of existing allocations,
+ * therefore the state of physical pages shall be updated.
+ */
+ err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
+ &dirty_pgds, reg, false);
+ if (err)
+ return err;
- if (err) {
- /* Flush failed to complete, assume the
- * GPU has hung and perform a reset to recover
- */
- dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover");
+ mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
- if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
- kbase_reset_gpu_locked(kbdev);
- }
+ return 0;
}
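
The two new batch-insert wrappers above differ only in the ignore_page_migration flag they pass down: imported allocations carry no page-migration metadata, so it is true, while aliased mappings are built on top of existing allocations whose metadata must be updated, so it is false. A minimal sketch of that dispatch, with placeholder types and stand-in functions rather than the real kbase API:

#include <stdbool.h>
#include <stdio.h>

enum mapping_kind { MAPPING_IMPORTED, MAPPING_ALIASED };

/* stand-in for kbase_mmu_insert_pages_no_flush(); only the flag matters here */
static int insert_pages(bool ignore_page_migration)
{
	printf("insert pages, %s page migration metadata\n",
	       ignore_page_migration ? "ignoring" : "updating");
	return 0;
}

static int insert_for(enum mapping_kind kind)
{
	/* imported allocations have no metadata; aliases sit on existing pages */
	return insert_pages(kind == MAPPING_IMPORTED);
}

int main(void)
{
	insert_for(MAPPING_IMPORTED);
	insert_for(MAPPING_ALIASED);
	return 0;
}
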
void kbase_mmu_update(struct kbase_device *kbdev,
@@ -2412,6 +2591,14 @@ void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
void kbase_mmu_disable(struct kbase_context *kctx)
{
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_mmu_hw_op_param op_param = { 0 };
+ int lock_err, flush_err;
+
/* ASSERT that the context has a valid as_nr, which is only the case
* when it's scheduled in.
*
@@ -2422,16 +2609,49 @@ void kbase_mmu_disable(struct kbase_context *kctx)
lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
- /*
- * The address space is being disabled, drain all knowledge of it out
- * from the caches as pages and page tables might be freed after this.
- *
- * The job scheduler code will already be holding the locks and context
- * so just do the flush.
+ op_param.vpfn = 0;
+ op_param.nr = ~0;
+ op_param.op = KBASE_MMU_OP_FLUSH_MEM;
+ op_param.kctx_id = kctx->id;
+ op_param.mmu_sync_info = mmu_sync_info;
+
+#if MALI_USE_CSF
+ /* 0xF value used to prevent skipping of any levels when flushing */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev))
+ op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
+#endif
+
+ /* lock MMU to prevent existing jobs on GPU from executing while the AS is
+ * not yet disabled
+ */
+ lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
+ kctx->id);
+
+ /* Issue the flush command only when L2 cache is in stable power on state.
+ * Any other state for L2 cache implies that shader cores are powered off,
+ * which in turn implies there is no execution happening on the GPU.
*/
- kbase_mmu_flush_noretain(kctx, 0, ~0);
+ if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
+ flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
+ GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
+ if (flush_err)
+ dev_err(kbdev->dev,
+ "Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
+ kctx->as_nr, kctx->tgid, kctx->id);
+ }
+ kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
+
+ if (!lock_err) {
+ /* unlock the MMU to allow it to resume */
+ lock_err =
+ kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
+ if (lock_err)
+ dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
+ kctx->tgid, kctx->id);
+ }
- kctx->kbdev->mmu_mode->disable_as(kctx->kbdev, kctx->as_nr);
#if !MALI_USE_CSF
/*
* JM GPUs has some L1 read only caches that need to be invalidated
@@ -2439,7 +2659,7 @@ void kbase_mmu_disable(struct kbase_context *kctx)
* the slot_rb tracking field so such invalidation is performed when
* a new katom is executed on the affected slots.
*/
- kbase_backend_slot_kctx_purge_locked(kctx->kbdev, kctx);
+ kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
#endif
}
KBASE_EXPORT_TEST_API(kbase_mmu_disable);
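
The rewritten kbase_mmu_disable() above replaces the old flush-and-reset helper with an explicit sequence: lock the address space, flush L2+LSC only while the L2 is in a stable powered-on state, disable the AS, and unlock only if the lock itself succeeded. The control flow, with stand-in functions instead of the real MMU and cache-control calls, reduces to:

#include <stdio.h>

static int do_lock(void)      { return 0; } /* 0 on success */
static int flush_l2_lsc(void) { return 0; }
static void disable_as(void)  { puts("AS disabled"); }
static int do_unlock(void)    { return 0; }

static void disable_sequence(int l2_is_on)
{
	int lock_err = do_lock();

	if (lock_err)
		puts("failed to lock AS");

	/* flush only while the L2 cache is in a stable powered-on state */
	if (l2_is_on && flush_l2_lsc())
		puts("failed to flush GPU cache");

	disable_as();

	/* never unlock an AS we failed to lock */
	if (!lock_err && do_unlock())
		puts("failed to unlock AS");
}

int main(void)
{
	disable_sequence(1);
	return 0;
}
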
@@ -2447,8 +2667,7 @@ KBASE_EXPORT_TEST_API(kbase_mmu_disable);
static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t *pgds,
u64 vpfn, int level,
- enum kbase_mmu_op_type flush_op, u64 *dirty_pgds,
- struct list_head *free_pgds_list)
+ enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
{
int current_level;
@@ -2480,7 +2699,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
current_pgd + (index * sizeof(u64)),
sizeof(u64), flush_op);
- kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
} else {
current_valid_entries--;
@@ -2500,13 +2719,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
/**
* mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
*
- * @kbdev: Pointer to kbase device.
- * @kctx: Pointer to kbase context.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- * @phys: Array of physical pages to flush.
- * @op_param: Non-NULL pointer to struct containing information about the flush
- * operation to perform.
+ * @kbdev: Pointer to kbase device.
+ * @kctx: Pointer to kbase context.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @phys: Array of physical pages to flush.
+ * @phys_page_nr: Number of physical pages to flush.
+ * @op_param: Non-NULL pointer to struct containing information about the flush
+ * operation to perform.
*
* This function will do one of three things:
* 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
@@ -2514,10 +2734,14 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
* 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
* supported on GPU or,
* 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
+ *
+ * When performing a partial GPU cache flush, the number of physical
+ * pages does not have to be identical to the number of virtual pages unmapped
+ * from the MMU; this supports flushing a single physical address for an aliased page.
*/
static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
struct kbase_context *kctx, int as_nr,
- struct tagged_addr *phys,
+ struct tagged_addr *phys, size_t phys_page_nr,
struct kbase_mmu_hw_op_param *op_param)
{
if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
@@ -2536,7 +2760,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
mmu_invalidate(kbdev, kctx, as_nr, op_param);
- for (i = 0; !flush_done && i < op_param->nr; i++) {
+ for (i = 0; !flush_done && i < phys_page_nr; i++) {
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
@@ -2549,76 +2773,15 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
#endif
}
-/**
- * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
- *
- * @kbdev: Pointer to kbase device.
- * @mmut: Pointer to GPU MMU page table.
- * @vpfn: Start page frame number of the GPU virtual pages to unmap.
- * @phys: Array of physical pages currently mapped to the virtual
- * pages to unmap, or NULL. This is used for GPU cache maintenance
- * and page migration support.
- * @nr: Number of pages to unmap.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- * @ignore_page_migration: Whether page migration metadata should be ignored.
- *
- * We actually discard the ATE and free the page table pages if no valid entries
- * exist in PGD.
- *
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
- *
- * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
- * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
- * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches
- * instead of specific physical address ranges.
- *
- * Return: 0 on success, otherwise an error code.
- */
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr, int as_nr,
- bool ignore_page_migration)
+static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, size_t nr, u64 *dirty_pgds,
+ struct list_head *free_pgds_list,
+ enum kbase_mmu_op_type flush_op)
{
- const size_t requested_nr = nr;
- u64 start_vpfn = vpfn;
- enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
- struct kbase_mmu_mode const *mmu_mode;
- struct kbase_mmu_hw_op_param op_param;
- int err = -EFAULT;
- u64 dirty_pgds = 0;
- LIST_HEAD(free_pgds_list);
-
- /* Calls to this function are inherently asynchronous, with respect to
- * MMU operations.
- */
- const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
-
- if (nr == 0) {
- /* early out if nothing to do */
- return 0;
- }
-
- /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
- * the operation is invalidate but the granularity of cache maintenance may change
- * according to the situation.
- *
- * If GPU control command operations are present and the number of pages is "small",
- * then the optimal strategy is flushing on the physical address range of the pages
- * which are affected by the operation. That implies both the PGDs which are modified
- * or removed from the page table and the physical pages which are freed from memory.
- *
- * Otherwise, there's no alternative to invalidating the whole GPU cache.
- */
- if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys && nr <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
- flush_op = KBASE_MMU_OP_FLUSH_PT;
-
- mutex_lock(&mmut->mmu_lock);
+ struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
- mmu_mode = kbdev->mmu_mode;
+ lockdep_assert_held(&mmut->mmu_lock);
+ kbase_mmu_reset_free_pgds_list(mmut);
while (nr) {
unsigned int index = vpfn & 0x1FF;
@@ -2703,7 +2866,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
}
if (pcount > 0)
- dirty_pgds |= 1ULL << level;
+ *dirty_pgds |= 1ULL << level;
num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
if (WARN_ON_ONCE(num_of_valid_entries < pcount))
@@ -2725,11 +2888,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
pgd + (index * sizeof(u64)),
pcount * sizeof(u64), flush_op);
- kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list);
+ kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
- flush_op, &dirty_pgds,
- &free_pgds_list);
+ flush_op, dirty_pgds);
vpfn += count;
nr -= count;
@@ -2746,19 +2908,77 @@ next:
vpfn += count;
nr -= count;
}
- err = 0;
out:
+ return 0;
+}
+
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration)
+{
+ u64 start_vpfn = vpfn;
+ enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
+ struct kbase_mmu_hw_op_param op_param;
+ int err = -EFAULT;
+ u64 dirty_pgds = 0;
+ LIST_HEAD(free_pgds_list);
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ /* This function performs two operations: MMU maintenance and flushing
+ * the caches. To ensure internal consistency between the caches and the
+ * MMU, it does not make sense to be able to flush only the physical pages
+ * from the cache and keep the PTE, nor does it make sense to use this
+ * function to remove a PTE and keep the physical pages in the cache.
+ *
+ * However, we have legitimate cases where we can try to tear down a mapping
+ * with zero virtual and zero physical pages, so we must have the following
+ * behaviour:
+ * - if both physical and virtual page counts are zero, return early
+ * - if either the physical or the virtual page count is zero, return early
+ * - if there are fewer virtual pages than physical pages, return -EINVAL
+ */
+ if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
+ return 0;
+
+ if (unlikely(nr_virt_pages < nr_phys_pages))
+ return -EINVAL;
+
+ /* MMU cache flush strategy depends on the number of pages to unmap. In both cases
+ * the operation is invalidate but the granularity of cache maintenance may change
+ * according to the situation.
+ *
+ * If GPU control command operations are present and the number of pages is "small",
+ * then the optimal strategy is flushing on the physical address range of the pages
+ * which are affected by the operation. That implies both the PGDs which are modified
+ * or removed from the page table and the physical pages which are freed from memory.
+ *
+ * Otherwise, there's no alternative to invalidating the whole GPU cache.
+ */
+ if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
+ nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
+ flush_op = KBASE_MMU_OP_FLUSH_PT;
+
+ mutex_lock(&mmut->mmu_lock);
+
+ err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
+ &free_pgds_list, flush_op);
+
/* Set up MMU operation parameters. See above about MMU cache flush strategy. */
op_param = (struct kbase_mmu_hw_op_param){
.vpfn = start_vpfn,
- .nr = requested_nr,
+ .nr = nr_virt_pages,
.mmu_sync_info = mmu_sync_info,
.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
.op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
KBASE_MMU_OP_FLUSH_MEM,
.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
};
- mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param);
+ mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
+ &op_param);
/* If page migration is enabled: the status of all physical pages involved
* shall be updated, unless they are not movable. Their status shall be
@@ -2766,15 +2986,14 @@ out:
* requests to migrate the pages, if they have been isolated.
*/
if (kbase_page_migration_enabled && phys && !ignore_page_migration)
- kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr);
+ kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
- mutex_unlock(&mmut->mmu_lock);
+ kbase_mmu_free_pgds_list(kbdev, mmut);
- kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list);
+ mutex_unlock(&mmut->mmu_lock);
return err;
}
-
KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
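
Throughout these paths, dirty_pgds collects one bit per page-table level that was actually modified, and op_param.flush_skip_levels is then derived from it via pgd_level_to_skip_flush(). That helper is not part of this diff; the sketch below assumes it simply lets the flush skip every level whose bit is clear, which is consistent with the "0xF value used to prevent skipping of any levels" usage seen above.

#include <stdio.h>
#include <stdint.h>

/* assumed behaviour: levels not marked dirty may be skipped by the flush */
static unsigned int levels_to_skip(uint64_t dirty_pgds)
{
	return (unsigned int)(~dirty_pgds & 0xF);
}

int main(void)
{
	uint64_t dirty_pgds = 0;

	/* e.g. only a level-2 PGD and the bottom-level PGD were touched */
	dirty_pgds |= 1ULL << 2;
	dirty_pgds |= 1ULL << 3;

	printf("dirty mask 0x%llx -> skip mask 0x%x\n",
	       (unsigned long long)dirty_pgds, levels_to_skip(dirty_pgds));

	/* passing 0xF as the dirty mask yields a skip mask of 0: nothing is skipped */
	printf("0xF -> skip 0x%x\n", levels_to_skip(0xF));
	return 0;
}
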
/**
@@ -2834,7 +3053,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kb
if (is_huge(*phys) && (index == index_in_large_page(*phys)))
cur_level = MIDGARD_MMU_LEVEL(2);
- err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd, NULL, dirty_pgds);
+ err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd);
if (WARN_ON(err))
goto fail_unlock;
@@ -3119,9 +3338,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
}
}
- ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL);
+ ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd);
if (ret) {
- dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
+ dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
goto get_pgd_at_level_error;
}
@@ -3167,10 +3386,8 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
if (ret < 0) {
mutex_unlock(&kbdev->mmu_hw_mutex);
mutex_unlock(&kbdev->pm.lock);
- dev_err(kbdev->dev,
- "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.",
- __func__);
- goto gpu_reset;
+ dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
+ goto undo_mappings;
}
/* Copy memory content.
@@ -3270,7 +3487,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
/* Checking the final migration transaction error state */
if (ret < 0) {
dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
- goto gpu_reset;
+ goto undo_mappings;
}
/* Undertaking metadata transfer, while we are holding the mmu_lock */
@@ -3305,19 +3522,13 @@ new_page_map_error:
old_page_map_error:
return ret;
-gpu_reset:
- /* Unlock the MMU table before resetting the GPU and undo
- * mappings.
- */
+undo_mappings:
+ /* Unlock the MMU table and undo mappings. */
mutex_unlock(&mmut->mmu_lock);
kunmap(phys_to_page(pgd));
kunmap(as_page(new_phys));
kunmap(as_page(old_phys));
- /* Reset the GPU because of an unrecoverable error in locking or flushing. */
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
-
return ret;
}
@@ -3329,7 +3540,6 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
u64 *pgd_page_buffer = NULL;
- bool page_is_isolated = false;
struct page *p = phys_to_page(pgd);
lockdep_assert_held(&mmut->mmu_lock);
@@ -3342,7 +3552,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
/* Copy the page to our preallocated buffer so that we can minimize
* kmap_atomic usage
*/
- pgd_page_buffer = mmut->mmu_teardown_pages[level];
+ pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
}
@@ -3370,41 +3580,27 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
}
}
- /* Top level PGD page is excluded from migration process. */
- if (level != MIDGARD_MMU_TOPLEVEL)
- page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
-
- if (likely(!page_is_isolated))
- kbase_mmu_free_pgd(kbdev, mmut, pgd);
+ kbase_mmu_free_pgd(kbdev, mmut, pgd);
}
int kbase_mmu_init(struct kbase_device *const kbdev,
struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
int const group_id)
{
- int level;
-
if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
WARN_ON(group_id < 0))
return -EINVAL;
+ compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)),
+ "List of free PGDs may not be large enough.");
+ compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL,
+ "Array of MMU levels is not large enough.");
+
mmut->group_id = group_id;
mutex_init(&mmut->mmu_lock);
mmut->kctx = kctx;
mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS;
- /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- mmut->mmu_teardown_pages[level] =
- kmalloc(PAGE_SIZE, GFP_KERNEL);
-
- if (!mmut->mmu_teardown_pages[level]) {
- kbase_mmu_term(kbdev, mmut);
- return -ENOMEM;
- }
- }
-
/* We allocate pages into the kbdev memory pool, then
* kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
* avoid allocations from the kernel happening with the lock held.
@@ -3414,7 +3610,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[mmut->group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
if (err) {
kbase_mmu_term(kbdev, mmut);
return -ENOMEM;
@@ -3430,8 +3626,6 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
- int level;
-
WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
"kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
mmut->kctx->tgid, mmut->kctx->id);
@@ -3445,13 +3639,6 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
}
- for (level = MIDGARD_MMU_TOPLEVEL;
- level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
- if (!mmut->mmu_teardown_pages[level])
- break;
- kfree(mmut->mmu_teardown_pages[level]);
- }
-
mutex_destroy(&mmut->mmu_lock);
}
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 2b3e6c0..49b42e0 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -152,21 +152,71 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
struct tagged_addr phy, unsigned long flags, int level, int group_id);
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int group_id, u64 *dirty_pgds,
struct kbase_va_region *reg, bool ignore_page_migration);
int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
struct kbase_va_region *reg, bool ignore_page_migration);
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
- struct tagged_addr phys, size_t nr,
- unsigned long flags, int group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info);
+int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg);
+int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg);
+int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys,
+ size_t nr, unsigned long flags, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration);
+int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
+int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
+/**
+ * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
+ *
+ * @kbdev: Pointer to kbase device.
+ * @mmut: Pointer to GPU MMU page table.
+ * @vpfn: Start page frame number of the GPU virtual pages to unmap.
+ * @phys: Array of physical pages currently mapped to the virtual
+ * pages to unmap, or NULL. This is used for GPU cache maintenance
+ * and page migration support.
+ * @nr_phys_pages: Number of physical pages to flush.
+ * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @ignore_page_migration: Whether page migration metadata should be ignored.
+ *
+ * We actually discard the ATE and free the page table pages if no valid entries
+ * exist in the PGD.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ *
+ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
+ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, GPU cache
+ * maintenance is done as usual, that is, the whole GPU cache is invalidated instead of
+ * specific physical address ranges.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr, int as_nr,
- bool ignore_page_migration);
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration);
+
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
unsigned long flags, int const group_id);
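Editor's note: to make the cache-maintenance contract in the new kernel-doc for kbase_mmu_teardown_pages() above easier to follow, here is a minimal caller sketch. It is not part of the patch; the helper name and the use of kctx->mmu and kctx->as_nr are illustrative assumptions.

    /* Illustrative only: unmap nr pages starting at vpfn.
     * Passing the phys array lets the driver flush only those physical pages;
     * passing NULL instead falls back to invalidating the whole GPU cache.
     */
    static int example_unmap_region(struct kbase_device *kbdev, struct kbase_context *kctx,
                                    u64 vpfn, struct tagged_addr *phys, size_t nr)
    {
            return kbase_mmu_teardown_pages(kbdev, &kctx->mmu, vpfn, phys,
                                            nr /* nr_phys_pages */, nr /* nr_virt_pages */,
                                            kctx->as_nr, false /* ignore_page_migration */);
    }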
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 68bc697..1a892dc 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_mem.h>
+#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <linux/delay.h>
@@ -156,37 +157,60 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
return 0;
}
-static int wait_ready(struct kbase_device *kbdev,
- unsigned int as_nr)
+/**
+ * wait_ready() - Wait for previously issued MMU command to complete.
+ *
+ * @kbdev: Kbase device on which to wait for the MMU command to complete.
+ * @as_nr: Address space on which to wait for the MMU command to complete.
+ *
+ * The GPU is reset if the wait for the previously issued command fails.
+ *
+ * Return: 0 on successful completion, negative error code on failure.
+ */
+static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
{
- u32 max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ s64 diff;
- /* Wait for the MMU status to indicate there is no active command. */
- while (--max_loops &&
- kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
- AS_STATUS_AS_ACTIVE) {
- ;
- }
+ if (unlikely(kbdev->as[as_nr].is_unresponsive))
+ return -EBUSY;
- if (WARN_ON_ONCE(max_loops == 0)) {
- dev_err(kbdev->dev,
- "AS_ACTIVE bit stuck for as %u, might be caused by slow/unstable GPU clock or possible faulty FPGA connector",
- as_nr);
- return -1;
- }
+ do {
+ unsigned int i;
- return 0;
+ for (i = 0; i < 1000; i++) {
+ /* Wait for the MMU status to indicate there is no active command */
+ if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+ AS_STATUS_AS_ACTIVE))
+ return 0;
+ }
+
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while (diff < mmu_as_inactive_wait_time_ms);
+
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
+ as_nr);
+ kbdev->as[as_nr].is_unresponsive = true;
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ return -ETIMEDOUT;
}
static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
{
- int status;
-
/* write AS_COMMAND when MMU is ready to accept another command */
- status = wait_ready(kbdev, as_nr);
- if (status == 0)
+ const int status = wait_ready(kbdev, as_nr);
+
+ if (likely(status == 0))
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
- else {
+ else if (status == -EBUSY) {
+ dev_dbg(kbdev->dev,
+ "Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
+ as_nr, cmd);
+ } else {
dev_err(kbdev->dev,
"Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u",
as_nr, cmd);
@@ -259,17 +283,21 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c
/* Wait for the LOCK MMU command to complete, issued by the caller */
ret = wait_ready(kbdev, as_nr);
- if (ret)
+ if (unlikely(ret))
return ret;
ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
GPU_COMMAND_CACHE_CLN_INV_LSC);
- if (ret)
+ if (unlikely(ret))
return ret;
ret = wait_cores_power_trans_complete(kbdev);
- if (ret)
+ if (unlikely(ret)) {
+ if (kbase_prepare_to_reset_gpu_locked(kbdev,
+ RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
return ret;
+ }
/* As LSC is guaranteed to have been flushed we can use FLUSH_PT
* MMU command to only flush the L2.
@@ -397,12 +425,21 @@ static int mmu_hw_do_lock_no_wait(struct kbase_device *kbdev, struct kbase_as *a
ret = mmu_hw_set_lock_addr(kbdev, as->number, lock_addr, op_param);
- if (!ret)
- write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
+ if (likely(!ret))
+ ret = write_cmd(kbdev, as->number, AS_COMMAND_LOCK);
return ret;
}
+/**
+ * mmu_hw_do_lock - Issue LOCK command to the MMU and wait for its completion.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ *
+ * Return: 0 if issuing the LOCK command was successful, otherwise an error code.
+ */
static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param)
{
@@ -443,10 +480,10 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
ret = write_cmd(kbdev, as->number, AS_COMMAND_UNLOCK);
/* Wait for UNLOCK command to complete */
- if (!ret)
+ if (likely(!ret))
ret = wait_ready(kbdev, as->number);
- if (!ret) {
+ if (likely(!ret)) {
u64 lock_addr = 0x0;
/* read MMU_AS_CONTROL.LOCKADDR register */
lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
@@ -478,6 +515,16 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
}
+/**
+ * mmu_hw_do_flush - Flush MMU and wait for its completion.
+ *
+ * @kbdev: Kbase device to issue the MMU operation on.
+ * @as: Address space to issue the MMU operation on.
+ * @op_param: Pointer to a struct containing information about the MMU operation.
+ * @hwaccess_locked: Flag to indicate if the hwaccess lock is already held by the caller.
+ *
+ * Return: 0 if flushing MMU was successful, otherwise an error code.
+ */
static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
const struct kbase_mmu_hw_op_param *op_param, bool hwaccess_locked)
{
@@ -508,12 +555,9 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
return ret;
#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* WA for the BASE_HW_ISSUE_GPU2019_3901. No runtime check is used here
- * as the WA is applicable to all CSF GPUs where FLUSH_MEM/PT command is
- * supported, and this function doesn't gets called for the GPUs where
- * FLUSH_MEM/PT command is deprecated.
- */
- if (mmu_cmd == AS_COMMAND_FLUSH_MEM) {
+ /* WA for the BASE_HW_ISSUE_GPU2019_3901. */
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3901) &&
+ mmu_cmd == AS_COMMAND_FLUSH_MEM) {
if (!hwaccess_locked) {
unsigned long flags = 0;
@@ -529,12 +573,13 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
}
#endif
- write_cmd(kbdev, as->number, mmu_cmd);
+ ret = write_cmd(kbdev, as->number, mmu_cmd);
/* Wait for the command to complete */
- ret = wait_ready(kbdev, as->number);
+ if (likely(!ret))
+ ret = wait_ready(kbdev, as->number);
- if (!ret)
+ if (likely(!ret))
mmu_command_instr(kbdev, op_param->kctx_id, mmu_cmd, lock_addr,
op_param->mmu_sync_info);
diff --git a/mali_kbase/tests/build.bp b/mali_kbase/tests/build.bp
index 7abae23..5581ba9 100644
--- a/mali_kbase/tests/build.bp
+++ b/mali_kbase/tests/build.bp
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -38,3 +38,9 @@ bob_defaults {
kbuild_options: ["CONFIG_UNIT_TEST_KERNEL_MODULES=y"],
},
}
+
+bob_defaults {
+ name: "kernel_unit_tests",
+ add_to_alias: ["unit_tests"],
+ srcs: [".*_unit_test/"],
+}
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index 34d2223..1e636b9 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -10,6 +10,7 @@
*/
#include "linux/mman.h"
+#include <linux/version_compat_defs.h>
#include <mali_kbase.h>
/* mali_kbase_mmap.c
@@ -90,7 +91,6 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
return false;
-
return true;
}
@@ -132,6 +132,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
*info, bool is_shader_code, bool is_same_4gb_page)
{
+#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long length, low_limit, high_limit, gap_start, gap_end;
@@ -225,7 +226,37 @@ check_current:
}
}
}
+#else
+ unsigned long length, high_limit, gap_start, gap_end;
+
+ MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+ /* Adjust search length to account for worst case alignment overhead */
+ length = info->length + info->align_mask;
+ if (length < info->length)
+ return -ENOMEM;
+
+ /*
+ * Adjust search limits by the desired length.
+ * See implementation comment at top of unmapped_area().
+ */
+ gap_end = info->high_limit;
+ if (gap_end < length)
+ return -ENOMEM;
+ high_limit = gap_end - length;
+ if (info->low_limit > high_limit)
+ return -ENOMEM;
+
+ while (true) {
+ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
+ return -ENOMEM;
+ gap_end = mas.last + 1;
+ gap_start = mas.min;
+
+ if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
+ return gap_end;
+ }
+#endif
return -ENOMEM;
}
@@ -242,8 +273,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct vm_unmapped_area_info info;
unsigned long align_offset = 0;
unsigned long align_mask = 0;
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base);
+ unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr);
+#else
unsigned long high_limit = mm->mmap_base;
unsigned long low_limit = PAGE_SIZE;
+#endif
int cpu_va_bits = BITS_PER_LONG;
int gpu_pc_bits =
kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
@@ -270,6 +306,13 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
struct kbase_reg_zone *zone =
kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
+
+ /* requested length too big for entire address space */
+ if (len > mmap_end - kbase_mmap_min_addr)
+ return -ENOMEM;
+#endif
/* err on fixed address */
if ((flags & MAP_FIXED) || addr)
@@ -282,7 +325,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
high_limit =
- min_t(unsigned long, mm->mmap_base, same_va_end_addr);
+ min_t(unsigned long, high_limit, same_va_end_addr);
/* If there's enough (> 33 bits) of GPU VA space, align
* to 2MB boundaries.
@@ -359,9 +402,15 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
high_limit < same_va_end_addr) {
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ /* Retry above TASK_UNMAPPED_BASE */
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = min_t(u64, mmap_end, same_va_end_addr);
+#else
/* Retry above mmap_base */
info.low_limit = mm->mmap_base;
info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);
+#endif
ret = kbase_unmapped_area_topdown(&info, is_shader_code,
is_same_4gb_page);
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index 09de3f0..20356d6 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,8 +24,6 @@
#include "mali_kbase_tracepoints.h"
#include <mali_kbase.h>
-#include <mali_kbase_jm.h>
-
#include <linux/atomic.h>
#include <linux/file.h>
#include <linux/mutex.h>
diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c
index 359d063..ae57006 100644
--- a/mali_kbase/tl/mali_kbase_timeline_io.c
+++ b/mali_kbase/tl/mali_kbase_timeline_io.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,47 @@
#include <uapi/linux/eventpoll.h>
#endif
+static int kbase_unprivileged_global_profiling;
+
+/**
+ * kbase_unprivileged_global_profiling_set - set permissions for unprivileged processes
+ *
+ * @val: String containing value to set. Only strings representing positive
+ * integers are accepted as valid; any non-positive integer (including 0)
+ * is rejected.
+ * @kp: Module parameter associated with this method.
+ *
+ * This method can only be used to enable permissions for unprivileged processes
+ * if they are currently disabled: for this reason, only strings representing
+ * positive integers are accepted. Since permissions cannot be disabled once
+ * they have been set, any non-positive integer (including 0) is rejected.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
+static int kbase_unprivileged_global_profiling_set(const char *val, const struct kernel_param *kp)
+{
+ int new_val;
+ int ret = kstrtoint(val, 0, &new_val);
+
+ if (ret == 0) {
+ if (new_val < 1)
+ return -EINVAL;
+
+ kbase_unprivileged_global_profiling = 1;
+ }
+
+ return ret;
+}
+
+static const struct kernel_param_ops kbase_global_unprivileged_profiling_ops = {
+ .get = param_get_int,
+ .set = kbase_unprivileged_global_profiling_set,
+};
+
+module_param_cb(kbase_unprivileged_global_profiling, &kbase_global_unprivileged_profiling_ops,
+ &kbase_unprivileged_global_profiling, 0600);
+
/* The timeline stream file operations functions. */
static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
size_t size, loff_t *f_pos);
@@ -43,6 +84,15 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp);
static int kbasep_timeline_io_fsync(struct file *filp, loff_t start, loff_t end,
int datasync);
+static bool timeline_is_permitted(void)
+{
+#if KERNEL_VERSION(5, 8, 0) <= LINUX_VERSION_CODE
+ return kbase_unprivileged_global_profiling || perfmon_capable();
+#else
+ return kbase_unprivileged_global_profiling || capable(CAP_SYS_ADMIN);
+#endif
+}
+
/**
* kbasep_timeline_io_packet_pending - check timeline streams for pending
* packets
@@ -328,6 +378,9 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
};
int err;
+ if (!timeline_is_permitted())
+ return -EPERM;
+
if (WARN_ON(!kbdev) || (flags & ~BASE_TLSTREAM_FLAGS_MASK))
return -EINVAL;
@@ -371,7 +424,7 @@ void kbase_timeline_io_debugfs_init(struct kbase_device *const kbdev)
if (WARN_ON(!kbdev) || WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
return;
- file = debugfs_create_file("tlstream", 0444, kbdev->mali_debugfs_directory, kbdev,
+ file = debugfs_create_file("tlstream", 0400, kbdev->mali_debugfs_directory, kbdev,
&kbasep_tlstream_debugfs_fops);
if (IS_ERR_OR_NULL(file))
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index e8a74e9..f62c755 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -87,7 +87,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_GPUCMDQUEUE_KICK,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG,
- KBASE_TL_KBASE_DEVICE_HALT_CSG,
+ KBASE_TL_KBASE_DEVICE_HALTING_CSG,
+ KBASE_TL_KBASE_DEVICE_SUSPEND_CSG,
+ KBASE_TL_KBASE_DEVICE_CSG_IDLE,
KBASE_TL_KBASE_NEW_CTX,
KBASE_TL_KBASE_DEL_CTX,
KBASE_TL_KBASE_CTX_ASSIGN_AS,
@@ -98,6 +100,8 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE,
@@ -116,6 +120,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END,
KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START,
@@ -360,13 +367,21 @@ enum tl_msg_id_obj {
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_PROGRAM_CSG, \
"CSG is programmed to a slot", \
"@IIIII", \
- "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resumed") \
+ "kbase_device_id,kernel_ctx_id,gpu_cmdq_grp_handle,kbase_device_csg_slot_index,kbase_device_csg_slot_resuming") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG, \
"CSG is deprogrammed from a slot", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
- TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALT_CSG, \
- "CSG is halted", \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_HALTING_CSG, \
+ "CSG is halting", \
+ "@III", \
+ "kbase_device_id,kbase_device_csg_slot_index,kbase_device_csg_slot_suspending") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_SUSPEND_CSG, \
+ "CSG is suspended", \
+ "@II", \
+ "kbase_device_id,kbase_device_csg_slot_index") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_DEVICE_CSG_IDLE, \
+ "KBase device is notified that CSG is idle.", \
"@II", \
"kbase_device_id,kbase_device_csg_slot_index") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_CTX, \
@@ -404,11 +419,19 @@ enum tl_msg_id_obj {
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"KCPU Queue enqueues Wait on Cross Queue Sync Object", \
"@pLII", \
- "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \
+ "kcpu_queue,cqs_obj_gpu_addr,compare_value,inherit_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \
"KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@pL", \
"kcpu_queue,cqs_obj_gpu_addr") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION, \
+ "KCPU Queue enqueues Wait Operation on Cross Queue Sync Object", \
+ "@pLLIII", \
+ "kcpu_queue,cqs_obj_gpu_addr,compare_value,condition,data_type,inherit_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION, \
+ "KCPU Queue enqueues Set Operation on Cross Queue Sync Object", \
+ "@pLLII", \
+ "kcpu_queue,cqs_obj_gpu_addr,value,operation,data_type") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT, \
"KCPU Queue enqueues Map Import", \
"@pL", \
@@ -481,6 +504,18 @@ enum tl_msg_id_obj {
"KCPU Queue executes a Set on Cross Queue Sync Object", \
"@pI", \
"kcpu_queue,execute_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START, \
+ "KCPU Queue starts a Wait Operation on Cross Queue Sync Object", \
+ "@p", \
+ "kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END, \
+ "KCPU Queue ends a Wait Operation on Cross Queue Sync Object", \
+ "@pI", \
+ "kcpu_queue,execute_error") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION, \
+ "KCPU Queue executes a Set Operation on Cross Queue Sync Object", \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
"KCPU Queue starts a Map Import", \
"@p", \
@@ -2130,7 +2165,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed
+ u32 kbase_device_csg_slot_resuming
)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG;
@@ -2139,7 +2174,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
+ sizeof(kernel_ctx_id)
+ sizeof(gpu_cmdq_grp_handle)
+ sizeof(kbase_device_csg_slot_index)
- + sizeof(kbase_device_csg_slot_resumed)
+ + sizeof(kbase_device_csg_slot_resuming)
;
char *buffer;
unsigned long acq_flags;
@@ -2158,7 +2193,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
pos = kbasep_serialize_bytes(buffer,
pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
pos = kbasep_serialize_bytes(buffer,
- pos, &kbase_device_csg_slot_resumed, sizeof(kbase_device_csg_slot_resumed));
+ pos, &kbase_device_csg_slot_resuming, sizeof(kbase_device_csg_slot_resuming));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2190,13 +2225,71 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_device_halt_csg(
+void __kbase_tlstream_tl_kbase_device_halting_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_suspending
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALTING_CSG;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kbase_device_id)
+ + sizeof(kbase_device_csg_slot_index)
+ + sizeof(kbase_device_csg_slot_suspending)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_id, sizeof(kbase_device_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_suspending, sizeof(kbase_device_csg_slot_suspending));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_device_suspend_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_SUSPEND_CSG;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kbase_device_id)
+ + sizeof(kbase_device_csg_slot_index)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_id, sizeof(kbase_device_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kbase_device_csg_slot_index, sizeof(kbase_device_csg_slot_index));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_device_csg_idle(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
)
{
- const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALT_CSG;
+ const u32 msg_id = KBASE_TL_KBASE_DEVICE_CSG_IDLE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kbase_device_id)
+ sizeof(kbase_device_csg_slot_index)
@@ -2433,16 +2526,16 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error
+ u32 compare_value,
+ u32 inherit_error
)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(cqs_obj_gpu_addr)
- + sizeof(cqs_obj_compare_value)
- + sizeof(cqs_obj_inherit_error)
+ + sizeof(compare_value)
+ + sizeof(inherit_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2457,9 +2550,9 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
pos = kbasep_serialize_bytes(buffer,
- pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
+ pos, &compare_value, sizeof(compare_value));
pos = kbasep_serialize_bytes(buffer,
- pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error));
+ pos, &inherit_error, sizeof(inherit_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2491,6 +2584,88 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 compare_value,
+ u32 condition,
+ u32 data_type,
+ u32 inherit_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(cqs_obj_gpu_addr)
+ + sizeof(compare_value)
+ + sizeof(condition)
+ + sizeof(data_type)
+ + sizeof(inherit_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &compare_value, sizeof(compare_value));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &condition, sizeof(condition));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &data_type, sizeof(data_type));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &inherit_error, sizeof(inherit_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 value,
+ u32 operation,
+ u32 data_type
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(cqs_obj_gpu_addr)
+ + sizeof(value)
+ + sizeof(operation)
+ + sizeof(data_type)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &value, sizeof(value));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &operation, sizeof(operation));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &data_type, sizeof(data_type));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -2981,6 +3156,83 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
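Editor's note: as context for the new writer functions above, a minimal hypothetical call-site sketch (not part of the patch; the device-id expression and slot variable are placeholders) showing how they are normally reached through the corresponding KBASE_TLSTREAM_* macros declared in mali_kbase_tracepoints.h:

    /* Illustrative only: report that the CSG on a given slot has been suspended.
     * The macro checks kbdev->timeline_flags for BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS
     * before serializing the event into the object stream.
     */
    static void example_report_csg_suspend(struct kbase_device *kbdev, u32 csg_slot_index)
    {
            u32 device_id = kbdev->id; /* placeholder for the physical device identifier */

            KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(kbdev, device_id, csg_slot_index);
    }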
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index 586fe67..f1f4761 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -408,7 +408,7 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed
+ u32 kbase_device_csg_slot_resuming
);
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
@@ -417,7 +417,20 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
u32 kbase_device_csg_slot_index
);
-void __kbase_tlstream_tl_kbase_device_halt_csg(
+void __kbase_tlstream_tl_kbase_device_halting_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index,
+ u32 kbase_device_csg_slot_suspending
+);
+
+void __kbase_tlstream_tl_kbase_device_suspend_csg(
+ struct kbase_tlstream *stream,
+ u32 kbase_device_id,
+ u32 kbase_device_csg_slot_index
+);
+
+void __kbase_tlstream_tl_kbase_device_csg_idle(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kbase_device_csg_slot_index
@@ -474,8 +487,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error
+ u32 compare_value,
+ u32 inherit_error
);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
@@ -484,6 +497,25 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
u64 cqs_obj_gpu_addr
);
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 compare_value,
+ u32 condition,
+ u32 data_type,
+ u32 inherit_error
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u64 cqs_obj_gpu_addr,
+ u64 value,
+ u32 operation,
+ u32 data_type
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -593,6 +625,23 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
u32 execute_error
);
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+);
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue
@@ -2026,7 +2075,7 @@ struct kbase_tlstream;
* @kernel_ctx_id: Unique ID for the KBase Context
* @gpu_cmdq_grp_handle: GPU Command Queue Group handle which will match userspace
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
- * @kbase_device_csg_slot_resumed: Whether the csg is being resumed
+ * @kbase_device_csg_slot_resuming: Whether the csg is being resumed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
@@ -2035,7 +2084,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2046,7 +2095,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
); \
} while (0)
#else
@@ -2056,7 +2105,7 @@ struct kbase_tlstream;
kernel_ctx_id, \
gpu_cmdq_grp_handle, \
kbase_device_csg_slot_index, \
- kbase_device_csg_slot_resumed \
+ kbase_device_csg_slot_resuming \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2066,7 +2115,7 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
- * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being deprogrammed
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \
@@ -2093,14 +2142,49 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG - CSG is halted
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG - CSG is halting
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
- * @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being halted
+ * @kbase_device_csg_slot_suspending: Whether the csg is being suspended
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_device_halting_csg( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALTING_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_suspending \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG - CSG is suspended
+ *
+ * @kbdev: Kbase device
+ * @kbase_device_id: The ID of the physical hardware
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG is being suspended
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2108,14 +2192,45 @@ struct kbase_tlstream;
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_device_halt_csg( \
+ __kbase_tlstream_tl_kbase_device_suspend_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kbase_device_id, \
kbase_device_csg_slot_index \
); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE - KBase device is notified that CSG is idle.
+ *
+ * @kbdev: Kbase device
+ * @kbase_device_id: The ID of the physical hardware
+ * @kbase_device_csg_slot_index: The index of the slot in the scheduler whose CSG has been reported idle
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_device_csg_idle( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE( \
kbdev, \
kbase_device_id, \
kbase_device_csg_slot_index \
@@ -2373,16 +2488,16 @@ struct kbase_tlstream;
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @cqs_obj_gpu_addr: CQS Object GPU pointer
- * @cqs_obj_compare_value: Semaphore value that should be exceeded for the WAIT to pass
- * @cqs_obj_inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
+ * @compare_value: Semaphore value that should be exceeded for the WAIT to pass
+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
@@ -2391,8 +2506,8 @@ struct kbase_tlstream;
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
); \
} while (0)
#else
@@ -2400,8 +2515,8 @@ struct kbase_tlstream;
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value, \
- cqs_obj_inherit_error \
+ compare_value, \
+ inherit_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2438,6 +2553,96 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION - KCPU Queue enqueues Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @cqs_obj_gpu_addr: CQS Object GPU pointer
+ * @compare_value: Value that should be compared to semaphore value for the WAIT to pass
+ * @condition: Condition for unblocking WAITs on Timeline Cross Queue Sync Object (e.g. greater than, less than or equal)
+ * @data_type: Data type of a CQS Object's value
+ * @inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait_operation( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ compare_value, \
+ condition, \
+ data_type, \
+ inherit_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION - KCPU Queue enqueues Set Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @cqs_obj_gpu_addr: CQS Object GPU pointer
+ * @value: Value that will be set or added to semaphore
+ * @operation: Operation type performed on semaphore value (SET or ADD)
+ * @data_type: Data type of a CQS Object's value
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set_operation( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ value, \
+ operation, \
+ data_type \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import
*
* @kbdev: Kbase device
@@ -3000,6 +3205,95 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START - KCPU Queue starts a Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_START( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END - KCPU Queue ends a Wait Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_operation_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ execute_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_OPERATION_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION - KCPU Queue executes a Set Operation on Cross Queue Sync Object
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set_operation( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, \
+ execute_error \
+ ); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET_OPERATION( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
* KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import
*
* @kbdev: Kbase device