author     Jörg Wagner <jorwag@google.com>    2022-12-15 16:21:51 +0000
committer  Jörg Wagner <jorwag@google.com>    2022-12-15 16:28:12 +0000
commit     25e383ffa36a9916065804029fbe3552c71329fe (patch)
tree       1fd24ee61cf42115c75121f9de544814c76cb5a7
parent     9ff5b6f2510d94765def3cf7c1fda01e387cabab (diff)
download   gpu-25e383ffa36a9916065804029fbe3552c71329fe.tar.gz
Mali Valhall Android DDK r41p0-01eac0 KMD
Provenance 7bb206ede984968bd1014b29529e94763b043202 (ipdelivery/EAC/v_r41p0)
VX504X08X-BU-00000-r41p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r41p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r41p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r41p0-01eac0 - Valhall Android Renderscript AOSP parts
Change-Id: I95f741ffe0ec4ee4c8f2c0338778294f1a2a2836
59 files changed, 3752 insertions, 710 deletions
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h index d0a0998..335147c 100644 --- a/common/include/linux/version_compat_defs.h +++ b/common/include/linux/version_compat_defs.h @@ -24,10 +24,12 @@ #include <linux/version.h> -#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; #endif +#if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE + #ifndef EPOLLHUP #define EPOLLHUP POLLHUP #endif @@ -44,4 +46,6 @@ typedef unsigned int __poll_t; #define EPOLLRDNORM POLLRDNORM #endif +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h index 613eb1f..7bb91be 100644 --- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h +++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h @@ -29,7 +29,11 @@ #include <linux/types.h> #define KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS (4) +#if MALI_USE_CSF +#define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (65) +#else /* MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTER_PER_CORE (60) +#endif /* !MALI_USE_CSF */ #define KBASE_DUMMY_MODEL_COUNTERS_PER_BIT (4) #define KBASE_DUMMY_MODEL_COUNTER_ENABLED(enable_mask, ctr_idx) \ (enable_mask & (1 << (ctr_idx / KBASE_DUMMY_MODEL_COUNTERS_PER_BIT))) @@ -57,6 +61,10 @@ (KBASE_DUMMY_MODEL_MAX_NUM_PERF_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE) #define DUMMY_IMPLEMENTATION_SHADER_PRESENT (0xFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX (0x7FFFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX (0x3FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX (0x7FFull) +#define DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX (0xFFFull) #define DUMMY_IMPLEMENTATION_TILER_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_L2_PRESENT (0x1ull) #define DUMMY_IMPLEMENTATION_STACK_PRESENT (0xFull) diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 3c35d59..fc08158 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -69,7 +69,7 @@ endif # # Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r40p0-01eac0"' +MALI_RELEASE_NAME ?= '"r41p0-01eac0"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_DEBUG), y) MALI_UNIT_TEST = 1 diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index 8e689c1..701b68f 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -94,6 +94,21 @@ config MALI_MIDGARD_ENABLE_TRACE Enables tracing in kbase. 
Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_MIDGARD && MALI_CSF_SUPPORT + default y + help + Adds ability to request firmware core dump through the "fw_core_dump" + debugfs file + + Example: + * To explicitly request core dump: + echo 1 > /sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_MIDGARD && !MALI_CSF_SUPPORT @@ -207,20 +222,6 @@ config MALI_GEM5_BUILD comment "Debug options" depends on MALI_MIDGARD && MALI_EXPERT -config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - config MALI_DEBUG bool "Enable debug build" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index 01fad8f..e135d86 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -70,6 +70,12 @@ ifeq ($(CONFIG_MALI_MIDGARD),m) endif endif + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_FW_CORE_DUMP ?= y + else + CONFIG_MALI_FW_CORE_DUMP ?= n + endif + # # Expert/Debug/Test released configurations # @@ -149,6 +155,7 @@ else CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + CONFIG_MALI_FW_CORE_DUMP = n endif # All Mali CONFIG should be listed here @@ -189,7 +196,8 @@ CONFIGS := \ CONFIG_MALI_KUTF_IRQ_TEST \ CONFIG_MALI_KUTF_CLK_RATE_TRACE \ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN + CONFIG_MALI_XEN \ + CONFIG_MALI_FW_CORE_DUMP # diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index a7f038f..d294543 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -97,6 +97,21 @@ config MALI_MIDGARD_ENABLE_TRACE Enables tracing in kbase. Trace log available through the "mali_trace" debugfs file, when the CONFIG_DEBUG_FS is enabled +config MALI_FW_CORE_DUMP + bool "Enable support for FW core dump" + depends on MALI_MIDGARD && MALI_CSF_SUPPORT + default y + help + Adds ability to request firmware core dump through the "fw_core_dump" + debugfs file + + Example: + * To explicitly request core dump: + echo 1 > /sys/kernel/debug/mali0/fw_core_dump + * To output current core dump (after explicitly requesting a core dump, + or kernel driver reported an internal firmware error): + cat /sys/kernel/debug/mali0/fw_core_dump + config MALI_ARBITER_SUPPORT bool "Enable arbiter support for Mali" depends on MALI_MIDGARD && !MALI_CSF_SUPPORT @@ -170,20 +185,6 @@ config MALI_CORESTACK If unsure, say N. 
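An aside on the version_compat_defs.h hunk at the top of the diff: the change from >= to > fixes an off-by-one in the kernel-version gate. With >=, a v4.16.0 kernel, which already provides __poll_t, would also compile the fallback typedef and hit a redefinition; with >, the fallback is limited to kernels strictly older than 4.16. A minimal sketch of the resulting pattern (an illustrative restatement of the patched header, not new driver code):

    #include <linux/version.h>

    /* Fallback only for kernels that predate the __poll_t type (< v4.16) */
    #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
    typedef unsigned int __poll_t;
    #endif

    /* EPOLL* fallbacks, now additionally gated to kernels <= v4.9.78,
     * where the EPOLL* names are not yet available */
    #if KERNEL_VERSION(4, 9, 78) >= LINUX_VERSION_CODE
    #ifndef EPOLLHUP
    #define EPOLLHUP POLLHUP
    #endif
    #endif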
-config MALI_FW_CORE_DUMP - bool "Enable support for FW core dump" - depends on MALI_MIDGARD && MALI_EXPERT && MALI_CSF_SUPPORT - default n - help - Adds ability to request firmware core dump - - Example: - * To explicitly request core dump: - echo 1 >/sys/kernel/debug/mali0/fw_core_dump - * To output current core dump (after explicitly requesting a core dump, - or kernel driver reported an internal firmware error): - cat /sys/kernel/debug/mali0/fw_core_dump - choice prompt "Error injection level" depends on MALI_MIDGARD && MALI_EXPERT diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index 3967929..19c5021 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -319,7 +319,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -364,7 +364,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0, .gpu_features_hi = 0, - .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX, .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT, }, { @@ -412,7 +412,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX, .stack_present = 0xF, }, { @@ -428,7 +428,7 @@ static const struct control_reg_values_t all_control_reg_values[] = { .mmu_features = 0x2830, .gpu_features_lo = 0xf, .gpu_features_hi = 0, - .shader_present = 0xFF, + .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX, .stack_present = 0xF, }, }; @@ -530,17 +530,18 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, (ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF; /* Currently only primary counter blocks are supported */ - if (WARN_ON(event_index >= 64)) + if (WARN_ON(event_index >= + (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE))) return 0; /* The actual events start index 4 onwards. Spec also says PRFCNT_EN, * TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for * IPA counters. If selected, the value returned for them will be zero. */ - if (WARN_ON(event_index <= 3)) + if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS)) return 0; - event_index -= 4; + event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS; spin_lock_irqsave(&performance_counters.access_lock, flags); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 2345db5..f864661 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1142,13 +1142,22 @@ static bool can_power_down_l2(struct kbase_device *kbdev) #if MALI_USE_CSF /* Due to the HW issue GPU2019-3878, need to prevent L2 power off * whilst MMU command is in progress. + * Also defer the power-down if MMU is in process of page migration. 
*/ - return !kbdev->mmu_hw_operation_in_progress; + return !kbdev->mmu_hw_operation_in_progress && !kbdev->mmu_page_migrate_in_progress; #else - return true; + return !kbdev->mmu_page_migrate_in_progress; #endif } +static bool can_power_up_l2(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + /* Avoiding l2 transition if MMU is undergoing page migration */ + return !kbdev->mmu_page_migrate_in_progress; +} + static bool need_tiler_control(struct kbase_device *kbdev) { #if MALI_USE_CSF @@ -1220,7 +1229,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev) switch (backend->l2_state) { case KBASE_L2_OFF: - if (kbase_pm_is_l2_desired(kbdev)) { + if (kbase_pm_is_l2_desired(kbdev) && can_power_up_l2(kbdev)) { #if MALI_USE_CSF && defined(KBASE_PM_RUNTIME) /* Enable HW timer of IPA control before * L2 cache is powered-up. diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index cd5a6a3..cdc51d5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -995,4 +995,27 @@ static inline void kbase_pm_disable_db_mirror_interrupt(struct kbase_device *kbd } #endif +/** + * kbase_pm_l2_allow_mmu_page_migration - L2 state allows MMU page migration or not + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * Check whether the L2 state is in power transition phase or not. If it is, the MMU + * page migration should be deferred. The caller must hold hwaccess_lock, and, if MMU + * page migration is intended, immediately start the MMU migration action without + * dropping the lock. When page migration begins, a flag is set in kbdev that would + * prevent the L2 state machine traversing into power transition phases, until + * the MMU migration action ends. 
+ * + * Return: true if MMU page migration is allowed + */ +static inline bool kbase_pm_l2_allow_mmu_page_migration(struct kbase_device *kbdev) +{ + struct kbase_pm_backend_data *backend = &kbdev->pm.backend; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + return (backend->l2_state != KBASE_L2_PEND_ON && backend->l2_state != KBASE_L2_PEND_OFF); +} + #endif /* _KBASE_BACKEND_PM_INTERNAL_H_ */ diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index b6abfc4..792f724 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -129,10 +129,6 @@ int kbase_context_common_init(struct kbase_context *kctx) /* creating a context is considered a disjoint event */ kbase_disjoint_event(kctx->kbdev); - kctx->as_nr = KBASEP_AS_NR_INVALID; - - atomic_set(&kctx->refcount, 0); - spin_lock_init(&kctx->mm_update_lock); kctx->process_mm = NULL; atomic_set(&kctx->nonmapped_pages, 0); @@ -251,15 +247,8 @@ static void kbase_remove_kctx_from_process(struct kbase_context *kctx) void kbase_context_common_term(struct kbase_context *kctx) { - unsigned long flags; int pages; - mutex_lock(&kctx->kbdev->mmu_hw_mutex); - spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); - kbase_ctx_sched_remove_ctx(kctx); - spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); - mutex_unlock(&kctx->kbdev->mmu_hw_mutex); - pages = atomic_read(&kctx->used_pages); if (pages != 0) dev_warn(kctx->kbdev->dev, diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild index 56c69a1..2b02279 100644 --- a/mali_kbase/csf/Kbuild +++ b/mali_kbase/csf/Kbuild @@ -36,6 +36,7 @@ mali_kbase-y += \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ csf/mali_kbase_csf_event.o \ csf/mali_kbase_csf_firmware_log.o \ + csf/mali_kbase_csf_firmware_core_dump.o \ csf/mali_kbase_csf_tiler_heap_reclaim.o mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o @@ -44,7 +45,6 @@ mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o mali_kbase-$(CONFIG_DEBUG_FS) += csf/mali_kbase_debug_csf_fault.o - ifeq ($(KBUILD_EXTMOD),) # in-tree -include $(src)/csf/ipa_control/Kbuild diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index f48344e..b17c010 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -40,6 +40,8 @@ #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) #define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1) +#define PROTM_ALLOC_MAX_RETRIES ((u8)5) + const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = { KBASE_QUEUE_GROUP_PRIORITY_HIGH, KBASE_QUEUE_GROUP_PRIORITY_MEDIUM, @@ -136,7 +138,7 @@ static void gpu_munmap_user_io_pages(struct kbase_context *kctx, struct kbase_va size_t num_pages = 2; kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, phys, - num_pages, MCU_AS_NR); + num_pages, MCU_AS_NR, true); WARN_ON(reg->flags & KBASE_REG_FREE); @@ -194,25 +196,25 @@ static int gpu_mmap_user_io_pages(struct kbase_device *kbdev, return ret; /* Map input page */ - ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - &phys[0], 1, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_IO, mmu_sync_info); + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, &phys[0], 1, + mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, mmu_sync_info, + NULL, false); if (ret) goto bad_insert; /* Map output page, it needs rw access */ mem_flags |= KBASE_REG_GPU_WR; - ret = kbase_mmu_insert_pages(kbdev, 
&kbdev->csf.mcu_mmu, - reg->start_pfn + 1, &phys[1], 1, mem_flags, - MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, - mmu_sync_info); + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn + 1, &phys[1], 1, + mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_IO, mmu_sync_info, + NULL, false); if (ret) goto bad_insert_output_page; return 0; bad_insert_output_page: - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR); + kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, phys, 1, MCU_AS_NR, + true); bad_insert: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -307,8 +309,7 @@ static void release_queue(struct kbase_queue *queue); * If an explicit or implicit unbind was missed by the userspace then the * mapping will persist. On process exit kernel itself will remove the mapping. */ -static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, - struct kbase_queue *queue) +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) { const size_t num_pages = 2; @@ -327,6 +328,7 @@ static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, */ release_queue(queue); } +KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages); int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue) @@ -345,7 +347,6 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO], num_pages, queue->phys, false); - if (ret != num_pages) goto phys_alloc_failed; @@ -396,6 +397,7 @@ phys_alloc_failed: return -ENOMEM; } +KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages); static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, u8 group_handle) @@ -413,6 +415,12 @@ static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx, return NULL; } +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle) +{ + return find_queue_group(kctx, group_handle); +} +KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group); + int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle) { @@ -463,6 +471,17 @@ static void release_queue(struct kbase_queue *queue) "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, queue->kctx->id); kbase_csf_event_remove_error(queue->kctx, &queue->error); + + /* After this the Userspace would be able to free the + * memory for GPU queue. In case the Userspace missed + * terminating the queue, the cleanup will happen on + * context termination where tear down of region tracker + * would free up the GPU queue memory. 
+ */ + kbase_gpu_vm_lock(queue->kctx); + kbase_va_region_no_user_free_put(queue->kctx, queue->queue_reg); + kbase_gpu_vm_unlock(queue->kctx); + kfree(queue); } } @@ -516,7 +535,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx, region = kbase_region_tracker_find_region_enclosing_address(kctx, queue_addr); - if (kbase_is_region_invalid_or_free(region)) { + if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) || + region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) { ret = -ENOENT; goto out_unlock_vm; } @@ -565,7 +585,7 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->kctx = kctx; queue->base_addr = queue_addr; - queue->queue_reg = region; + queue->queue_reg = kbase_va_region_no_user_free_get(kctx, region); queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; queue->enabled = false; @@ -603,7 +623,6 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->extract_ofs = 0; - region->flags |= KBASE_REG_NO_USER_FREE; region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -697,16 +716,8 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, unbind_queue(kctx, queue); kbase_gpu_vm_lock(kctx); - if (!WARN_ON(!queue->queue_reg)) { - /* After this the Userspace would be able to free the - * memory for GPU queue. In case the Userspace missed - * terminating the queue, the cleanup will happen on - * context termination where tear down of region tracker - * would free up the GPU queue memory. - */ - queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE; + if (!WARN_ON(!queue->queue_reg)) queue->queue_reg->user_data = NULL; - } kbase_gpu_vm_unlock(kctx); release_queue(queue); @@ -870,6 +881,15 @@ void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev, if (WARN_ON(slot_bitmap > allowed_bitmap)) return; + /* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and + * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request + * or 2 CSI requests overlap and FW ends up missing the 2nd request. + * Memory barrier is required, both on Host and FW side, to guarantee the ordering. + * + * 'osh' is used as CPU and GPU would be in the same Outer shareable domain. + */ + dmb(osh); + value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK); value ^= slot_bitmap; kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value, @@ -1168,10 +1188,9 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx, goto add_va_region_failed; /* Update MMU table */ - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - reg->start_pfn, &s_buf->phy[0], nr_pages, - mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, reg->start_pfn, + &s_buf->phy[0], nr_pages, mem_flags, MCU_AS_NR, + KBASE_MEM_GROUP_CSF_FW, mmu_sync_info, NULL, false); if (err) goto mmu_insert_failed; @@ -1198,83 +1217,47 @@ phy_alloc_failed: } /** - * create_protected_suspend_buffer() - Create protected-mode suspend buffer - * per queue group + * init_protected_suspend_buffer() - Reserve the VA range for the protected-mode + * suspend buffer of a queue group. + * Allocation of physical pages will happen when + * queue group enters protected mode. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. 
* @s_buf: Pointer to suspend buffer that is attached to queue group * - * Return: 0 if suspend buffer is successfully allocated and reflected to GPU - * MMU page table. Otherwise -ENOMEM. + * Return: 0 if suspend buffer init is successful, otherwise negative error value. */ -static int create_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) +static int init_protected_suspend_buffer(struct kbase_device *const kbdev, + struct kbase_protected_suspend_buffer *s_buf) { struct kbase_va_region *reg = NULL; - struct tagged_addr *phys = NULL; - const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); int err = 0; - /* Calls to this function are inherently asynchronous, with respect to - * MMU operations. - */ - const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + s_buf->reg = NULL; + s_buf->pma = NULL; + s_buf->alloc_retries = 0; /* Allocate and initialize Region Object */ reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0, nr_pages, KBASE_REG_ZONE_MCU_SHARED); - if (!reg) + if (unlikely(!reg)) return -ENOMEM; - phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); - if (!phys) { - err = -ENOMEM; - goto phy_alloc_failed; - } - - s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, - nr_pages, true); - if (s_buf->pma == NULL) { - err = -ENOMEM; - goto pma_alloc_failed; - } - /* Insert Region Object into rbtree and make virtual address available - * to map it to physical page + * to map it to physical page. */ mutex_lock(&kbdev->csf.reg_lock); err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1); reg->flags &= ~KBASE_REG_FREE; mutex_unlock(&kbdev->csf.reg_lock); - if (err) - goto add_va_region_failed; - - /* Update MMU table */ - err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, reg->start_pfn, - phys, nr_pages, mem_flags, MCU_AS_NR, - KBASE_MEM_GROUP_CSF_FW, mmu_sync_info); - if (err) - goto mmu_insert_failed; - - s_buf->reg = reg; - kfree(phys); - return 0; - -mmu_insert_failed: - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, reg); - mutex_unlock(&kbdev->csf.reg_lock); - -add_va_region_failed: - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); -pma_alloc_failed: - kfree(phys); -phy_alloc_failed: - kfree(reg); + if (unlikely(err)) + kbase_free_alloced_region(reg); + else + s_buf->reg = reg; return err; } @@ -1305,12 +1288,10 @@ static int create_suspend_buffers(struct kbase_context *const kctx, } if (kctx->kbdev->csf.pma_dev) { - err = create_protected_suspend_buffer(kctx->kbdev, - &group->protected_suspend_buf); + err = init_protected_suspend_buffer(kctx->kbdev, &group->protected_suspend_buf); if (err) { term_normal_suspend_buffer(kctx, &group->normal_suspend_buf); - dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n"); } } else { group->protected_suspend_buf.reg = NULL; @@ -1521,7 +1502,8 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, lockdep_assert_held(&kctx->csf.lock); WARN_ON(kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu, - s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR)); + s_buf->reg->start_pfn, s_buf->phy, nr_pages, MCU_AS_NR, + true)); WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); @@ -1540,38 +1522,41 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx, } /** - * term_protected_suspend_buffer() - Free normal-mode suspend buffer of + *
term_protected_suspend_buffer() - Free protected-mode suspend buffer of * queue group * * @kbdev: Instance of a GPU platform device that implements a CSF interface. - * @s_buf: Pointer to queue group suspend buffer to be freed + * @sbuf: Pointer to queue group suspend buffer to be freed */ static void term_protected_suspend_buffer(struct kbase_device *const kbdev, - struct kbase_protected_suspend_buffer *s_buf) + struct kbase_protected_suspend_buffer *sbuf) { - const size_t nr_pages = - PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); - struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); - size_t i = 0; + if (sbuf->pma) { + const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + size_t i = 0; + struct tagged_addr *phys = kmalloc(sizeof(*phys) * nr_pages, GFP_KERNEL); - for (i = 0; phys && i < nr_pages; i++) - phys[i] = as_tagged(s_buf->pma[i]->pa); + for (i = 0; phys && i < nr_pages; i++) + phys[i] = as_tagged(sbuf->pma[i]->pa); - WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, s_buf->reg->start_pfn, phys, - nr_pages, MCU_AS_NR)); + WARN_ON(kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, sbuf->reg->start_pfn, + phys, nr_pages, MCU_AS_NR, true)); - kfree(phys); + kfree(phys); + kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true); + sbuf->pma = NULL; + } - WARN_ON(s_buf->reg->flags & KBASE_REG_FREE); + if (sbuf->reg) { + WARN_ON(sbuf->reg->flags & KBASE_REG_FREE); - mutex_lock(&kbdev->csf.reg_lock); - kbase_remove_va_region(kbdev, s_buf->reg); - mutex_unlock(&kbdev->csf.reg_lock); + mutex_lock(&kbdev->csf.reg_lock); + kbase_remove_va_region(kbdev, sbuf->reg); + mutex_unlock(&kbdev->csf.reg_lock); - kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true); - s_buf->pma = NULL; - kfree(s_buf->reg); - s_buf->reg = NULL; + kbase_free_alloced_region(sbuf->reg); + sbuf->reg = NULL; + } } void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group) @@ -1738,6 +1723,7 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx, kfree(group); } +KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate); int kbase_csf_queue_group_suspend(struct kbase_context *kctx, struct kbase_suspend_copy_buffer *sus_buf, @@ -2017,12 +2003,10 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. */ #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) - if (atomic_read(&queue->refcount) != 1) + WARN_ON(atomic_read(&queue->refcount) != 1); #else - if (refcount_read(&queue->refcount) != 1) + WARN_ON(refcount_read(&queue->refcount) != 1); #endif - dev_warn(kctx->kbdev->dev, - "Releasing queue with incorrect refcounting!\n"); list_del_init(&queue->link); release_queue(queue); } @@ -2369,6 +2353,85 @@ static void handle_progress_timer_event(struct kbase_queue_group *const group) } /** + * alloc_grp_protected_suspend_buffer_pages() - Allocate physical pages from the protected + * memory for the protected mode suspend buffer. + * @group: Pointer to the GPU queue group. + * + * Return: 0 if suspend buffer allocation is successful or if it's already allocated, otherwise + * negative error value.
+ */ +static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group) +{ + struct kbase_device *const kbdev = group->kctx->kbdev; + struct kbase_context *kctx = group->kctx; + struct tagged_addr *phys = NULL; + const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR; + struct protected_memory_allocation **pma = NULL; + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + size_t nr_pages; + int err = 0; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + if (likely(sbuf->pma)) + return 0; + + nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size); + phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL); + if (unlikely(!phys)) { + err = -ENOMEM; + goto phys_free; + } + + pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true); + if (pma == NULL) { + err = -ENOMEM; + goto phys_free; + } + + mutex_lock(&kctx->csf.lock); + + if (unlikely(!sbuf->reg)) { + dev_err(kbdev->dev, + "No VA region for the group %d of context %d_%d trying to enter protected mode", + group->handle, group->kctx->tgid, group->kctx->id); + err = -EINVAL; + kbase_csf_protected_memory_free(kbdev, pma, nr_pages, true); + goto unlock; + } + + /* Update MMU table */ + err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, sbuf->reg->start_pfn, phys, + nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, + mmu_sync_info, NULL, true); + if (unlikely(err)) + kbase_csf_protected_memory_free(kbdev, pma, nr_pages, true); + else + sbuf->pma = pma; + +unlock: + mutex_unlock(&kctx->csf.lock); +phys_free: + kfree(phys); + return err; +} + +static void report_group_fatal_error(struct kbase_queue_group *const group) +{ + struct base_gpu_queue_group_error const + err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .status = GPU_EXCEPTION_TYPE_SW_FAULT_0, + } } }; + + kbase_csf_add_group_fatal_error(group, &err_payload); + kbase_event_wakeup(group->kctx); +} + +/** * protm_event_worker - Protected mode switch request event handler * called from a workqueue. 
* @@ -2380,10 +2443,26 @@ static void protm_event_worker(struct work_struct *data) { struct kbase_queue_group *const group = container_of(data, struct kbase_queue_group, protm_event_work); + struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf; + int err = 0; KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u); - kbase_csf_scheduler_group_protm_enter(group); + + err = alloc_grp_protected_suspend_buffer_pages(group); + if (!err) { + kbase_csf_scheduler_group_protm_enter(group); + } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) { + sbuf->alloc_retries++; + /* try again to allocate pages */ + queue_work(group->kctx->csf.wq, &group->protm_event_work); + } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) { + dev_err(group->kctx->kbdev->dev, + "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d", + group->handle, group->kctx->tgid, group->kctx->id); + report_group_fatal_error(group); + } + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u); } @@ -2750,6 +2829,9 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, track->protm_grp = group; } + if (!group->protected_suspend_buf.pma) + queue_work(group->kctx->csf.wq, &group->protm_event_work); + if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) { clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask); @@ -2791,8 +2873,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num)) return; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - ginfo = &kbdev->csf.global_iface.groups[csg_nr]; req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2801,7 +2881,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c /* There may not be any pending CSG/CS interrupts to process */ if ((req == ack) && (irqreq == irqack)) - goto out; + return; /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before * examining the CS_ACK & CS_REQ bits. This would ensure that Host @@ -2822,10 +2902,12 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c * slot scheduler spinlock is required. 
*/ if (!group) - goto out; + return; if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr)) - goto out; + return; + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { kbase_csf_firmware_csg_input_mask(ginfo, @@ -2887,8 +2969,6 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c process_cs_interrupts(group, ginfo, irqreq, irqack, track); -out: - /* group may still be NULL here */ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group, ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32)); } diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index b267740..fc3342e 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -45,7 +45,7 @@ */ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */ +#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (10000) /* Default 10 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -124,6 +124,25 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_terminate *term); /** + * kbase_csf_free_command_stream_user_pages() - Free the resources allocated + * for a queue at the time of bind. + * + * @kctx: Address of the kbase context within which the queue was created. + * @queue: Pointer to the queue to be unlinked. + * + * This function will free the pair of physical pages allocated for a GPU + * command queue, and also release the hardware doorbell page, that were mapped + * into the process address space to enable direct submission of commands to + * the hardware. Also releases the reference taken on the queue when the mapping + * was created. + * + * If an explicit or implicit unbind was missed by the userspace then the + * mapping will persist. On process exit kernel itself will remove the mapping. + */ +void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, + struct kbase_queue *queue); + +/** * kbase_csf_alloc_command_stream_user_pages - Allocate resources for a * GPU command queue. * @@ -186,6 +205,20 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick); /** + * kbase_csf_find_queue_group - Find the queue group corresponding + * to the indicated handle. + * + * @kctx: The kbase context under which the queue group exists. + * @group_handle: Handle for the group which uniquely identifies it within + * the context with which it was created. + * + * This function is used to find the queue group when passed a handle. + * + * Return: Pointer to a queue group on success, NULL on failure + */ +struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle); + +/** + * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle + * is valid.
* @@ -464,4 +497,5 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) return 0; #endif } + #endif /* _KBASE_CSF_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index e27c568..b7ceebc 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -437,10 +437,13 @@ struct kbase_normal_suspend_buffer { * @pma: Array of pointer to protected mode allocations containing * information about memory pages allocated for protected mode * suspend buffer. + * @alloc_retries: Number of times we retried allocating physical pages + * for protected suspend buffers. */ struct kbase_protected_suspend_buffer { struct kbase_va_region *reg; struct protected_memory_allocation **pma; + u8 alloc_retries; }; /** @@ -1328,6 +1331,24 @@ struct kbase_csf_firmware_log { u32 func_call_list_va_end; }; +/** + * struct kbase_csf_firmware_core_dump - Object containing members for handling + * firmware core dump. + * + * @mcu_regs_addr: GPU virtual address of the start of the MCU registers buffer + * in Firmware. + * @version: Version of the FW image header core dump data format. Bits + * 7:0 specify version minor and 15:8 specify version major. + * @available: Flag to identify if the FW core dump buffer is available. + * True if entry is available in the FW image header and version + * is supported, False otherwise. + */ +struct kbase_csf_firmware_core_dump { + u32 mcu_regs_addr; + u16 version; + bool available; +}; + #if IS_ENABLED(CONFIG_DEBUG_FS) /** * struct kbase_csf_dump_on_fault - Faulty information to deliver to the daemon @@ -1458,9 +1479,9 @@ struct kbase_csf_dump_on_fault { * the glb_pwoff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time - * window in unit of ms. The firmware does not use it - * directly. + * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time + * window in unit of microseconds. The firmware does not + * use it directly. + * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. @@ -1470,6 +1491,8 @@ struct kbase_csf_dump_on_fault { * HW counters. * @fw: Copy of the loaded MCU firmware image. * @fw_log: Contain members required for handling firmware log. + * @fw_core_dump: Contain members required for handling the firmware + * core dump. * @dof: Structure for dump on fault.
*/ struct kbase_csf_device { @@ -1507,12 +1530,13 @@ struct kbase_csf_device { u32 mcu_core_pwroff_dur_us; u32 mcu_core_pwroff_dur_count; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_ms; + u32 gpu_idle_hysteresis_us; u32 gpu_idle_dur_count; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; struct kbase_csf_firmware_log fw_log; + struct kbase_csf_firmware_core_dump fw_core_dump; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_dump_on_fault dof; #endif /* CONFIG_DEBUG_FS */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index fc4121e..1e409ac 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -22,6 +22,7 @@ #include "mali_kbase.h" #include "mali_kbase_csf_firmware_cfg.h" #include "mali_kbase_csf_firmware_log.h" +#include "mali_kbase_csf_firmware_core_dump.h" #include "mali_kbase_csf_trace_buffer.h" #include "mali_kbase_csf_timeout.h" #include "mali_kbase_mem.h" @@ -81,7 +82,7 @@ MODULE_PARM_DESC(fw_debug, #define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) #define FIRMWARE_HEADER_VERSION_MAJOR (0ul) -#define FIRMWARE_HEADER_VERSION_MINOR (2ul) +#define FIRMWARE_HEADER_VERSION_MINOR (3ul) #define FIRMWARE_HEADER_LENGTH (0x14ul) #define CSF_FIRMWARE_ENTRY_SUPPORTED_FLAGS \ @@ -93,12 +94,13 @@ MODULE_PARM_DESC(fw_debug, CSF_FIRMWARE_ENTRY_ZERO | \ CSF_FIRMWARE_ENTRY_CACHE_MODE) -#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) -#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) -#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) -#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) +#define CSF_FIRMWARE_ENTRY_TYPE_INTERFACE (0) +#define CSF_FIRMWARE_ENTRY_TYPE_CONFIGURATION (1) +#define CSF_FIRMWARE_ENTRY_TYPE_TRACE_BUFFER (3) +#define CSF_FIRMWARE_ENTRY_TYPE_TIMELINE_METADATA (4) #define CSF_FIRMWARE_ENTRY_TYPE_BUILD_INFO_METADATA (6) -#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST (7) +#define CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP (9) #define CSF_FIRMWARE_CACHE_MODE_NONE (0ul << 3) #define CSF_FIRMWARE_CACHE_MODE_CACHED (1ul << 3) @@ -120,7 +122,6 @@ MODULE_PARM_DESC(fw_debug, (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -488,6 +489,7 @@ out: * @kbdev: Kbase device structure * @virtual_start: Start of the virtual address range required for an entry allocation * @virtual_end: End of the virtual address range required for an entry allocation + * @flags: Firmware entry flags for comparison with the reusable pages found * @phys: Pointer to the array of physical (tagged) addresses making up the new * FW interface entry. It is an output parameter which would be made to * point to an already existing array allocated for the previously parsed @@ -508,10 +510,12 @@ out: * * Return: true if a large page can be reused, false otherwise. 
*/ -static inline bool entry_find_large_page_to_reuse( - struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end, - struct tagged_addr **phys, struct protected_memory_allocation ***pma, - u32 num_pages, u32 *num_pages_aligned, bool *is_small_page) +static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, + const u32 virtual_start, const u32 virtual_end, + const u32 flags, struct tagged_addr **phys, + struct protected_memory_allocation ***pma, + u32 num_pages, u32 *num_pages_aligned, + bool *is_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -557,7 +561,7 @@ static inline bool entry_find_large_page_to_reuse( if (interface->virtual & (SZ_2M - 1)) continue; - if (virtual_diff < virtual_diff_min) { + if ((virtual_diff < virtual_diff_min) && (interface->flags == flags)) { target_interface = interface; virtual_diff_min = virtual_diff; } @@ -620,6 +624,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; + bool ignore_page_migration = true; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -662,9 +667,9 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - reuse_pages = entry_find_large_page_to_reuse( - kbdev, virtual_start, virtual_end, &phys, &pma, - num_pages, &num_pages_aligned, &is_small_page); + reuse_pages = + entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, &phys, + &pma, num_pages, &num_pages_aligned, &is_small_page); if (!reuse_pages) phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -685,6 +690,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false); + ignore_page_migration = false; } } @@ -794,7 +800,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, + ignore_page_migration); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -1023,20 +1030,26 @@ static int load_firmware_entry(struct kbase_device *kbdev, const struct kbase_cs return parse_build_info_metadata_entry(kbdev, fw, entry, size); case CSF_FIRMWARE_ENTRY_TYPE_FUNC_CALL_LIST: /* Function call list section */ - if (size < 2 * sizeof(*entry)) { + if (size < FUNC_CALL_LIST_ENTRY_NAME_OFFSET + sizeof(*entry)) { dev_err(kbdev->dev, "Function call list entry too short (size=%u)\n", size); return -EINVAL; } kbase_csf_firmware_log_parse_logging_call_list_entry(kbdev, entry); - break; - } - - if (!optional) { - dev_err(kbdev->dev, - "Unsupported non-optional entry type %u in firmware\n", - type); - return -EINVAL; + return 0; + case CSF_FIRMWARE_ENTRY_TYPE_CORE_DUMP: + /* Core Dump section */ + if (size < CORE_DUMP_ENTRY_START_ADDR_OFFSET + sizeof(*entry)) { + dev_err(kbdev->dev, "FW Core dump entry too short (size=%u)\n", size); + return -EINVAL; + } + return kbase_csf_firmware_core_dump_entry_parse(kbdev, entry); + default: + if (!optional) { + dev_err(kbdev->dev, "Unsupported non-optional entry type %u in firmware\n", + type); + return -EINVAL; + } } return 0; @@ 
-1687,6 +1700,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} /** * kbasep_enable_rtu - Enable Ray Tracing Unit on powering up shader core @@ -1714,7 +1792,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; + GLB_REQ_DEBUG_CSF_REQ_MASK | GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -1890,12 +1968,12 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_ms; + u64 dur_val = dur_us; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -1913,9 +1991,9 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_ms/1000) * freq_HZ) >> 10) */ + /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000); + dur_val = div_u64(dur_val, 1000000); /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? 
S32_MAX : (u32)dur_val; @@ -1938,7 +2016,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -1955,7 +2033,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -1986,7 +2064,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -2166,14 +2244,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -2353,6 +2431,10 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#ifdef CONFIG_MALI_FW_CORE_DUMP + kbase_csf_firmware_core_dump_init(kbdev); +#endif + /* Firmware loaded successfully, ret = 0 */ KBASE_KTRACE_ADD(kbdev, CSF_FIRMWARE_BOOT, NULL, (((u64)version_hash) << 32) | @@ -2848,7 +2930,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -2909,4 +2991,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index bf4bb6f..cc20f9a 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -246,7 +246,6 @@ void kbase_csf_firmware_csg_input_mask( u32 kbase_csf_firmware_csg_output( const struct kbase_csf_cmd_stream_group_info *info, u32 offset); - /** * struct kbase_csf_global_iface - Global CSF interface * provided by the firmware. 
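The milliseconds-to-microseconds change threads through convert_dur_to_idle_count() above, which turns the hysteresis window into a count of 2^10-cycle units of the system timer (HYSTERESIS_VAL_UNIT_SHIFT), clamped to S32_MAX before being handed to the firmware. A worked example of the arithmetic as standalone C, assuming a 26 MHz system timer (the frequency here is an assumption for illustration; the driver reads the real one via arch_timer_get_cntfrq()):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint64_t freq = 26000000;   /* assumed cntfreq_el0: 26 MHz */
        uint64_t dur_val = 10000;         /* FIRMWARE_IDLE_HYSTERESIS_TIME_USEC */

        /* Multiply before dividing, as the driver does, to keep precision:
         * dur_val = ((dur_us / 1000000) * freq_HZ) >> 10
         */
        dur_val = (dur_val * freq) >> 10; /* HYSTERESIS_VAL_UNIT_SHIFT */
        dur_val = dur_val / 1000000;      /* microseconds -> seconds, last */

        /* Prints 253 for the 10 ms default at 26 MHz */
        printf("gpu_idle_dur_count = %llu\n", (unsigned long long)dur_val);
        return 0;
    }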
@@ -858,5 +857,16 @@ static inline u32 kbase_csf_interface_version(u32 major, u32 minor, u32 patch) */ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev); +/** + * kbase_csf_firmware_req_core_dump - Request a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Request a firmware core dump and wait for firmware to acknowledge. + * Firmware will enter an infinite loop after the firmware core dump is created. + * + * Return: 0 if success, or negative error code on failure. + */ +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev); #endif diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c new file mode 100644 index 0000000..f0a10d1 --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/list.h> +#include <linux/file.h> +#include <linux/elf.h> +#include <linux/elfcore.h> + +#include "mali_kbase.h" +#include "mali_kbase_csf_firmware_core_dump.h" +#include "backend/gpu/mali_kbase_pm_internal.h" + +/* Page size in bytes in use by MCU. */ +#define FW_PAGE_SIZE 4096 + +/* + * FW image header core dump data format supported. + * Currently only version 0.1 is supported. + */ +#define FW_CORE_DUMP_DATA_VERSION_MAJOR 0 +#define FW_CORE_DUMP_DATA_VERSION_MINOR 1 + +/* Full version of the image header core dump data format */ +#define FW_CORE_DUMP_DATA_VERSION \ ((FW_CORE_DUMP_DATA_VERSION_MAJOR << 8) | FW_CORE_DUMP_DATA_VERSION_MINOR) + +/* Validity flag to indicate if the MCU registers in the buffer are valid */ +#define FW_MCU_STATUS_MASK 0x1 +#define FW_MCU_STATUS_VALID (1 << 0) + +/* Core dump entry fields */ +#define FW_CORE_DUMP_VERSION_INDEX 0 +#define FW_CORE_DUMP_START_ADDR_INDEX 1 + +/* MCU registers stored by a firmware core dump */ +struct fw_core_dump_mcu { + u32 r0; + u32 r1; + u32 r2; + u32 r3; + u32 r4; + u32 r5; + u32 r6; + u32 r7; + u32 r8; + u32 r9; + u32 r10; + u32 r11; + u32 r12; + u32 sp; + u32 lr; + u32 pc; +}; + +/* Any ELF definitions used in this file are from elf.h/elfcore.h except + * when specific 32-bit versions are required (mainly for the + * ELF_PRSTATUS32 note that is used to contain the MCU registers). + */ + +/* - 32-bit version of timeval structures used in ELF32 PRSTATUS note. */ +struct prstatus32_timeval { + int tv_sec; + int tv_usec; +}; + +/* - Structure defining ELF32 PRSTATUS note contents, as defined by the + * GNU binutils BFD library used by GDB, in bfd/hosts/x86-64linux.h. + * Note: GDB checks for the size of this structure to be 0x94.
+ * Modified pr_reg (array containing the Arm 32-bit MCU registers) to + * use u32[18] instead of elf_gregset32_t to prevent introducing new typedefs. + */ +struct elf_prstatus32 { + struct elf_siginfo pr_info; /* Info associated with signal. */ + short int pr_cursig; /* Current signal. */ + unsigned int pr_sigpend; /* Set of pending signals. */ + unsigned int pr_sighold; /* Set of held signals. */ + pid_t pr_pid; + pid_t pr_ppid; + pid_t pr_pgrp; + pid_t pr_sid; + struct prstatus32_timeval pr_utime; /* User time. */ + struct prstatus32_timeval pr_stime; /* System time. */ + struct prstatus32_timeval pr_cutime; /* Cumulative user time. */ + struct prstatus32_timeval pr_cstime; /* Cumulative system time. */ + u32 pr_reg[18]; /* GP registers. */ + int pr_fpvalid; /* True if math copro being used. */ +}; + +/** + * struct fw_core_dump_data - Context for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +struct fw_core_dump_data { + struct kbase_device *kbdev; +}; + +/* + * struct fw_core_dump_seq_off - Iterator for seq_file operations used on 'fw_core_dump' + * debugfs file. + * @interface: current firmware memory interface + * @page_num: current page number (0..) within @interface + */ +struct fw_core_dump_seq_off { + struct kbase_csf_firmware_interface *interface; + u32 page_num; +}; + +/** + * fw_get_core_dump_mcu - Get the MCU registers saved by a firmware core dump + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @regs: Pointer to a core dump mcu struct where the MCU registers are copied + * to. Should be allocated by the caller. + * + * Return: 0 if successfully copied the MCU registers, negative error code otherwise. + */ +static int fw_get_core_dump_mcu(struct kbase_device *kbdev, struct fw_core_dump_mcu *regs) +{ + unsigned int i; + u32 status = 0; + u32 data_addr = kbdev->csf.fw_core_dump.mcu_regs_addr; + u32 *data = (u32 *)regs; + + /* Check if the core dump entry exposed the buffer */ + if (!regs || !kbdev->csf.fw_core_dump.available) + return -EPERM; + + /* Check if the data in the buffer is valid, if not, return error */ + kbase_csf_read_firmware_memory(kbdev, data_addr, &status); + if ((status & FW_MCU_STATUS_MASK) != FW_MCU_STATUS_VALID) + return -EPERM; + + /* According to image header documentation, the MCU registers core dump + * buffer is 32-bit aligned. + */ + for (i = 1; i <= sizeof(struct fw_core_dump_mcu) / sizeof(u32); ++i) + kbase_csf_read_firmware_memory(kbdev, data_addr + i * sizeof(u32), &data[i - 1]); + + return 0; +} + +/** + * fw_core_dump_fill_elf_header - Initializes an ELF32 header + * @hdr: ELF32 header to initialize + * @sections: Number of entries in the ELF program header table + * + * Initializes an ELF32 header for an ARM 32-bit little-endian + * 'Core file' object file. + */ +static void fw_core_dump_fill_elf_header(struct elf32_hdr *hdr, unsigned int sections) +{ + /* Reset all members in header. */ + memset(hdr, 0, sizeof(*hdr)); + + /* Magic number identifying file as an ELF object. */ + memcpy(hdr->e_ident, ELFMAG, SELFMAG); + + /* Identify file as 32-bit, little-endian, using current + * ELF header version, with no OS or ABI specific ELF + * extensions used. + */ + hdr->e_ident[EI_CLASS] = ELFCLASS32; + hdr->e_ident[EI_DATA] = ELFDATA2LSB; + hdr->e_ident[EI_VERSION] = EV_CURRENT; + hdr->e_ident[EI_OSABI] = ELFOSABI_NONE; + + /* 'Core file' type of object file.
*/ + hdr->e_type = ET_CORE; + + /* ARM 32-bit architecture (AARCH32) */ + hdr->e_machine = EM_ARM; + + /* Object file version: the original format. */ + hdr->e_version = EV_CURRENT; + + /* Offset of program header table in file. */ + hdr->e_phoff = sizeof(struct elf32_hdr); + + /* No processor specific flags. */ + hdr->e_flags = 0; + + /* Size of the ELF header in bytes. */ + hdr->e_ehsize = sizeof(struct elf32_hdr); + + /* Size of the ELF program header entry in bytes. */ + hdr->e_phentsize = sizeof(struct elf32_phdr); + + /* Number of entries in the program header table. */ + hdr->e_phnum = sections; +} + +/** + * fw_core_dump_fill_elf_program_header_note - Initializes an ELF32 program header + * for holding auxiliary information + * @phdr: ELF32 program header + * @file_offset: Location of the note in the file in bytes + * @size: Size of the note in bytes. + * + * Initializes an ELF32 program header describing auxiliary information (containing + * one or more notes) of @size bytes altogether located in the file at offset + * @file_offset. + */ +static void fw_core_dump_fill_elf_program_header_note(struct elf32_phdr *phdr, u32 file_offset, + u32 size) +{ + /* Auxiliary information (note) in program header. */ + phdr->p_type = PT_NOTE; + + /* Location of first note in file in bytes. */ + phdr->p_offset = file_offset; + + /* Size of all notes combined in bytes. */ + phdr->p_filesz = size; + + /* Other members not relevant for a note. */ + phdr->p_vaddr = 0; + phdr->p_paddr = 0; + phdr->p_memsz = 0; + phdr->p_align = 0; + phdr->p_flags = 0; +} + +/** + * fw_core_dump_fill_elf_program_header - Initializes an ELF32 program header for a loadable segment + * @phdr: ELF32 program header to initialize. + * @file_offset: Location of loadable segment in file in bytes + * (aligned to FW_PAGE_SIZE bytes) + * @vaddr: 32-bit virtual address where to write the segment + * (aligned to FW_PAGE_SIZE bytes) + * @size: Size of the segment in bytes. + * @flags: CSF_FIRMWARE_ENTRY_* flags describing access permissions. + * + * Initializes an ELF32 program header describing a loadable segment of + * @size bytes located in the file at offset @file_offset to be loaded + * at virtual address @vaddr with access permissions as described by + * CSF_FIRMWARE_ENTRY_* flags in @flags. + */ +static void fw_core_dump_fill_elf_program_header(struct elf32_phdr *phdr, u32 file_offset, + u32 vaddr, u32 size, u32 flags) +{ + /* Loadable segment in program header. */ + phdr->p_type = PT_LOAD; + + /* Location of segment in file in bytes. Aligned to p_align bytes. */ + phdr->p_offset = file_offset; + + /* Virtual address of segment. Aligned to p_align bytes. */ + phdr->p_vaddr = vaddr; + + /* Physical address of segment. Not relevant. */ + phdr->p_paddr = 0; + + /* Size of segment in file and memory. */ + phdr->p_filesz = size; + phdr->p_memsz = size; + + /* Alignment of segment in the file and memory in bytes (integral power of 2). */ + phdr->p_align = FW_PAGE_SIZE; + + /* Set segment access permissions. */ + phdr->p_flags = 0; + if (flags & CSF_FIRMWARE_ENTRY_READ) + phdr->p_flags |= PF_R; + if (flags & CSF_FIRMWARE_ENTRY_WRITE) + phdr->p_flags |= PF_W; + if (flags & CSF_FIRMWARE_ENTRY_EXECUTE) + phdr->p_flags |= PF_X; +} + +/** + * fw_core_dump_get_prstatus_note_size - Calculates size of an ELF32 PRSTATUS note + * @name: Name given to the PRSTATUS note.
+ * + * Calculates the size of a 32-bit PRSTATUS note (which contains information + * about a process like the current MCU registers) taking into account that + * @name must be padded to a 4-byte multiple. + * + * Return: size of 32-bit PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_get_prstatus_note_size(char *name) +{ + return sizeof(struct elf32_note) + roundup(strlen(name) + 1, 4) + + sizeof(struct elf_prstatus32); +} + +/** + * fw_core_dump_fill_elf_prstatus - Initializes an ELF32 PRSTATUS structure + * @prs: ELF32 PRSTATUS note to initialize + * @regs: MCU registers to copy into the PRSTATUS note + * + * Initializes an ELF32 PRSTATUS structure with MCU registers @regs. + * Other process information is N/A for CSF Firmware. + */ +static void fw_core_dump_fill_elf_prstatus(struct elf_prstatus32 *prs, + struct fw_core_dump_mcu *regs) +{ + /* Only fill in registers (32-bit) of PRSTATUS note. */ + memset(prs, 0, sizeof(*prs)); + prs->pr_reg[0] = regs->r0; + prs->pr_reg[1] = regs->r1; + prs->pr_reg[2] = regs->r2; + prs->pr_reg[3] = regs->r3; + prs->pr_reg[4] = regs->r4; + prs->pr_reg[5] = regs->r5; + prs->pr_reg[6] = regs->r6; + prs->pr_reg[7] = regs->r7; + prs->pr_reg[8] = regs->r8; + prs->pr_reg[9] = regs->r9; + prs->pr_reg[10] = regs->r10; + prs->pr_reg[11] = regs->r11; + prs->pr_reg[12] = regs->r12; + prs->pr_reg[13] = regs->sp; + prs->pr_reg[14] = regs->lr; + prs->pr_reg[15] = regs->pc; +} + +/** + * fw_core_dump_create_prstatus_note - Creates an ELF32 PRSTATUS note + * @name: Name for the PRSTATUS note + * @prs: ELF32 PRSTATUS structure to put in the PRSTATUS note + * @created_prstatus_note: + * Pointer to the allocated ELF32 PRSTATUS note + * + * Creates an ELF32 note with one PRSTATUS entry containing the + * ELF32 PRSTATUS structure @prs. Caller needs to free the created note in + * @created_prstatus_note. + * + * Return: 0 on failure, otherwise size of ELF32 PRSTATUS note in bytes. + */ +static unsigned int fw_core_dump_create_prstatus_note(char *name, struct elf_prstatus32 *prs, + struct elf32_note **created_prstatus_note) +{ + struct elf32_note *note; + unsigned int note_name_sz; + unsigned int note_sz; + + /* Allocate memory for ELF32 note containing a PRSTATUS note. */ + note_name_sz = strlen(name) + 1; + note_sz = sizeof(struct elf32_note) + roundup(note_name_sz, 4) + + sizeof(struct elf_prstatus32); + note = kmalloc(note_sz, GFP_KERNEL); + if (!note) + return 0; + + /* Fill in ELF32 note with one entry for a PRSTATUS note. */ + note->n_namesz = note_name_sz; + note->n_descsz = sizeof(struct elf_prstatus32); + note->n_type = NT_PRSTATUS; + memcpy(note + 1, name, note_name_sz); + memcpy((char *)(note + 1) + roundup(note_name_sz, 4), prs, sizeof(*prs)); + + /* Return pointer and size of the created ELF32 note. */ + *created_prstatus_note = note; + return note_sz; +} + +/** + * fw_core_dump_write_elf_header - Writes ELF header for the FW core dump + * @m: the seq_file handle + * + * Writes the ELF header of the core dump including program headers for + * memory sections and a note containing the current MCU register + * values. + * + * Excludes memory sections that lack read access permissions or + * are for protected memory. + * + * The data written is as follows: + * - ELF header + * - ELF PHDRs for memory sections + * - ELF PHDR for program header NOTE + * - ELF PRSTATUS note + * - 0-bytes padding to multiple of ELF_EXEC_PAGESIZE + * + * The actual memory section dumps should follow this (not written + * by this function).
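+ *
+ * For illustration only (assuming three readable memory sections), the
+ * resulting layout would start at these offsets: e_phoff = 52, i.e.
+ * sizeof(struct elf32_hdr); the PT_NOTE program header at 52 + 3 * 32,
+ * after the three 32-byte PT_LOAD program headers; the PRSTATUS note
+ * directly after the program header table; then zero padding up to the
+ * next ELF_EXEC_PAGESIZE boundary before the first section's pages.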
+ * + * Retrieves the necessary information via the struct + * fw_core_dump_data stored in the private member of the seq_file + * handle. + * + * Return: + * * 0 - success + * * -ENOMEM - not enough memory for allocating ELF32 note + */ +static int fw_core_dump_write_elf_header(struct seq_file *m) +{ + struct elf32_hdr hdr; + struct elf32_phdr phdr; + struct fw_core_dump_data *dump_data = m->private; + struct kbase_device *const kbdev = dump_data->kbdev; + struct kbase_csf_firmware_interface *interface; + struct elf_prstatus32 elf_prs; + struct elf32_note *elf_prstatus_note; + unsigned int sections = 0; + unsigned int elf_prstatus_note_size; + u32 elf_prstatus_offset; + u32 elf_phdr_note_offset; + u32 elf_memory_sections_data_offset; + u32 total_pages = 0; + u32 padding_size, *padding; + struct fw_core_dump_mcu regs = { 0 }; + + /* Count number of memory sections. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + sections++; + } + + /* Prepare ELF header. */ + fw_core_dump_fill_elf_header(&hdr, sections + 1); + seq_write(m, &hdr, sizeof(struct elf32_hdr)); + + elf_prstatus_note_size = fw_core_dump_get_prstatus_note_size("CORE"); + /* PHDRs of PT_LOAD type. */ + elf_phdr_note_offset = sizeof(struct elf32_hdr) + sections * sizeof(struct elf32_phdr); + /* PHDR of PT_NOTE type. */ + elf_prstatus_offset = elf_phdr_note_offset + sizeof(struct elf32_phdr); + elf_memory_sections_data_offset = elf_prstatus_offset + elf_prstatus_note_size; + + /* Calculate padding size to page offset. */ + padding_size = roundup(elf_memory_sections_data_offset, ELF_EXEC_PAGESIZE) - + elf_memory_sections_data_offset; + elf_memory_sections_data_offset += padding_size; + + /* Prepare ELF program header table. */ + list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + fw_core_dump_fill_elf_program_header(&phdr, elf_memory_sections_data_offset, + interface->virtual, + interface->num_pages * FW_PAGE_SIZE, + interface->flags); + + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + elf_memory_sections_data_offset += interface->num_pages * FW_PAGE_SIZE; + total_pages += interface->num_pages; + } + + /* Prepare PHDR of PT_NOTE type. */ + fw_core_dump_fill_elf_program_header_note(&phdr, elf_prstatus_offset, + elf_prstatus_note_size); + seq_write(m, &phdr, sizeof(struct elf32_phdr)); + + /* Prepare ELF note of PRSTATUS type. */ + if (fw_get_core_dump_mcu(kbdev, ®s)) + dev_dbg(kbdev->dev, "MCU Registers not available, all registers set to zero"); + /* Even if MCU Registers are not available the ELF prstatus is still + * filled with the registers equal to zero. + */ + fw_core_dump_fill_elf_prstatus(&elf_prs, ®s); + elf_prstatus_note_size = + fw_core_dump_create_prstatus_note("CORE", &elf_prs, &elf_prstatus_note); + if (elf_prstatus_note_size == 0) + return -ENOMEM; + + seq_write(m, elf_prstatus_note, elf_prstatus_note_size); + kfree(elf_prstatus_note); + + /* Pad file to page size. 
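+	 * Zero bytes bring the headers and note up to an ELF_EXEC_PAGESIZE
+	 * boundary so that each following PT_LOAD segment starts page-aligned,
+	 * matching the p_align value set in its program header.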
*/ + padding = kzalloc(padding_size, GFP_KERNEL); + seq_write(m, padding, padding_size); + kfree(padding); + + return 0; +} + +/** + * fw_core_dump_create - Requests firmware to save state for a firmware core dump + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_create(struct kbase_device *kbdev) +{ + int err; + + /* Ensure MCU is active before requesting the core dump. */ + kbase_csf_scheduler_pm_active(kbdev); + err = kbase_csf_scheduler_wait_mcu_active(kbdev); + if (!err) + err = kbase_csf_firmware_req_core_dump(kbdev); + + kbase_csf_scheduler_pm_idle(kbdev); + + return err; +} + +/** + * fw_core_dump_seq_start - seq_file start operation for firmware core dump file + * @m: the seq_file handle + * @_pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Starts a seq_file session, positioning the iterator for the session to page @_pos - 1 + * within the firmware interface memory sections. @_pos value 0 is used to indicate the + * position of the ELF header at the start of the file. + * + * Retrieves the necessary information via the struct fw_core_dump_data stored in + * the private member of the seq_file handle. + * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * SEQ_START_TOKEN - special iterator pointer indicating it is the start of the file + * * NULL - iterator could not be allocated or position is beyond the end + */ +static void *fw_core_dump_seq_start(struct seq_file *m, loff_t *_pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data; + struct kbase_csf_firmware_interface *interface; + loff_t pos = *_pos; + + if (pos == 0) + return SEQ_START_TOKEN; + + /* Move iterator in the right position based on page number within + * available pages of firmware interface memory sections. + */ + pos--; /* ignore start token */ + list_for_each_entry(interface, &dump_data->kbdev->csf.firmware_interfaces, node) { + /* Skip memory sections that cannot be read or are protected. */ + if ((interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) || + (interface->flags & CSF_FIRMWARE_ENTRY_READ) == 0) + continue; + + if (pos >= interface->num_pages) { + pos -= interface->num_pages; + } else { + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = interface; + data->page_num = pos; + return data; + } + } + + return NULL; +} + +/** + * fw_core_dump_seq_stop - seq_file stop operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Closes the current session and frees any related memory. + */ +static void fw_core_dump_seq_stop(struct seq_file *m, void *v) +{ + kfree(v); +} + +/** + * fw_core_dump_seq_next - seq_file next operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * @pos: holds the current position in pages + * (0 or most recent position used in previous session) + * + * Moves the iterator @v forward to the next page within the firmware interface + * memory sections and returns the updated position in @pos. + * @v value SEQ_START_TOKEN indicates the ELF header position.
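+ *
+ * For example (illustrative): with two readable sections of four and two
+ * pages respectively, *pos iterates 0 (ELF header), 1..4 (pages of the
+ * first section), then 5..6 (pages of the second), after which the
+ * sequence ends.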
+ * + * Return: + * * iterator pointer - pointer to iterator struct fw_core_dump_seq_off + * * NULL - end of the data, or the iterator could not be allocated + */ +static void *fw_core_dump_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct fw_core_dump_data *dump_data = m->private; + struct fw_core_dump_seq_off *data = v; + struct kbase_csf_firmware_interface *interface; + struct list_head *interfaces = &dump_data->kbdev->csf.firmware_interfaces; + + /* Is current position at the ELF header? */ + if (v == SEQ_START_TOKEN) { + if (list_empty(interfaces)) + return NULL; + + /* Prepare iterator for starting at first page in firmware interface + * memory sections. + */ + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + data->interface = + list_first_entry(interfaces, struct kbase_csf_firmware_interface, node); + data->page_num = 0; + ++*pos; + return data; + } + + /* First attempt to satisfy from current firmware interface memory section. */ + interface = data->interface; + if (data->page_num + 1 < interface->num_pages) { + data->page_num++; + ++*pos; + return data; + } + + /* Need next firmware interface memory section. This could be the last one. */ + if (list_is_last(&interface->node, interfaces)) { + kfree(data); + return NULL; + } + + /* Move to first page in next firmware interface memory section. */ + data->interface = list_next_entry(interface, node); + data->page_num = 0; + ++*pos; + + return data; +} + +/** + * fw_core_dump_seq_show - seq_file show operation for firmware core dump file + * @m: the seq_file handle + * @v: the current iterator (pointer to struct fw_core_dump_seq_off) + * + * Writes the current page in a firmware interface memory section indicated + * by the iterator @v to the file. If @v is SEQ_START_TOKEN the ELF + * header is written. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_seq_show(struct seq_file *m, void *v) +{ + struct fw_core_dump_seq_off *data = v; + struct page *page; + u32 *p; + + /* Either write the ELF header or current page. */ + if (v == SEQ_START_TOKEN) + return fw_core_dump_write_elf_header(m); + + /* Write the current page. */ + page = as_page(data->interface->phys[data->page_num]); + p = kmap_atomic(page); + seq_write(m, p, FW_PAGE_SIZE); + kunmap_atomic(p); + + return 0; +} + +/* Sequence file operations for firmware core dump file. */ +static const struct seq_operations fw_core_dump_seq_ops = { + .start = fw_core_dump_seq_start, + .next = fw_core_dump_seq_next, + .stop = fw_core_dump_seq_stop, + .show = fw_core_dump_seq_show, +}; + +/** + * fw_core_dump_debugfs_open - callback for opening the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Prepares for servicing a write request to request a core dump from firmware and + * a read request to retrieve the core dump. + * + * Returns an error if the firmware is not initialized yet. + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_open(struct inode *inode, struct file *file) +{ + struct kbase_device *const kbdev = inode->i_private; + struct fw_core_dump_data *dump_data; + int ret; + + /* Fail if firmware is not initialized yet. */ + if (!kbdev->csf.firmware_inited) { + ret = -ENODEV; + goto open_fail; + } + + /* Open a sequence file for iterating through the pages in the + * firmware interface memory pages. seq_open stores a + * struct seq_file * in the private_data field of @file.
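+	 *
+	 * From user space the file is typically driven as a write-then-read
+	 * sequence, e.g. (an illustrative sketch, error handling omitted):
+	 *
+	 *   int fd = open("/sys/kernel/debug/mali0/fw_core_dump", O_RDWR);
+	 *   write(fd, "1", 1);          // request the dump
+	 *   lseek(fd, 0, SEEK_SET);
+	 *   // read(fd, buf, len) in a loop until it returns 0
+	 *   close(fd);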
+ */ + ret = seq_open(file, &fw_core_dump_seq_ops); + if (ret) + goto open_fail; + + /* Allocate a context for sequence file operations. */ + dump_data = kmalloc(sizeof(*dump_data), GFP_KERNEL); + if (!dump_data) { + ret = -ENOMEM; + goto out; + } + + /* Kbase device will be shared with sequence file operations. */ + dump_data->kbdev = kbdev; + + /* Link our sequence file context. */ + ((struct seq_file *)file->private_data)->private = dump_data; + + return 0; +out: + seq_release(inode, file); +open_fail: + return ret; +} + +/** + * fw_core_dump_debugfs_write - callback for a write to the 'fw_core_dump' debugfs file + * @file: file pointer + * @ubuf: user buffer containing data to store + * @count: number of bytes in user buffer + * @ppos: file position + * + * Any data written to the file triggers a firmware core dump request which + * subsequently can be retrieved by reading from the file. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t fw_core_dump_debugfs_write(struct file *file, const char __user *ubuf, size_t count, + loff_t *ppos) +{ + int err; + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + struct kbase_device *const kbdev = dump_data->kbdev; + + CSTD_UNUSED(ppos); + + err = fw_core_dump_create(kbdev); + + return err ? err : count; +} + +/** + * fw_core_dump_debugfs_release - callback for releasing the 'fw_core_dump' debugfs file + * @inode: inode of the file + * @file: file pointer + * + * Return: 0 on success, error code otherwise. + */ +static int fw_core_dump_debugfs_release(struct inode *inode, struct file *file) +{ + struct fw_core_dump_data *dump_data = ((struct seq_file *)file->private_data)->private; + + seq_release(inode, file); + + kfree(dump_data); + + return 0; +} +/* Debugfs file operations for firmware core dump file. */ +static const struct file_operations kbase_csf_fw_core_dump_fops = { + .owner = THIS_MODULE, + .open = fw_core_dump_debugfs_open, + .read = seq_read, + .write = fw_core_dump_debugfs_write, + .llseek = seq_lseek, + .release = fw_core_dump_debugfs_release, +}; + +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + debugfs_create_file("fw_core_dump", 0600, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_fw_core_dump_fops); +#endif /* CONFIG_DEBUG_FS */ +} + +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry) +{ + /* Casting to u16 as version is defined by bits 15:0 */ + kbdev->csf.fw_core_dump.version = (u16)entry[FW_CORE_DUMP_VERSION_INDEX]; + + if (kbdev->csf.fw_core_dump.version != FW_CORE_DUMP_DATA_VERSION) + return -EPERM; + + kbdev->csf.fw_core_dump.mcu_regs_addr = entry[FW_CORE_DUMP_START_ADDR_INDEX]; + kbdev->csf.fw_core_dump.available = true; + + return 0; +} diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h new file mode 100644 index 0000000..0537dca --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ +#define _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ + +struct kbase_device; + +/** Offset of the last field of core dump entry from the image header */ +#define CORE_DUMP_ENTRY_START_ADDR_OFFSET (0x4) + +/** + * kbase_csf_firmware_core_dump_entry_parse() - Parse a "core dump" entry from + * the image header. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * @entry: Pointer to section. + * + * Read a "core dump" entry from the image header, check the version for + * compatibility and store the address pointer. + * + * Return: 0 if the entry was parsed successfully, negative error code otherwise. + */ +int kbase_csf_firmware_core_dump_entry_parse(struct kbase_device *kbdev, const u32 *entry); + +/** + * kbase_csf_firmware_core_dump_init() - Initialize firmware core dump support + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * Must be zero-initialized. + * + * Creates the fw_core_dump debugfs file through which to request a firmware + * core dump. The created debugfs file is cleaned up as part of kbdev debugfs + * cleanup. + * + * The fw_core_dump debugfs file can be used in the following way: + * + * To explicitly request core dump: + * echo 1 >/sys/kernel/debug/mali0/fw_core_dump + * + * To output current core dump (after explicitly requesting a core dump, or + * kernel driver reported an internal firmware error): + * cat /sys/kernel/debug/mali0/fw_core_dump + */ +void kbase_csf_firmware_core_dump_init(struct kbase_device *const kbdev); + +#endif /* _KBASE_CSF_FIRMWARE_CORE_DUMP_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c index a046112..77d3b1e 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_log.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c @@ -85,7 +85,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) dev_dbg(kbdev->dev, "Limit enabled bits count from %u to 64", enable_bits_count); enable_bits_count = 64; } - new_mask = val & ((1 << enable_bits_count) - 1); + new_mask = val & (UINT64_MAX >> (64 - enable_bits_count)); if (new_mask != kbase_csf_firmware_trace_buffer_get_active_mask64(tb)) return kbase_csf_firmware_trace_buffer_set_active_mask64(tb, new_mask); @@ -350,7 +350,7 @@ static void toggle_logging_calls_in_loaded_image(struct kbase_device *kbdev, boo diff = callee_address - calling_address - 4; sign = !!(diff & 0x80000000); - if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff && + if (ARMV7_T1_BL_IMM_RANGE_MIN > (int32_t)diff || ARMV7_T1_BL_IMM_RANGE_MAX < (int32_t)diff) { dev_warn(kbdev->dev, "FW log patch 0x%x out of range, skipping", calling_address); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.h b/mali_kbase/csf/mali_kbase_csf_firmware_log.h index 8d7a221..1008320 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_log.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.h @@ -24,6 +24,9 @@ #include <mali_kbase.h> +/** Offset of the last field of functions call list entry from the image header */ +#define
FUNC_CALL_LIST_ENTRY_NAME_OFFSET (0x8) + /* * Firmware log dumping buffer size. */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 0eaaddf..2e2b59f 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -33,6 +33,7 @@ #include "mmu/mali_kbase_mmu.h" #include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" #include <backend/gpu/mali_kbase_model_dummy.h> +#include <csf/mali_kbase_csf_registers.h> #include <linux/list.h> #include <linux/slab.h> @@ -104,7 +105,6 @@ struct dummy_firmware_interface { (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK) - static inline u32 input_page_read(const u32 *const input, const u32 offset) { WARN_ON(offset % sizeof(u32)); @@ -716,6 +716,71 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbdev->csf.gpu_idle_dur_count); } +static bool global_debug_request_complete(struct kbase_device *const kbdev, u32 const req_mask) +{ + struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface; + bool complete = false; + unsigned long flags; + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + + if ((kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK) & req_mask) == + (kbase_csf_firmware_global_input_read(global_iface, GLB_DEBUG_REQ) & req_mask)) + complete = true; + + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + return complete; +} + +static void set_global_debug_request(const struct kbase_csf_global_iface *const global_iface, + u32 const req_mask) +{ + u32 glb_debug_req; + + kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev); + + glb_debug_req = kbase_csf_firmware_global_output(global_iface, GLB_DEBUG_ACK); + glb_debug_req ^= req_mask; + + kbase_csf_firmware_global_input_mask(global_iface, GLB_DEBUG_REQ, glb_debug_req, req_mask); +} + +static void request_fw_core_dump( + const struct kbase_csf_global_iface *const global_iface) +{ + uint32_t run_mode = GLB_DEBUG_REQ_RUN_MODE_SET(0, GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP); + + set_global_debug_request(global_iface, GLB_DEBUG_REQ_DEBUG_RUN_MASK | run_mode); + + set_global_request(global_iface, GLB_REQ_DEBUG_CSF_REQ_MASK); +} + +int kbase_csf_firmware_req_core_dump(struct kbase_device *const kbdev) +{ + const struct kbase_csf_global_iface *const global_iface = + &kbdev->csf.global_iface; + unsigned long flags; + int ret; + + /* Serialize CORE_DUMP requests. */ + mutex_lock(&kbdev->csf.reg_lock); + + /* Update GLB_REQ with CORE_DUMP request and make firmware act on it. */ + kbase_csf_scheduler_spin_lock(kbdev, &flags); + request_fw_core_dump(global_iface); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + kbase_csf_scheduler_spin_unlock(kbdev, flags); + + /* Wait for firmware to acknowledge completion of the CORE_DUMP request. 
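+	 * The handshake is toggle-based: set_global_debug_request() above read
+	 * the current GLB_DEBUG_ACK value and wrote it XOR'd with the request
+	 * mask into GLB_DEBUG_REQ, so completion is detected once the firmware
+	 * makes GLB_DEBUG_ACK equal to GLB_DEBUG_REQ again under that mask.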
*/ + ret = wait_for_global_request(kbdev, GLB_REQ_DEBUG_CSF_REQ_MASK); + if (!ret) + WARN_ON(!global_debug_request_complete(kbdev, GLB_DEBUG_REQ_DEBUG_RUN_MASK)); + + mutex_unlock(&kbdev->csf.reg_lock); + + return ret; +} static void global_init(struct kbase_device *const kbdev, u64 core_mask) { @@ -724,8 +789,7 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask) GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK | GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK | GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK | - GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | - 0; + GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK | GLB_REQ_DEBUG_CSF_REQ_MASK; const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface; @@ -917,7 +981,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_ms; + dur = kbdev->csf.gpu_idle_hysteresis_us; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -934,7 +998,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); @@ -965,7 +1029,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_ms = dur; + kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); @@ -1076,14 +1140,14 @@ void kbase_csf_firmware_early_term(struct kbase_device *kbdev) int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS; + kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_ms /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ms); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); return 0; } @@ -1533,7 +1597,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); if (ret) goto mmu_insert_pages_error; @@ -1594,4 +1658,3 @@ void kbase_csf_firmware_mcu_shared_mapping_term( vunmap(csf_mapping->cpu_addr); kfree(csf_mapping->phys); } - diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c index 1876d50..f357e9e 100644 --- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c +++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c @@ -142,7 +142,14 @@ void 
kbase_csf_heap_context_allocator_term( if (ctx_alloc->region) { kbase_gpu_vm_lock(kctx); - ctx_alloc->region->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * We can't enforce (nor check) the no_user_free refcount + * to be 0 here as other code regions can take such a reference. + * Anyway, this isn't an issue as the region will eventually + * be freed by the region tracker if its refcount didn't drop + * to 0. + */ + kbase_va_region_no_user_free_put(kctx, ctx_alloc->region); kbase_mem_free_region(kctx, ctx_alloc->region); kbase_gpu_vm_unlock(kctx); } diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 99ab002..06a6990 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -674,9 +674,8 @@ static int kbase_csf_queue_group_suspend_prepare( (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || - (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) || - (reg->flags & KBASE_REG_NO_USER_FREE)) { + (kbase_is_region_shrinkable(reg)) || + (kbase_va_region_is_no_user_free(kctx, reg))) { ret = -EINVAL; goto out_clean_pages; } @@ -1234,9 +1233,8 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, queue_work(kcpu_queue->wq, &kcpu_queue->work); } -static void kbase_kcpu_fence_wait_cancel( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; @@ -1410,15 +1408,14 @@ static int kbase_kcpu_fence_wait_process( */ if (fence_status) - kbase_kcpu_fence_wait_cancel(kcpu_queue, fence_info); + kbasep_kcpu_fence_wait_cancel(kcpu_queue, fence_info); return fence_status; } -static int kbase_kcpu_fence_wait_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_in; @@ -1429,8 +1426,7 @@ static int kbase_kcpu_fence_wait_prepare( lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) return -ENOMEM; fence_in = sync_file_get_fence(fence.basep.fd); @@ -1444,9 +1440,8 @@ static int kbase_kcpu_fence_wait_prepare( return 0; } -static int kbase_kcpu_fence_signal_process( - struct kbase_kcpu_command_queue *kcpu_queue, - struct kbase_kcpu_command_fence_info *fence_info) +static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; @@ -1467,37 +1462,37 @@ static int kbase_kcpu_fence_signal_process( fence_info->fence->seqno); /* dma_fence refcount needs to be decreased to release it. 
*/ - dma_fence_put(fence_info->fence); + kbase_fence_put(fence_info->fence); fence_info->fence = NULL; return ret; } -static int kbase_kcpu_fence_signal_prepare( - struct kbase_kcpu_command_queue *kcpu_queue, - struct base_kcpu_command_fence_info *fence_info, - struct kbase_kcpu_command *current_command) +static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, + int *fd) { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) struct fence *fence_out; #else struct dma_fence *fence_out; #endif - struct base_fence fence; - struct sync_file *sync_file; + struct kbase_kcpu_dma_fence *kcpu_fence; int ret = 0; - int fd; lockdep_assert_held(&kcpu_queue->lock); - if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), - sizeof(fence))) - return -EFAULT; - - fence_out = kzalloc(sizeof(*fence_out), GFP_KERNEL); - if (!fence_out) + kcpu_fence = kzalloc(sizeof(*kcpu_fence), GFP_KERNEL); + if (!kcpu_fence) return -ENOMEM; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + fence_out = (struct fence *)kcpu_fence; +#else + fence_out = (struct dma_fence *)kcpu_fence; +#endif + dma_fence_init(fence_out, &kbase_fence_ops, &kbase_csf_fence_lock, @@ -1513,28 +1508,70 @@ static int kbase_kcpu_fence_signal_prepare( dma_fence_get(fence_out); #endif + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + WARN_ON(!atomic_inc_not_zero(&kcpu_fence->metadata->refcount)); +#else + WARN_ON(!refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); +#endif + /* create a sync_file fd representing the fence */ - sync_file = sync_file_create(fence_out); - if (!sync_file) { + *sync_file = sync_file_create(fence_out); + if (!(*sync_file)) { ret = -ENOMEM; goto file_create_fail; } - fd = get_unused_fd_flags(O_CLOEXEC); - if (fd < 0) { - ret = fd; + *fd = get_unused_fd_flags(O_CLOEXEC); + if (*fd < 0) { + ret = *fd; goto fd_flags_fail; } - fence.basep.fd = fd; + fence->basep.fd = *fd; current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; + return 0; + +fd_flags_fail: + fput((*sync_file)->file); +file_create_fail: + /* + * Upon failure, dma_fence refcount that was increased by + * dma_fence_get() or sync_file_create() needs to be decreased + * to release it. 
+ */ + kbase_fence_put(fence_out); + current_command->info.fence.fence = NULL; + + return ret; +} + +static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu_queue, + struct base_kcpu_command_fence_info *fence_info, + struct kbase_kcpu_command *current_command) +{ + struct base_fence fence; + struct sync_file *sync_file = NULL; + int fd; + int ret = 0; + + lockdep_assert_held(&kcpu_queue->lock); + + if (copy_from_user(&fence, u64_to_user_ptr(fence_info->fence), sizeof(fence))) + return -EFAULT; + + ret = kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, &fence, &sync_file, &fd); + if (ret) + return ret; + if (copy_to_user(u64_to_user_ptr(fence_info->fence), &fence, sizeof(fence))) { ret = -EFAULT; - goto fd_flags_fail; + goto fail; } /* 'sync_file' pointer can't be safely dereferenced once 'fd' is @@ -1544,21 +1581,34 @@ static int kbase_kcpu_fence_signal_prepare( fd_install(fd, sync_file->file); return 0; -fd_flags_fail: +fail: fput(sync_file->file); -file_create_fail: - /* - * Upon failure, dma_fence refcount that was increased by - * dma_fence_get() or sync_file_create() needs to be decreased - * to release it. - */ - dma_fence_put(fence_out); - + kbase_fence_put(current_command->info.fence.fence); current_command->info.fence.fence = NULL; - kfree(fence_out); return ret; } + +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + if (!kcpu_queue || !fence_info) + return -EINVAL; + + return kbasep_kcpu_fence_signal_process(kcpu_queue, fence_info); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_process); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd) +{ + if (!kcpu_queue || !current_command || !fence || !sync_file || !fd) + return -EINVAL; + + return kbasep_kcpu_fence_signal_init(kcpu_queue, current_command, fence, sync_file, fd); +} +KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ static void kcpu_queue_process_worker(struct work_struct *data) @@ -1595,6 +1645,9 @@ static int delete_queue(struct kbase_context *kctx, u32 id) mutex_lock(&queue->lock); + /* Metadata struct may outlive KCPU queue. */ + kbase_kcpu_dma_fence_meta_put(queue->metadata); + /* Drain the remaining work for this queue first and go past * all the waits. 
*/ @@ -1701,8 +1754,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) if (drain_queue) { - kbase_kcpu_fence_wait_cancel(queue, - &cmd->info.fence); + kbasep_kcpu_fence_wait_cancel(queue, &cmd->info.fence); } else { status = kbase_kcpu_fence_wait_process(queue, &cmd->info.fence); @@ -1732,8 +1784,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = 0; #if IS_ENABLED(CONFIG_SYNC_FILE) - status = kbase_kcpu_fence_signal_process( - queue, &cmd->info.fence); + status = kbasep_kcpu_fence_signal_process(queue, &cmd->info.fence); if (status < 0) queue->has_error = true; @@ -2275,6 +2326,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx) mutex_destroy(&kctx->csf.kcpu_queues.lock); } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term); int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_delete *del) @@ -2288,7 +2340,9 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_kcpu_command_queue *queue; int idx; int ret = 0; - +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* The queue id is of u8 type and we use the index of the kcpu_queues * array as an id, so the number of elements in the array can't be * more than 256. @@ -2334,7 +2388,27 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, queue->fence_context = dma_fence_context_alloc(1); queue->fence_seqno = 0; queue->fence_wait_processed = false; + + metadata = kzalloc(sizeof(*metadata), GFP_KERNEL); + if (!metadata) { + kfree(queue); + ret = -ENOMEM; + goto out; + } + + metadata->kbdev = kctx->kbdev; + metadata->kctx_id = kctx->id; + snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%d-%d_%d-%lld-kcpu", kctx->kbdev->id, + kctx->tgid, kctx->id, queue->fence_context); + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_set(&metadata->refcount, 1); +#else + refcount_set(&metadata->refcount, 1); #endif + queue->metadata = metadata; + atomic_inc(&kctx->kbdev->live_fence_metadata); +#endif /* CONFIG_SYNC_FILE */ queue->enqueue_failed = false; queue->command_started = false; INIT_LIST_HEAD(&queue->jit_blocked); @@ -2360,3 +2434,4 @@ out: return ret; } +KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index 9848652..bc3cafa 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -22,6 +22,9 @@ #ifndef _KBASE_CSF_KCPU_H_ #define _KBASE_CSF_KCPU_H_ +#include <mali_kbase_fence.h> +#include <mali_kbase_sync.h> + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) #include <linux/fence.h> #else @@ -44,8 +47,8 @@ struct kbase_kcpu_command_import_info { }; /** - * struct kbase_kcpu_command_fence_info - Structure which holds information - * about the fence object enqueued in the kcpu command queue + * struct kbase_kcpu_command_fence_info - Structure which holds information about the + * fence object enqueued in the kcpu command queue * * @fence_cb: Fence callback * @fence: Fence @@ -274,6 +277,8 @@ struct kbase_kcpu_command { * @jit_blocked: Used to keep track of command queues blocked * by a pending JIT allocation command. * @fence_timeout: Timer used to detect the fence wait timeout. + * @metadata: Metadata structure containing basic information about this + * queue for any fence objects associated with this queue. 
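+ *             The metadata is reference counted: the queue holds one
+ *             reference from creation and each fence created on the queue
+ *             takes another, so the structure may outlive the queue itself
+ *             (see the kbase_kcpu_dma_fence_meta_put() call in delete_queue()).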
*/ struct kbase_kcpu_command_queue { struct mutex lock; @@ -295,6 +300,9 @@ struct kbase_kcpu_command_queue { #ifdef CONFIG_MALI_FENCE_DEBUG struct timer_list fence_timeout; #endif /* CONFIG_MALI_FENCE_DEBUG */ +#if IS_ENABLED(CONFIG_SYNC_FILE) + struct kbase_kcpu_dma_fence_meta *metadata; +#endif /* CONFIG_SYNC_FILE */ }; /** @@ -359,4 +367,14 @@ int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx); */ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx); +#if IS_ENABLED(CONFIG_SYNC_FILE) +/* Test wrappers for dma fence operations. */ +int kbase_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info); + +int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command *current_command, + struct base_fence *fence, struct sync_file **sync_file, int *fd); +#endif /* CONFIG_SYNC_FILE */ + #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h index 6dde56c..b133efd 100644 --- a/mali_kbase/csf/mali_kbase_csf_registers.h +++ b/mali_kbase/csf/mali_kbase_csf_registers.h @@ -229,20 +229,32 @@ #define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */ #define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */ -#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */ -#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */ -#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */ -#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */ +#define GLB_DEBUG_ARG_IN0 0x0FE0 /* Firmware Debug argument array element 0 */ +#define GLB_DEBUG_ARG_IN1 0x0FE4 /* Firmware Debug argument array element 1 */ +#define GLB_DEBUG_ARG_IN2 0x0FE8 /* Firmware Debug argument array element 2 */ +#define GLB_DEBUG_ARG_IN3 0x0FEC /* Firmware Debug argument array element 3 */ + +/* Mappings based on GLB_DEBUG_REQ.FWUTF_RUN bit being different from GLB_DEBUG_ACK.FWUTF_RUN */ +#define GLB_DEBUG_FWUTF_DESTROY GLB_DEBUG_ARG_IN0 /* () Test fixture destroy function address */ +#define GLB_DEBUG_FWUTF_TEST GLB_DEBUG_ARG_IN1 /* () Test index */ +#define GLB_DEBUG_FWUTF_FIXTURE GLB_DEBUG_ARG_IN2 /* () Test fixture index */ +#define GLB_DEBUG_FWUTF_CREATE GLB_DEBUG_ARG_IN3 /* () Test fixture create function address */ + #define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */ #define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */ /* GLB_OUTPUT_BLOCK register offsets */ +#define GLB_DEBUG_ARG_OUT0 0x0FE0 /* Firmware debug result element 0 */ +#define GLB_DEBUG_ARG_OUT1 0x0FE4 /* Firmware debug result element 1 */ +#define GLB_DEBUG_ARG_OUT2 0x0FE8 /* Firmware debug result element 2 */ +#define GLB_DEBUG_ARG_OUT3 0x0FEC /* Firmware debug result element 3 */ + #define GLB_ACK 0x0000 /* () Global acknowledge */ #define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */ #define GLB_HALT_STATUS 0x0010 /* () Global halt status */ #define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */ #define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */ -#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */ +#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */ #define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */ /* USER register offsets */ @@ -1590,4 +1602,43 @@ 
((GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SET_MOD(value) << GLB_PRFCNT_SIZE_FIRMWARE_SIZE_SHIFT) & \ GLB_PRFCNT_SIZE_FIRMWARE_SIZE_MASK)) +/* GLB_DEBUG_REQ register */ +#define GLB_DEBUG_REQ_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_REQ_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_DEBUG_RUN_MASK) >> GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_REQ_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_REQ_DEBUG_RUN_SHIFT) & GLB_DEBUG_REQ_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_REQ_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_REQ_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_REQ_RUN_MODE_MASK) >> GLB_DEBUG_REQ_RUN_MODE_SHIFT) +#define GLB_DEBUG_REQ_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK)) + +/* GLB_DEBUG_ACK register */ +#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23) +#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_DEBUG_RUN_MASK) >> GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) +#define GLB_DEBUG_ACK_DEBUG_RUN_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_DEBUG_RUN_MASK) | \ + (((value) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT) & GLB_DEBUG_ACK_DEBUG_RUN_MASK)) + +#define GLB_DEBUG_ACK_RUN_MODE_SHIFT GPU_U(24) +#define GLB_DEBUG_ACK_RUN_MODE_MASK (GPU_U(0xFF) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_GET(reg_val) \ + (((reg_val)&GLB_DEBUG_ACK_RUN_MODE_MASK) >> GLB_DEBUG_ACK_RUN_MODE_SHIFT) +#define GLB_DEBUG_ACK_RUN_MODE_SET(reg_val, value) \ + (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ + (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + +/* RUN_MODE values */ +#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 +#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 +/* End of RUN_MODE values */ + #endif /* _KBASE_CSF_REGISTERS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index cab2ebb..282f7e2 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -553,7 +553,7 @@ void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. 
*/ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_ms == 0) || + (kbdev->csf.gpu_idle_hysteresis_us == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -2297,7 +2297,7 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler, insert_group_to_idle_wait(group); } -static void update_offslot_non_idle_cnt_for_faulty_grp(struct kbase_queue_group *group) +static void update_offslot_non_idle_cnt(struct kbase_queue_group *group) { struct kbase_device *kbdev = group->kctx->kbdev; struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; @@ -2789,7 +2789,7 @@ static void remove_scheduled_group(struct kbase_device *kbdev, } static void sched_evict_group(struct kbase_queue_group *group, bool fault, - bool update_non_idle_offslot_grps_cnt) + bool update_non_idle_offslot_grps_cnt_from_run_state) { struct kbase_context *kctx = group->kctx; struct kbase_device *kbdev = kctx->kbdev; @@ -2800,7 +2800,7 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, if (queue_group_scheduled_locked(group)) { u32 i; - if (update_non_idle_offslot_grps_cnt && + if (update_non_idle_offslot_grps_cnt_from_run_state && (group->run_state == KBASE_CSF_GROUP_SUSPENDED || group->run_state == KBASE_CSF_GROUP_RUNNABLE)) { int new_val = atomic_dec_return( @@ -2815,8 +2815,11 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault, } if (group->prepared_seq_num != - KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) + KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID) { + if (!update_non_idle_offslot_grps_cnt_from_run_state) + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); + } if (group->run_state == KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC) remove_group_from_idle_wait(group); @@ -3222,8 +3225,7 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev, scheduler->remaining_tick_slots--; } } else { - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); } } @@ -3413,8 +3415,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) */ clear_bit(i, slot_mask); set_bit(i, scheduler->csgs_events_enable_mask); - update_offslot_non_idle_cnt_for_onslot_grp( - group); } suspend_wait_failed = true; @@ -3874,11 +3874,16 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev, struct kbase_queue_group *const input_grp) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct kbase_protected_suspend_buffer *sbuf = &input_grp->protected_suspend_buf; unsigned long flags; bool protm_in_use; lockdep_assert_held(&scheduler->lock); + /* Return early if the physical pages have not been allocated yet */ + if (unlikely(!sbuf->pma)) + return; + /* This lock is taken to prevent the issuing of MMU command during the * transition to protected mode. 
This helps avoid the scenario where the * entry to protected mode happens with a memory region being locked and @@ -4049,8 +4054,7 @@ static void scheduler_apply(struct kbase_device *kbdev) if (!kctx_as_enabled(group->kctx) || group->faulted) { /* Drop the head group and continue */ - update_offslot_non_idle_cnt_for_faulty_grp( - group); + update_offslot_non_idle_cnt(group); remove_scheduled_group(kbdev, group); continue; } @@ -4329,6 +4333,8 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev, set_bit(i, csg_bitmap); } else { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); } } @@ -5165,16 +5171,12 @@ redo_local_tock: * queue jobs. */ if (protm_grp && scheduler->top_grp == protm_grp) { - int new_val; - dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d", protm_grp->handle); - new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps); - KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_NONIDLE_OFFSLOT_GRP_DEC, protm_grp, - new_val); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + update_offslot_non_idle_cnt_for_onslot_grp(protm_grp); + remove_scheduled_group(kbdev, protm_grp); scheduler_check_pmode_progress(kbdev); } else if (scheduler->top_grp) { if (protm_grp) @@ -5988,8 +5990,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) + if (group->run_state == KBASE_CSF_GROUP_IDLE) { group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, + group->run_state); + } /* Check if the group is now eligible for execution in protected mode. */ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -6257,6 +6262,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) int priority; int err; + kbase_ctx_sched_init_ctx(kctx); + for (priority = 0; priority < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++priority) { INIT_LIST_HEAD(&kctx->csf.sched.runnable_groups[priority]); @@ -6273,7 +6280,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (!kctx->csf.sched.sync_update_wq) { dev_err(kctx->kbdev->dev, "Failed to initialize scheduler context workqueue"); - return -ENOMEM; + err = -ENOMEM; + goto alloc_wq_failed; } INIT_WORK(&kctx->csf.sched.sync_update_work, @@ -6286,10 +6294,16 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "Failed to register a sync update callback"); - destroy_workqueue(kctx->csf.sched.sync_update_wq); + goto event_wait_add_failed; } return err; + +event_wait_add_failed: + destroy_workqueue(kctx->csf.sched.sync_update_wq); +alloc_wq_failed: + kbase_ctx_sched_remove_ctx(kctx); + return err; } void kbase_csf_scheduler_context_term(struct kbase_context *kctx) @@ -6297,6 +6311,8 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx); cancel_work_sync(&kctx->csf.sched.sync_update_work); destroy_workqueue(kctx->csf.sched.sync_update_wq); + + kbase_ctx_sched_remove_ctx(kctx); } int kbase_csf_scheduler_init(struct kbase_device *kbdev) diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c index 909362d..14d8097 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c @@ -101,7 +101,7 @@ static struct 
kbase_csf_tiler_heap_chunk *get_last_chunk( * @kctx: kbase context the chunk belongs to. * @chunk: The chunk whose external mappings are going to be removed. * - * This function marks the region as DONT NEED. Along with KBASE_REG_NO_USER_FREE, this indicates + * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other * parts of kbase outside of tiler heap management should not take references on its physical * pages, and should not modify them. @@ -227,12 +227,14 @@ static void remove_unlinked_chunk(struct kbase_context *kctx, kbase_gpu_vm_lock(kctx); kbase_vunmap(kctx, &chunk->map); /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT - * regions), and so we must clear that flag too before freeing + * regions), and so we must clear that flag too before freeing. + * For "no user free", we check that the refcount is 1 as it is a shrinkable region; + * no other code part within kbase can take a reference to it. */ + WARN_ON(chunk->region->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -297,7 +299,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * kbase_gpu_vm_lock(kctx); - /* Some checks done here as KBASE_REG_NO_USER_FREE still allows such things to be made + /* Some checks done here as NO_USER_FREE still allows such things to be made * whilst we had dropped the region lock */ if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) { @@ -305,32 +307,45 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * goto unroll_region; } + /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another + * thread can have the "no user free" refcount increased between kbase_mem_alloc + * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by + * remove_external_chunk_mappings (below). + * + * It should be fine and not a security risk if we let the region leak till + * region tracker termination in such a case. + */ + if (unlikely(chunk->region->no_user_free_refcnt > 1)) { + dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_refcnt > 1!\n"); + goto unroll_region; + } + /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE * being requested, it's useful to document in code what those restrictions are, and ensure * they remain in place in future. 
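	 * As a general pattern in this file, a no_user_free reference taken on
	 * a region via kbase_va_region_no_user_free_get() (or implied by
	 * BASEP_MEM_NO_USER_FREE at allocation time) is dropped again with
	 * kbase_va_region_no_user_free_put() before the region is released.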
*/ if (WARN(!chunk->region->gpu_alloc, - "KBASE_REG_NO_USER_FREE chunks should not have had their alloc freed")) { + "NO_USER_FREE chunks should not have had their alloc freed")) { goto unroll_region; } if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE, - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC), - "KBASE_REG_NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { + "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) { goto unroll_region; } if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED), - "KBASE_REG_NO_USER_FREE chunks should not have been made ephemeral")) { + "NO_USER_FREE chunks should not have been made ephemeral")) { goto unroll_region; } if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1, - "KBASE_REG_NO_USER_FREE chunks should not have been aliased")) { + "NO_USER_FREE chunks should not have been aliased")) { goto unroll_region; } @@ -344,16 +359,21 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context * remove_external_chunk_mappings(kctx, chunk); kbase_gpu_vm_unlock(kctx); + /* If page migration is enabled, we don't want to migrate tiler heap pages. + * This does not change if the constituent pages are already marked as isolated. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE); + return chunk; unroll_region: /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT * regions), and so we must clear that flag too before freeing. */ + kbase_va_region_no_user_free_put(kctx, chunk->region); #if !defined(CONFIG_MALI_VECTOR_DUMP) - chunk->region->flags &= ~(KBASE_REG_NO_USER_FREE | KBASE_REG_DONT_NEED); -#else - chunk->region->flags &= ~KBASE_REG_NO_USER_FREE; + chunk->region->flags &= ~KBASE_REG_DONT_NEED; #endif kbase_mem_free_region(kctx, chunk->region); kbase_gpu_vm_unlock(kctx); @@ -511,7 +531,7 @@ static void delete_heap(struct kbase_csf_tiler_heap *heap) if (heap->buf_desc_reg) { kbase_vunmap(kctx, &heap->buf_desc_map); kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } @@ -629,8 +649,8 @@ static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *co return false; } - if (!(reg->flags & KBASE_REG_CPU_RD) || (reg->flags & KBASE_REG_DONT_NEED) || - (reg->flags & KBASE_REG_PF_GROW) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC)) { + if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) || + (reg->flags & KBASE_REG_PF_GROW)) { dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags); return false; } @@ -719,14 +739,17 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_ /* If we don't prevent userspace from unmapping this, we may run into * use-after-free, as we don't check for the existence of the region throughout. 
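* Taking a "no user free" reference below pins the region for the lifetime of the heap; the reference is dropped again in delete_heap() and on the error paths.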
*/ - buf_desc_reg->flags |= KBASE_REG_NO_USER_FREE; heap->buf_desc_va = buf_desc_va; - heap->buf_desc_reg = buf_desc_reg; + heap->buf_desc_reg = kbase_va_region_no_user_free_get(kctx, buf_desc_reg); vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE, KBASE_REG_CPU_RD, &heap->buf_desc_map, KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING); + + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE); + kbase_gpu_vm_unlock(kctx); if (unlikely(!vmap_ptr)) { @@ -811,7 +834,7 @@ heap_context_alloc_failed: buf_desc_vmap_failed: if (heap->buf_desc_reg) { kbase_gpu_vm_lock(kctx); - heap->buf_desc_reg->flags &= ~KBASE_REG_NO_USER_FREE; + kbase_va_region_no_user_free_put(kctx, heap->buf_desc_reg); kbase_gpu_vm_unlock(kctx); } buf_desc_not_suitable: @@ -866,6 +889,25 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx, return err; } +/** + * validate_allocation_request - Check whether the chunk allocation request + * received on tiler OOM should be handled at + * current time. + * + * @heap: The tiler heap the OOM is associated with + * @nr_in_flight: Number of fragment jobs in flight + * @pending_frag_count: Number of pending fragment jobs + * + * Context: must hold the tiler heap lock to guarantee its lifetime + * + * Return: + * * 0 - allowed to allocate an additional chunk + * * -EINVAL - invalid + * * -EBUSY - there are fragment jobs still in flight, which may free chunks + * after completing + * * -ENOMEM - the targeted number of in-flight chunks has been reached and + * no new ones will be allocated + */ static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight, u32 pending_frag_count) { diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c index bcab31d..069e827 100644 --- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c +++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c @@ -346,7 +346,11 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev) reclaim->batch = HEAP_SHRINKER_BATCH; #if !defined(CONFIG_MALI_VECTOR_DUMP) +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(reclaim); +#else + register_shrinker(reclaim, "mali-csf-tiler-heap"); +#endif #endif } diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index ddd2fa8..6e7c64b 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ -149,9 +149,6 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val) dev_dbg(kbdev->dev, "Doorbell mirror interrupt received"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); -#ifdef CONFIG_MALI_DEBUG - WARN_ON(!kbase_csf_scheduler_get_nr_active_csgs(kbdev)); -#endif kbase_pm_disable_db_mirror_interrupt(kbdev); kbdev->pm.backend.exit_gpu_sleep_mode = true; kbase_csf_scheduler_invoke_tick(kbdev); diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c index 5a12b32..4f5ac22 100644 --- a/mali_kbase/device/mali_kbase_device.c +++ b/mali_kbase/device/mali_kbase_device.c @@ -321,6 +321,10 @@ int kbase_device_misc_init(struct kbase_device * const kbdev) "Unable to register OOM notifier for Mali - but will continue\n"); kbdev->oom_notifier_block.notifier_call = NULL; } + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&kbdev->live_fence_metadata, 0); +#endif return 0; term_as: @@ -344,6 +348,11 @@ void 
kbase_device_misc_term(struct kbase_device *kbdev) if (kbdev->oom_notifier_block.notifier_call) unregister_oom_notifier(&kbdev->oom_notifier_block); + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + if (atomic_read(&kbdev->live_fence_metadata) > 0) + dev_warn(kbdev->dev, "Terminating Kbase device with live fence metadata!"); +#endif } void kbase_device_free(struct kbase_device *kbdev) diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index a3a0e02..a2ecd08 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -345,7 +345,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info); + mmu_sync_info, NULL, false); if (ret) goto mmu_insert_failed; @@ -480,7 +480,7 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, MCU_AS_NR)); + fw_ring_buf->num_pages, MCU_AS_NR, true)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c index 43cdf18..21b4e52 100644 --- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c +++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c @@ -23,10 +23,13 @@ #include "mali_kbase.h" /* MEMSYS counter block offsets */ +#define L2_RD_MSG_IN_CU (13) #define L2_RD_MSG_IN (16) #define L2_WR_MSG_IN (18) +#define L2_SNP_MSG_IN (20) #define L2_RD_MSG_OUT (22) #define L2_READ_LOOKUP (26) +#define L2_EXT_READ_NOSNP (30) #define L2_EXT_WRITE_NOSNP_FULL (43) /* SC counter block offsets */ @@ -36,17 +39,23 @@ #define FULL_QUAD_WARPS (21) #define EXEC_INSTR_FMA (27) #define EXEC_INSTR_CVT (28) +#define EXEC_INSTR_SFU (29) #define EXEC_INSTR_MSG (30) #define TEX_FILT_NUM_OPS (39) #define LS_MEM_READ_SHORT (45) #define LS_MEM_WRITE_SHORT (47) #define VARY_SLOT_16 (51) +#define BEATS_RD_LSC_EXT (57) +#define BEATS_RD_TEX (58) +#define BEATS_RD_TEX_EXT (59) +#define FRAG_QUADS_COARSE (68) /* Tiler counter block offsets */ #define IDVS_POS_SHAD_STALL (23) #define PREFETCH_STALL (25) #define VFETCH_POS_READ_WAIT (29) #define VFETCH_VERTEX_WAIT (30) +#define PRIMASSY_STALL (32) #define IDVS_VAR_SHAD_STALL (38) #define ITER_STALL (40) #define PMGR_PTR_RD_STALL (48) @@ -111,6 +120,15 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = { TILER_COUNTER_DEF("vfetch_vertex_wait", -391964, VFETCH_VERTEX_WAIT), }; +static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttix[] = { + TILER_COUNTER_DEF("primassy_stall", 471953, PRIMASSY_STALL), + TILER_COUNTER_DEF("idvs_var_shad_stall", -460559, IDVS_VAR_SHAD_STALL), + + MEMSYS_COUNTER_DEF("l2_rd_msg_in_cu", -6189604, L2_RD_MSG_IN_CU), + MEMSYS_COUNTER_DEF("l2_snp_msg_in", 6289609, L2_SNP_MSG_IN), + MEMSYS_COUNTER_DEF("l2_ext_read_nosnp", 512341, L2_EXT_READ_NOSNP), +}; + /* These tables provide a description of each performance counter * used by the shader cores counter model for energy estimation. 
*/ @@ -150,6 +168,17 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE), }; +static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttix[] = { + SC_COUNTER_DEF("exec_instr_fma", 192642, EXEC_INSTR_FMA), + SC_COUNTER_DEF("exec_instr_msg", 1326465, EXEC_INSTR_MSG), + SC_COUNTER_DEF("beats_rd_tex", 163518, BEATS_RD_TEX), + SC_COUNTER_DEF("beats_rd_lsc_ext", 127475, BEATS_RD_LSC_EXT), + SC_COUNTER_DEF("frag_quads_coarse", -36247, FRAG_QUADS_COARSE), + SC_COUNTER_DEF("ls_mem_write_short", 51547, LS_MEM_WRITE_SHORT), + SC_COUNTER_DEF("beats_rd_tex_ext", -43370, BEATS_RD_TEX_EXT), + SC_COUNTER_DEF("exec_instr_sfu", 31583, EXEC_INSTR_SFU), +}; + #define IPA_POWER_MODEL_OPS(gpu, init_token) \ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \ .name = "mali-" #gpu "-power-model", \ @@ -181,13 +210,13 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = { #define ALIAS_POWER_MODEL(gpu, as_gpu) \ IPA_POWER_MODEL_OPS(gpu, as_gpu) -/* Reference voltage value is 750 mV. - */ +/* Reference voltage value is 750 mV. */ STANDARD_POWER_MODEL(todx, 750); STANDARD_POWER_MODEL(tgrx, 750); STANDARD_POWER_MODEL(tvax, 750); - STANDARD_POWER_MODEL(ttux, 750); +/* Reference voltage value is 550 mV. */ +STANDARD_POWER_MODEL(ttix, 550); /* Assuming LODX is an alias of TODX for IPA */ ALIAS_POWER_MODEL(lodx, todx); @@ -195,10 +224,14 @@ ALIAS_POWER_MODEL(lodx, todx); /* Assuming LTUX is an alias of TTUX for IPA */ ALIAS_POWER_MODEL(ltux, ttux); +/* Assuming LTIX is an alias of TTIX for IPA */ +ALIAS_POWER_MODEL(ltix, ttix); + static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = { &kbase_todx_ipa_model_ops, &kbase_lodx_ipa_model_ops, &kbase_tgrx_ipa_model_ops, &kbase_tvax_ipa_model_ops, - &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops + &kbase_ttux_ipa_model_ops, &kbase_ltux_ipa_model_ops, + &kbase_ttix_ipa_model_ops, &kbase_ltix_ipa_model_ops, }; const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find( @@ -237,6 +270,10 @@ const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id) return "mali-ttux-power-model"; case GPU_ID2_PRODUCT_LTUX: return "mali-ltux-power-model"; + case GPU_ID2_PRODUCT_TTIX: + return "mali-ttix-power-model"; + case GPU_ID2_PRODUCT_LTIX: + return "mali-ltix-power-model"; default: return NULL; } diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index a360984..35c3828 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -596,7 +596,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tDU }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, @@ -606,8 +605,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tODx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3212, @@ -617,7 +614,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -626,8 +622,6 @@
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGRx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -636,7 +630,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -645,8 +638,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -655,7 +646,17 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0[] = { - BASE_HW_ISSUE_9435, + BASE_HW_ISSUE_TSIX_2033, + BASE_HW_ISSUE_TTRX_1337, + BASE_HW_ISSUE_TURSEHW_1997, + BASE_HW_ISSUE_GPU2019_3878, + BASE_HW_ISSUE_TURSEHW_2716, + BASE_HW_ISSUE_GPU2019_3901, + BASE_HW_ISSUE_GPU2021PRO_290, + BASE_HW_ISSUE_END +}; + +__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p1[] = { BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_1997, @@ -667,8 +668,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTUx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -679,7 +678,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -690,7 +688,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -701,7 +698,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p1 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878, @@ -712,8 +708,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p2 }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTIx[] = { - BASE_HW_ISSUE_5736, - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, @@ -722,7 +716,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTI }; __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTIx_r0p0[] = { - BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TURSEHW_2716, diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index 4522d6c..9c867d1 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -663,8 +663,10 @@ static int kbase_open(struct 
inode *inode, struct file *filp) if (!kbdev) return -ENODEV; - /* Set address space operation for page migration */ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + /* Set address space operations for page migration */ kbase_mem_migrate_set_address_space_ops(kbdev, filp); +#endif /* Device-wide firmware load is moved here from probing to comply with * Android GKI vendor guideline. @@ -4578,8 +4580,18 @@ int power_control_init(struct kbase_device *kbdev) * from completing its initialization. */ #if defined(CONFIG_PM_OPP) -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->nr_regulators > 0) { + kbdev->token = dev_pm_opp_set_regulators(kbdev->dev, regulator_names); + + if (kbdev->token < 0) { + err = kbdev->token; + goto regulators_probe_defer; + } + + } +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (kbdev->nr_regulators > 0) { kbdev->opp_table = dev_pm_opp_set_regulators(kbdev->dev, regulator_names, BASE_MAX_NR_CLOCKS_REGULATORS); @@ -4589,7 +4601,8 @@ int power_control_init(struct kbase_device *kbdev) goto regulators_probe_defer; } } -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ err = dev_pm_opp_of_add_table(kbdev->dev); CSTD_UNUSED(err); #endif /* CONFIG_PM_OPP */ @@ -4624,11 +4637,15 @@ void power_control_term(struct kbase_device *kbdev) #if defined(CONFIG_PM_OPP) dev_pm_opp_of_remove_table(kbdev->dev); -#if ((KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) && \ - defined(CONFIG_REGULATOR)) +#if defined(CONFIG_REGULATOR) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + if (kbdev->token > -EPERM) + dev_pm_opp_put_regulators(kbdev->token); +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) if (!IS_ERR_OR_NULL(kbdev->opp_table)) dev_pm_opp_put_regulators(kbdev->opp_table); -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ +#endif /* CONFIG_REGULATOR */ #endif /* CONFIG_PM_OPP */ for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { @@ -5491,6 +5508,11 @@ static int kbase_platform_device_probe(struct platform_device *pdev) } kbdev->dev = &pdev->dev; + +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + kbdev->token = -EPERM; +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ + dev_set_drvdata(kbdev->dev, kbdev); #if (KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) mutex_lock(&kbase_probe_mutex); diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index 60afde2..3e58500 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -69,6 +69,12 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev) } } +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx) +{ + kctx->as_nr = KBASEP_AS_NR_INVALID; + atomic_set(&kctx->refcount, 0); +} + /* kbasep_ctx_sched_find_as_for_ctx - Find a free address space * * @kbdev: The context for which to find a free address space @@ -201,9 +207,10 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx) void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) { struct kbase_device *const kbdev = kctx->kbdev; + unsigned long flags; - lockdep_assert_held(&kbdev->mmu_hw_mutex); - lockdep_assert_held(&kbdev->hwaccess_lock); + mutex_lock(&kbdev->mmu_hw_mutex); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); 
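/* Both locks are now taken (and released) inside this function, so callers such as kbasep_js_kctx_term() and kbase_csf_scheduler_context_term() no longer need to hold kbase_device::mmu_hw_mutex or the hwaccess_lock around the call. */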
WARN_ON(atomic_read(&kctx->refcount) != 0); @@ -215,6 +222,9 @@ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx) kbdev->as_to_kctx[kctx->as_nr] = NULL; kctx->as_nr = KBASEP_AS_NR_INVALID; } + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + mutex_unlock(&kbdev->mmu_hw_mutex); } void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) diff --git a/mali_kbase/mali_kbase_ctx_sched.h b/mali_kbase/mali_kbase_ctx_sched.h index f787cc3..5a8d175 100644 --- a/mali_kbase/mali_kbase_ctx_sched.h +++ b/mali_kbase/mali_kbase_ctx_sched.h @@ -60,6 +60,15 @@ int kbase_ctx_sched_init(struct kbase_device *kbdev); void kbase_ctx_sched_term(struct kbase_device *kbdev); /** + * kbase_ctx_sched_init_ctx - Initialize per-context data fields for scheduling + * @kctx: The context to initialize + * + * This must be called during context initialization before any other context + * scheduling functions are called on @kctx. + */ +void kbase_ctx_sched_init_ctx(struct kbase_context *kctx); + +/** * kbase_ctx_sched_retain_ctx - Retain a reference to the @ref kbase_context * @kctx: The context to which to retain a reference * @@ -113,9 +122,6 @@ void kbase_ctx_sched_release_ctx(struct kbase_context *kctx); * This function should be called when a context is being destroyed. The * context must no longer have any reference. If it has been assigned an * address space before then the AS will be unprogrammed. - * - * The kbase_device::mmu_hw_mutex and kbase_device::hwaccess_lock locks must be - * held whilst calling this function. */ void kbase_ctx_sched_remove_ctx(struct kbase_context *kctx); diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 48f8795..722ffc7 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -643,7 +643,6 @@ struct kbase_process { * struct kbase_mem_migrate - Object representing an instance for managing * page migration. * - * @mapping: Pointer to address space struct used for page migration. * @free_pages_list: List of deferred pages to free. Mostly used when page migration * is enabled. Pages in memory pool that require migrating * will be freed instead. However page cannot be freed @@ -654,13 +653,17 @@ struct kbase_process { * @free_pages_workq: Work queue to process the work items queued to free * pages in @free_pages_list. * @free_pages_work: Work item to free pages in @free_pages_list. + * @inode: Pointer to inode whose address space operations are used + * for page migration purposes. */ struct kbase_mem_migrate { - struct address_space *mapping; struct list_head free_pages_list; spinlock_t free_pages_lock; struct workqueue_struct *free_pages_workq; struct work_struct free_pages_work; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + struct inode *inode; +#endif }; /** @@ -701,6 +704,10 @@ struct kbase_mem_migrate { * @opp_table: Pointer to the device OPP structure maintaining the * link to OPPs attached to a device. This is obtained * after setting regulator names for the device. + * @token: Integer replacement for opp_table in kernel versions + * 6.0 and greater. Value is a token id number when 0 or greater, + * and a Linux errno when negative. Must be initialised + * to a non-zero value, as 0 is a valid token id. * @devname: string containing the name used for GPU device instance, * miscellaneous device is registered using the same name. * @id: Unique identifier for the device, indicates the number of @@ -898,6 +905,10 @@ struct kbase_mem_migrate { * GPU2019-3878.
PM state machine is invoked after * clearing this flag and @hwaccess_lock is used to * serialize the access. + * @mmu_page_migrate_in_progress: Set before starting an MMU page migration transaction + * and cleared after the transaction completes. The PM L2 state machine is + * prevented from entering powering up/down transitions when the + * flag is set; @hwaccess_lock is used to serialize the access. * @poweroff_pending: Set when power off operation for GPU is started, reset when * power on for GPU is started. * @infinite_cache_active_default: Set to enable using infinite cache for all the @@ -978,6 +989,10 @@ struct kbase_mem_migrate { * @oom_notifier_block: notifier_block containing kernel-registered out-of- * memory handler. * @mem_migrate: Per device object for managing page migration. + * @live_fence_metadata: Count of live fence metadata structures created by + * the KCPU queue. These structures may outlive the kbase module + * itself, in which case a warning should + * be produced. */ struct kbase_device { u32 hw_quirks_sc; @@ -1002,9 +1017,11 @@ struct kbase_device { #if IS_ENABLED(CONFIG_REGULATOR) struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS]; unsigned int nr_regulators; -#if (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + int token; +#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE) struct opp_table *opp_table; -#endif /* (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE */ +#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */ #endif /* CONFIG_REGULATOR */ char devname[DEVNAME_SIZE]; u32 id; @@ -1173,6 +1190,7 @@ struct kbase_device { #if MALI_USE_CSF bool mmu_hw_operation_in_progress; #endif + bool mmu_page_migrate_in_progress; bool poweroff_pending; bool infinite_cache_active_default; @@ -1261,6 +1279,10 @@ struct kbase_device { struct kbase_mem_migrate mem_migrate; + +#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) + atomic_t live_fence_metadata; +#endif }; /** diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index dfe33e5..25986f6 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -33,6 +33,49 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" +#if MALI_USE_CSF +/* Maximum number of characters in DMA fence timeline name. */ +#define MAX_TIMELINE_NAME (32) + +/** + * struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing + * information about KCPU queue. One instance per KCPU + * queue. + * + * @refcount: Atomic value to keep track of number of references to an instance. + * An instance can outlive the KCPU queue itself. + * @kbdev: Pointer to Kbase device. + * @kctx_id: Kbase context ID. + * @timeline_name: String of timeline name for associated fence object. + */ +struct kbase_kcpu_dma_fence_meta { +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + atomic_t refcount; +#else + refcount_t refcount; +#endif + struct kbase_device *kbdev; + int kctx_id; + char timeline_name[MAX_TIMELINE_NAME]; +}; + +/** + * struct kbase_kcpu_dma_fence - Structure which extends a dma fence object to include a + * reference to metadata containing more information about it. + * + * @base: Fence object itself. + * @metadata: Pointer to metadata structure.
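+ * + * The metadata is reference-counted separately from the fence (see kbase_kcpu_dma_fence_meta_put()), so it can safely outlive both the fence and the KCPU queue that created it.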
+ */ +struct kbase_kcpu_dma_fence { +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence base; +#else + struct dma_fence base; +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ + struct kbase_kcpu_dma_fence_meta *metadata; +}; +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; #else @@ -167,12 +210,56 @@ static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, */ #define kbase_fence_get(fence_info) dma_fence_get((fence_info)->fence) +#if MALI_USE_CSF +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct fence *fence) +#else +static inline struct kbase_kcpu_dma_fence *kbase_kcpu_dma_fence_get(struct dma_fence *fence) +#endif +{ + if (fence->ops == &kbase_fence_ops) + return (struct kbase_kcpu_dma_fence *)fence; + + return NULL; +} + +static inline void kbase_kcpu_dma_fence_meta_put(struct kbase_kcpu_dma_fence_meta *metadata) +{ +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) + if (atomic_dec_and_test(&metadata->refcount)) { +#else + if (refcount_dec_and_test(&metadata->refcount)) { +#endif + atomic_dec(&metadata->kbdev->live_fence_metadata); + kfree(metadata); + } +} + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_kcpu_dma_fence_put(struct fence *fence) +#else +static inline void kbase_kcpu_dma_fence_put(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = kbase_kcpu_dma_fence_get(fence); + + if (kcpu_fence) + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); +} +#endif /* MALI_USE_CSF */ + /** * kbase_fence_put() - Releases a reference to a fence * @fence: Fence to release reference for. */ -#define kbase_fence_put(fence) dma_fence_put(fence) - +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +static inline void kbase_fence_put(struct fence *fence) +#else +static inline void kbase_fence_put(struct dma_fence *fence) +#endif +{ + dma_fence_put(fence); +} #endif /* IS_ENABLED(CONFIG_SYNC_FILE) */ diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c index be14155..dd0b63e 100644 --- a/mali_kbase/mali_kbase_fence_ops.c +++ b/mali_kbase/mali_kbase_fence_ops.c @@ -21,7 +21,7 @@ #include <linux/atomic.h> #include <linux/list.h> -#include <mali_kbase_fence_defs.h> +#include <mali_kbase_fence.h> #include <mali_kbase.h> static const char * @@ -41,7 +41,13 @@ kbase_fence_get_timeline_name(struct fence *fence) kbase_fence_get_timeline_name(struct dma_fence *fence) #endif { +#if MALI_USE_CSF + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + return kcpu_fence->metadata->timeline_name; +#else return kbase_timeline_name; +#endif /* MALI_USE_CSF */ } static bool @@ -68,18 +74,36 @@ kbase_fence_fence_value_str(struct dma_fence *fence, char *str, int size) #endif } +#if MALI_USE_CSF +static void +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) +kbase_fence_release(struct fence *fence) +#else +kbase_fence_release(struct dma_fence *fence) +#endif +{ + struct kbase_kcpu_dma_fence *kcpu_fence = (struct kbase_kcpu_dma_fence *)fence; + + kbase_kcpu_dma_fence_meta_put(kcpu_fence->metadata); + kfree(kcpu_fence); +} +#endif + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) extern const struct fence_ops kbase_fence_ops; /* silence checker warning */ -const struct fence_ops kbase_fence_ops = { - .wait = fence_default_wait, +const struct fence_ops kbase_fence_ops = { .wait = fence_default_wait, #else extern 
const struct dma_fence_ops kbase_fence_ops; /* silence checker warning */ -const struct dma_fence_ops kbase_fence_ops = { - .wait = dma_fence_default_wait, +const struct dma_fence_ops kbase_fence_ops = { .wait = dma_fence_default_wait, +#endif + .get_driver_name = kbase_fence_get_driver_name, + .get_timeline_name = kbase_fence_get_timeline_name, + .enable_signaling = kbase_fence_enable_signaling, +#if MALI_USE_CSF + .fence_value_str = kbase_fence_fence_value_str, + .release = kbase_fence_release +#else + .fence_value_str = kbase_fence_fence_value_str #endif - .get_driver_name = kbase_fence_get_driver_name, - .get_timeline_name = kbase_fence_get_timeline_name, - .enable_signaling = kbase_fence_enable_signaling, - .fence_value_str = kbase_fence_fence_value_str }; - +KBASE_EXPORT_TEST_API(kbase_fence_ops); diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c index b6a8a2e..bb079c2 100644 --- a/mali_kbase/mali_kbase_hw.c +++ b/mali_kbase/mali_kbase_hw.c @@ -235,6 +235,7 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id( { GPU_ID2_PRODUCT_TTUX, { { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 }, + { GPU_ID2_VERSION_MAKE(0, 1, 0), base_hw_issues_tTUx_r0p1 }, { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 }, { GPU_ID2_VERSION_MAKE(1, 1, 0), base_hw_issues_tTUx_r1p1 }, { GPU_ID2_VERSION_MAKE(1, 2, 0), base_hw_issues_tTUx_r1p2 }, diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index 86d311a..491bc06 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -645,6 +645,8 @@ int kbasep_js_kctx_init(struct kbase_context *const kctx) KBASE_DEBUG_ASSERT(kctx != NULL); + kbase_ctx_sched_init_ctx(kctx); + for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) INIT_LIST_HEAD(&kctx->jctx.sched_info.ctx.ctx_list_entry[i]); @@ -722,6 +724,8 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) kbase_backend_ctx_count_changed(kbdev); mutex_unlock(&kbdev->js_data.runpool_mutex); } + + kbase_ctx_sched_remove_ctx(kctx); } /* @@ -4030,4 +4034,3 @@ base_jd_prio kbase_js_priority_check(struct kbase_device *kbdev, base_jd_prio pr req_priority); return kbasep_js_sched_prio_to_atom_prio(kbdev, out_priority); } - diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c index f67e00c..ca74540 100644 --- a/mali_kbase/mali_kbase_kinstr_jm.c +++ b/mali_kbase/mali_kbase_kinstr_jm.c @@ -48,6 +48,11 @@ #include <linux/version_compat_defs.h> #include <linux/wait.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* Define static_assert(). * * The macro was introduced in kernel 5.1. But older vendor kernels may define diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c index 7aa0ce9..ef9d224 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.c +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c @@ -39,6 +39,11 @@ #include <linux/version_compat_defs.h> #include <linux/workqueue.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* The minimum allowed interval between dumps, in nanoseconds * (equivalent to 10KHz) */ @@ -128,6 +133,34 @@ struct kbase_kinstr_prfcnt_async { }; /** + * enum kbase_kinstr_prfcnt_client_init_state - A list of + * initialisation states that the + * kinstr_prfcnt client can be at + * during initialisation. 
Useful + * for terminating a partially + * initialised client. + * + * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised + * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session + * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map + * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer + * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array + * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client + * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue + * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised + */ +enum kbase_kinstr_prfcnt_client_init_state { + KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED, + KINSTR_PRFCNT_ENABLE_MAP, + KINSTR_PRFCNT_DUMP_BUFFER, + KINSTR_PRFCNT_SAMPLE_ARRAY, + KINSTR_PRFCNT_VIRTUALIZER_CLIENT, + KINSTR_PRFCNT_WAITQ_MUTEX, + KINSTR_PRFCNT_INITIALISED +}; + +/** * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached * to a kinstr_prfcnt context. * @kinstr_ctx: kinstr_prfcnt context client is attached to. @@ -1163,19 +1196,46 @@ static void kbasep_kinstr_prfcnt_sample_array_free( memset(sample_arr, 0, sizeof(*sample_arr)); } -void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +static void +kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli, + enum kbase_kinstr_prfcnt_client_init_state init_state) { if (!cli) return; - kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); - kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); - kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); - kbase_hwcnt_enable_map_free(&cli->enable_map); - mutex_destroy(&cli->cmd_sync_lock); + while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) { + switch (init_state) { + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + case KINSTR_PRFCNT_WAITQ_MUTEX: + mutex_destroy(&cli->cmd_sync_lock); + break; + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + kbase_hwcnt_virtualizer_client_destroy(cli->hvcli); + break; + case KINSTR_PRFCNT_SAMPLE_ARRAY: + kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr); + break; + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_dump_buffer_free(&cli->tmp_buf); + break; + case KINSTR_PRFCNT_ENABLE_MAP: + kbase_hwcnt_enable_map_free(&cli->enable_map); + break; + case KINSTR_PRFCNT_PARSE_SETUP: + /* Nothing to do here */ + break; + } + } kfree(cli); } +void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli) +{ + kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED); +} + /** * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release. * @inode: Non-NULL pointer to inode structure. 
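/* The staged-initialisation pattern above generalises to any multi-step
 * constructor: creation walks the state enum forwards and, on the first
 * failure, tears down only the states already completed, in reverse order.
 * A minimal free-standing sketch of the idiom (hypothetical "widget" type,
 * not kbase code; assumes <linux/slab.h> and <linux/mutex.h>):
 */
struct widget {
	void *buf;
	struct mutex lock;
};

enum widget_init_state { WIDGET_UNINIT, WIDGET_BUF, WIDGET_MUTEX, WIDGET_DONE };

/* Tear down every state strictly below @state, newest first. */
static void widget_destroy_partial(struct widget *w, enum widget_init_state state)
{
	while (state-- > WIDGET_UNINIT) {
		switch (state) {
		case WIDGET_MUTEX:
			mutex_destroy(&w->lock);
			break;
		case WIDGET_BUF:
			kfree(w->buf);
			break;
		default:
			break;
		}
	}
}

static int widget_init(struct widget *w)
{
	enum widget_init_state state;
	int err = 0;

	for (state = WIDGET_UNINIT; state < WIDGET_DONE; state++) {
		switch (state) {
		case WIDGET_BUF:
			w->buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
			if (!w->buf)
				err = -ENOMEM;
			break;
		case WIDGET_MUTEX:
			mutex_init(&w->lock);
			break;
		default:
			break;
		}
		if (err) {
			/* Unwind only the states that completed before the failure. */
			widget_destroy_partial(w, state);
			return err;
		}
	}
	return 0;
}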
@@ -1790,6 +1850,7 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst { int err; struct kbase_kinstr_prfcnt_client *cli; + enum kbase_kinstr_prfcnt_client_init_state init_state; WARN_ON(!kinstr_ctx); WARN_ON(!setup); @@ -1800,73 +1861,86 @@ int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinst if (!cli) return -ENOMEM; - cli->kinstr_ctx = kinstr_ctx; - err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, req_arr); - - if (err < 0) - goto error; + for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED; + init_state++) { + err = 0; + switch (init_state) { + case KINSTR_PRFCNT_PARSE_SETUP: + cli->kinstr_ctx = kinstr_ctx; + err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, + req_arr); - cli->config.buffer_count = MAX_BUFFER_COUNT; - cli->dump_interval_ns = cli->config.period_ns; - cli->next_dump_time_ns = 0; - cli->active = false; - atomic_set(&cli->write_idx, 0); - atomic_set(&cli->read_idx, 0); - atomic_set(&cli->fetch_idx, 0); + break; - err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, - &cli->enable_map); + case KINSTR_PRFCNT_ENABLE_MAP: + cli->config.buffer_count = MAX_BUFFER_COUNT; + cli->dump_interval_ns = cli->config.period_ns; + cli->next_dump_time_ns = 0; + cli->active = false; + atomic_set(&cli->write_idx, 0); + atomic_set(&cli->read_idx, 0); + atomic_set(&cli->fetch_idx, 0); - if (err < 0) - goto error; + err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map); + break; - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em); + case KINSTR_PRFCNT_DUMP_BUFFER: + kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, + &cli->config.phys_em); - cli->sample_count = cli->config.buffer_count; - atomic_set(&cli->sync_sample_count, cli->sample_count); - cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); + cli->sample_count = cli->config.buffer_count; + atomic_set(&cli->sync_sample_count, cli->sample_count); + cli->sample_size = + kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata); - /* Use virtualizer's metadata to alloc tmp buffer which interacts with - * the HWC virtualizer. - */ - err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, - &cli->tmp_buf); + /* Use virtualizer's metadata to alloc tmp buffer which interacts with + * the HWC virtualizer. + */ + err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_SAMPLE_ARRAY: + /* Disable clock map in setup, and enable clock map when start */ + cli->enable_map.clk_enable_map = 0; - /* Disable clock map in setup, and enable clock map when start */ - cli->enable_map.clk_enable_map = 0; + /* Use metadata from virtualizer to allocate dump buffers if + * kinstr_prfcnt doesn't have the truncated metadata. + */ + err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); - /* Use metadata from virtualizer to allocate dump buffers if - * kinstr_prfcnt doesn't have the truncated metadata. - */ - err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_VIRTUALIZER_CLIENT: + /* Set enable map to be 0 to prevent virtualizer to init and kick the + * backend to count. 
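+ * (As with the clock map above, the maps configured during setup are expected to be applied later, when the client is actually started.)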
+ */ + kbase_hwcnt_gpu_enable_map_from_physical( + &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 }); - /* Set enable map to be 0 to prevent virtualizer to init and kick the backend to count */ - kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, - &(struct kbase_hwcnt_physical_enable_map){ 0 }); + err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt, + &cli->enable_map, &cli->hvcli); + break; - err = kbase_hwcnt_virtualizer_client_create( - kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli); + case KINSTR_PRFCNT_WAITQ_MUTEX: + init_waitqueue_head(&cli->waitq); + INIT_WORK(&cli->async.dump_work, kbasep_kinstr_prfcnt_async_dump_worker); + mutex_init(&cli->cmd_sync_lock); + break; - if (err < 0) - goto error; + case KINSTR_PRFCNT_INITIALISED: + /* This shouldn't be reached */ + break; + } - init_waitqueue_head(&cli->waitq); - INIT_WORK(&cli->async.dump_work, - kbasep_kinstr_prfcnt_async_dump_worker); - mutex_init(&cli->cmd_sync_lock); + if (err < 0) { + kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state); + return err; + } + } *out_vcli = cli; return 0; -error: - kbasep_kinstr_prfcnt_client_destroy(cli); - return err; } static size_t kbasep_kinstr_prfcnt_get_block_info_count( diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 3743b4d..abd01c1 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -803,6 +803,40 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, } #endif /* MALI_USE_CSF */ +static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +{ + struct kbase_context *kctx = NULL; + struct rb_root *rbtree = reg->rbtree; + + switch (reg->flags & KBASE_REG_ZONE_MASK) { + case KBASE_REG_ZONE_CUSTOM_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); + break; + case KBASE_REG_ZONE_SAME_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); + break; + case KBASE_REG_ZONE_EXEC_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); + break; +#if MALI_USE_CSF + case KBASE_REG_ZONE_EXEC_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); + break; + case KBASE_REG_ZONE_FIXED_VA: + kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); + break; + case KBASE_REG_ZONE_MCU_SHARED: + /* This is only expected to be called on driver unload. */ + break; +#endif + default: + WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); + break; + } + + return kctx; +} + static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; @@ -814,6 +848,8 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(reg->va_refcnt != 1); + if (kbase_page_migration_enabled) + kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. 
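/* kbase_reg_flags_to_kctx() works because each zone's rb_root is embedded
 * directly in struct kbase_context, so container_of() can recover the owning
 * context from nothing but the rbtree pointer plus the region's zone bits.
 * A stripped-down illustration (hypothetical two-zone layout, not the real
 * structure; assumes <linux/rbtree.h> and container_of()):
 */
#define EX_ZONE_MASK (3ul)
#define EX_ZONE_SAME_VA (0ul)
#define EX_ZONE_CUSTOM_VA (1ul)

struct ex_context {
	struct rb_root reg_rbtree_same;
	struct rb_root reg_rbtree_custom;
};

static struct ex_context *ex_ctx_from_rbtree(struct rb_root *rbtree, unsigned long region_flags)
{
	switch (region_flags & EX_ZONE_MASK) {
	case EX_ZONE_SAME_VA:
		return container_of(rbtree, struct ex_context, reg_rbtree_same);
	case EX_ZONE_CUSTOM_VA:
		return container_of(rbtree, struct ex_context, reg_rbtree_custom);
	default:
		return NULL;
	}
}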
@@ -830,6 +866,10 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) void kbase_region_tracker_term(struct kbase_context *kctx) { + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + kbase_gpu_vm_lock(kctx); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); @@ -1554,6 +1594,7 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, return NULL; new_reg->va_refcnt = 1; + new_reg->no_user_free_refcnt = 0; new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ new_reg->rbtree = rbtree; @@ -1572,41 +1613,6 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree, KBASE_EXPORT_TEST_API(kbase_alloc_free_region); -static struct kbase_context *kbase_reg_flags_to_kctx( - struct kbase_va_region *reg) -{ - struct kbase_context *kctx = NULL; - struct rb_root *rbtree = reg->rbtree; - - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, - reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } - - return kctx; -} - /** * kbase_free_alloced_region - Free a region object. 
* @@ -1720,6 +1726,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long gwt_mask = ~0; int group_id; struct kbase_mem_phy_alloc *alloc; + bool ignore_page_migration = false; #ifdef CONFIG_MALI_CINSTR_GWT if (kctx->gwt_enabled) @@ -1749,15 +1756,12 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, for (i = 0; i < alloc->imported.alias.nents; i++) { if (alloc->imported.alias.aliased[i].alloc) { err = kbase_mmu_insert_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * stride), - alloc->imported.alias.aliased[i] - .alloc->pages + - alloc->imported.alias.aliased[i] - .offset, + kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), + alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset, alloc->imported.alias.aliased[i].length, - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, + NULL, ignore_page_migration); if (err) goto bad_insert; @@ -1777,12 +1781,15 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } } } else { - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn, + if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || + reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) + ignore_page_migration = true; + + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - group_id, mmu_sync_info); + reg->flags & gwt_mask, kctx->as_nr, group_id, + mmu_sync_info, reg, ignore_page_migration); if (err) goto bad_insert; kbase_mem_phy_alloc_gpu_mapped(alloc); @@ -1816,7 +1823,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, bad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - reg->nr_pages, kctx->as_nr); + reg->nr_pages, kctx->as_nr, ignore_page_migration); kbase_remove_va_region(kctx->kbdev, reg); @@ -1845,7 +1852,6 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) switch (alloc->type) { case KBASE_MEM_TYPE_ALIAS: { size_t i = 0; - /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped * should be passed to the kbase_mmu_teardown_pages() function, @@ -1853,27 +1859,37 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) * separately. 
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { - if (alloc->imported.alias.aliased[i].alloc) { - int err_loop = kbase_mmu_teardown_pages( - kctx->kbdev, &kctx->mmu, - reg->start_pfn + (i * alloc->imported.alias.stride), - alloc->pages + (i * alloc->imported.alias.stride), - alloc->imported.alias.aliased[i].length, - kctx->as_nr); - if (WARN_ON_ONCE(err_loop)) - err = err_loop; - } + struct tagged_addr *phys_alloc = NULL; + int err_loop; + + if (alloc->imported.alias.aliased[i].alloc != NULL) + phys_alloc = alloc->imported.alias.aliased[i].alloc->pages + + alloc->imported.alias.aliased[i].offset; + + err_loop = kbase_mmu_teardown_pages( + kctx->kbdev, &kctx->mmu, + reg->start_pfn + (i * alloc->imported.alias.stride), + phys_alloc, alloc->imported.alias.aliased[i].length, + kctx->as_nr, false); + + if (WARN_ON_ONCE(err_loop)) + err = err_loop; } } break; case KBASE_MEM_TYPE_IMPORTED_UMM: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); + break; + case KBASE_MEM_TYPE_IMPORTED_USER_BUF: + err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, kbase_reg_current_backed_size(reg), + kctx->as_nr, true); break; default: err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, false); break; } @@ -2197,7 +2213,7 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); - if (reg->flags & KBASE_REG_NO_USER_FREE) { + if (kbase_va_region_is_no_user_free(kctx, reg)) { dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n"); return -EINVAL; } @@ -2416,8 +2432,11 @@ int kbase_update_region_flags(struct kbase_context *kctx, if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING) reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; - if (flags & BASEP_MEM_NO_USER_FREE) - reg->flags |= KBASE_REG_NO_USER_FREE; + if (flags & BASEP_MEM_NO_USER_FREE) { + kbase_gpu_vm_lock(kctx); + kbase_va_region_no_user_free_get(kctx, reg); + kbase_gpu_vm_unlock(kctx); + } if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE; @@ -3206,9 +3225,32 @@ out_rollback: out_term: return -1; } - KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages); +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status) +{ + u32 i = 0; + + for (; i < alloc->nents; i++) { + struct tagged_addr phys = alloc->pages[i]; + struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys)); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys) || is_partial(phys)) + continue; + + if (!page_md) + continue; + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status); + spin_unlock(&page_md->migrate_lock); + } +} + bool kbase_check_alloc_flags(unsigned long flags) { /* Only known input flags should be set. */ @@ -3766,7 +3808,15 @@ static void kbase_jit_destroy_worker(struct work_struct *work) mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_lock(kctx); - reg->flags &= ~KBASE_REG_NO_USER_FREE; + + /* + * Incrementing the refcount is prevented on JIT regions. 
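+ * (kbase_jit_allocate() enforces this by rejecting any recycled region whose no_user_free_refcnt is greater than 1.)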
+ * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); kbase_gpu_vm_unlock(kctx); } while (1); @@ -4419,7 +4469,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, if (ret < 0) { /* * An update to an allocation from the pool failed, - * chances are slim a new allocation would fair any + * chances are slim a new allocation would fare any * better so return the allocation to the pool and * return the function with failure. */ @@ -4441,6 +4491,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, mutex_unlock(&kctx->jit_evict_lock); reg = NULL; goto end; + } else { + /* A suitable JIT allocation existed on the evict list, so we need + * to make sure that the NOT_MOVABLE property is cleared. + */ + if (kbase_page_migration_enabled) { + kbase_gpu_vm_lock(kctx); + mutex_lock(&kctx->jit_evict_lock); + kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); + } } } else { /* No suitable JIT allocation was found so create a new one */ @@ -4497,6 +4558,29 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, } } + /* Similarly to tiler heap init, there is a short window of time + * where the (either recycled or newly allocated, in our case) region has + * "no user free" refcount incremented but is still missing the DONT_NEED flag, and + * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the + * allocation is the least bad option that doesn't lead to a security issue down the + * line (it will eventually be cleaned up during context termination). + * + * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region + * flags. + */ + kbase_gpu_vm_lock(kctx); + if (unlikely(reg->no_user_free_refcnt > 1)) { + kbase_gpu_vm_unlock(kctx); + dev_err(kctx->kbdev->dev, "JIT region has no_user_free_refcnt > 1!\n"); + + mutex_lock(&kctx->jit_evict_lock); + list_move(®->jit_node, &kctx->jit_pool_head); + mutex_unlock(&kctx->jit_evict_lock); + + reg = NULL; + goto end; + } + trace_mali_jit_alloc(reg, info->id); kctx->jit_current_allocations++; @@ -4514,6 +4598,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, kbase_jit_report_update_pressure(kctx, reg, info->va_pages, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); end: for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) @@ -4584,6 +4669,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) list_move(®->jit_node, &kctx->jit_pool_head); + /* Inactive JIT regions should be freed by the shrinker and not impacted + * by page migration. Once freed, they will enter into the page migration + * state machine via the mempools. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } @@ -4630,7 +4721,14 @@ bool kbase_jit_evict(struct kbase_context *kctx) mutex_unlock(&kctx->jit_evict_lock); if (reg) { - reg->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. 
+ */ + WARN_ON(reg->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, reg); kbase_mem_free_region(kctx, reg); } @@ -4652,7 +4750,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4664,7 +4769,14 @@ void kbase_jit_term(struct kbase_context *kctx) list_del(&walker->jit_node); list_del_init(&walker->gpu_alloc->evict_node); mutex_unlock(&kctx->jit_evict_lock); - walker->flags &= ~KBASE_REG_NO_USER_FREE; + /* + * Incrementing the refcount is prevented on JIT regions. + * If/when this ever changes we would need to compensate + * by implementing "free on putting the last reference", + * but only for JIT regions. + */ + WARN_ON(walker->no_user_free_refcnt > 1); + kbase_va_region_no_user_free_put(kctx, walker); kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } @@ -4922,10 +5034,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - pa, kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err == 0) return 0; @@ -5113,7 +5224,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, kbase_reg_current_backed_size(reg), - kctx->as_nr); + kctx->as_nr, true); } if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 7e791b3..f727538 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -193,10 +193,11 @@ struct kbase_mem_phy_alloc { * @SPILL_IN_PROGRESS: Transitory state. Corner case where pages in a memory * pool of a dying context are being moved to the device * memory pool. + * @NOT_MOVABLE: Stable state. Page has been allocated for an object that is + * not movable, but may return to be movable when the object + * is freed. * @ALLOCATED_MAPPED: Stable state. Page has been allocated, mapped to GPU * and has reference to kbase_mem_phy_alloc object. - * @MULTI_MAPPED: Stable state. This state is used to manage all use cases - * where a page may have "unusual" mappings. * @PT_MAPPED: Stable state. Similar to ALLOCATED_MAPPED, but page doesn't * reference kbase_mem_phy_alloc object. Used as a page in MMU * page table. @@ -205,9 +206,11 @@ struct kbase_mem_phy_alloc { * unmapping it. This status means that a memory release is * happening and it's still not complete. * @FREE_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. - * A page is isolated while it is in ALLOCATED_MAPPED or - * PT_MAPPED state, but then the driver tries to destroy the - * allocation. 
+ * A page is isolated while it is in ALLOCATED_MAPPED state, + * but then the driver tries to destroy the allocation. + * @FREE_PT_ISOLATED_IN_PROGRESS: Transitory state. This is a very particular corner case. + * A page is isolated while it is in PT_MAPPED state, but + * then the driver tries to destroy the allocation. * * Pages can only be migrated in stable states. */ @@ -215,23 +218,32 @@ enum kbase_page_status { MEM_POOL = 0, ALLOCATE_IN_PROGRESS, SPILL_IN_PROGRESS, + NOT_MOVABLE, ALLOCATED_MAPPED, - MULTI_MAPPED, PT_MAPPED, FREE_IN_PROGRESS, FREE_ISOLATED_IN_PROGRESS, + FREE_PT_ISOLATED_IN_PROGRESS, }; +#define PGD_VPFN_LEVEL_MASK ((u64)0x3) +#define PGD_VPFN_LEVEL_GET_LEVEL(pgd_vpfn_level) (pgd_vpfn_level & PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_GET_VPFN(pgd_vpfn_level) (pgd_vpfn_level & ~PGD_VPFN_LEVEL_MASK) +#define PGD_VPFN_LEVEL_SET(pgd_vpfn, level) \ + ((pgd_vpfn & ~PGD_VPFN_LEVEL_MASK) | (level & PGD_VPFN_LEVEL_MASK)) + /** * struct kbase_page_metadata - Metadata for each page in kbase * * @kbdev: Pointer to kbase device. * @dma_addr: DMA address mapped to page. * @migrate_lock: A spinlock to protect the private metadata. + * @data: Member in union valid based on @status. * @status: Status to keep track if page can be migrated at any * given moment. MSB will indicate if page is isolated. * Protected by @migrate_lock. - * @data: Member in union valid based on @status. + * @vmap_count: Counter of kernel mappings. + * @group_id: Memory group ID obtained at the time of page allocation. * * Each 4KB page will have a reference to this struct in the private field. * This will be used to keep track of information required for Linux page @@ -240,7 +252,6 @@ enum kbase_page_status { struct kbase_page_metadata { dma_addr_t dma_addr; spinlock_t migrate_lock; - u8 status; union { struct { @@ -251,19 +262,25 @@ struct kbase_page_metadata { struct kbase_device *kbdev; } mem_pool; struct { - struct kbase_mem_phy_alloc *phy_alloc; struct kbase_va_region *reg; struct kbase_mmu_table *mmut; - struct page *pgd; u64 vpfn; - size_t page_array_index; } mapped; struct { struct kbase_mmu_table *mmut; - struct page *pgd; - u16 entry_info; + u64 pgd_vpfn_level; } pt_mapped; + struct { + struct kbase_device *kbdev; + } free_isolated; + struct { + struct kbase_device *kbdev; + } free_pt_isolated; } data; + + u8 status; + u8 vmap_count; + u8 group_id; }; /* The top bit of kbase_alloc_import_user_buf::current_mapping_usage_count is @@ -288,6 +305,20 @@ enum kbase_jit_report_flags { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE = (1u << 0) }; +/** + * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying + * physical allocation. + * @alloc: the physical allocation containing the pages whose metadata is going + * to be modified + * @status: the status the pages should end up in + * + * Note that this function does not go through all of the checking to ensure that + * proper states are set. Instead, it is only used when we change the allocation + * to NOT_MOVABLE or from NOT_MOVABLE to ALLOCATED_MAPPED + */ +void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc, + enum kbase_page_status status); + static inline void kbase_mem_phy_alloc_gpu_mapped(struct kbase_mem_phy_alloc *alloc) { KBASE_DEBUG_ASSERT(alloc); @@ -388,6 +419,8 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m * @jit_usage_id: The last just-in-time memory usage ID for this region. * @jit_bin_id: The just-in-time memory bin this region came from. 
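Note: the PGD_VPFN_LEVEL_* helpers added above pack the MMU level of a page table page into the two low bits of the virtual PFN it serves; the masking in PGD_VPFN_LEVEL_SET assumes those bits are clear for an aligned PGD range. A minimal stand-alone round-trip, reusing the macro definitions from the hunk (the example values are mine, not from the patch):

    #include <assert.h>
    #include <stdint.h>

    typedef uint64_t u64;

    #define PGD_VPFN_LEVEL_MASK ((u64)0x3)
    #define PGD_VPFN_LEVEL_GET_LEVEL(v) ((v) & PGD_VPFN_LEVEL_MASK)
    #define PGD_VPFN_LEVEL_GET_VPFN(v) ((v) & ~PGD_VPFN_LEVEL_MASK)
    #define PGD_VPFN_LEVEL_SET(vpfn, level) \
            (((vpfn) & ~PGD_VPFN_LEVEL_MASK) | ((level) & PGD_VPFN_LEVEL_MASK))

    int main(void)
    {
            const u64 vpfn = 0x40000; /* aligned base VPFN covered by a PGD */
            const u64 level = 2;      /* page table level of the PGD */
            const u64 packed = PGD_VPFN_LEVEL_SET(vpfn, level);

            /* A single u64 in struct kbase_page_metadata carries both values. */
            assert(PGD_VPFN_LEVEL_GET_LEVEL(packed) == level);
            assert(PGD_VPFN_LEVEL_GET_VPFN(packed) == vpfn);
            return 0;
    }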
 * @va_refcnt: Number of users of this region. Protected by reg_lock.
+ * @no_user_free_refcnt: Number of users that want to prevent the region from
+ * being freed by userspace.
 * @heap_info_gpu_addr: Pointer to an object in GPU memory defining an end of
 * an allocated region
 * The object can be one of:
@@ -508,10 +541,7 @@ struct kbase_va_region {
 #define KBASE_REG_RESERVED_BIT_23 (1ul << 23)
 #endif /* !MALI_USE_CSF */
-/* Whilst this flag is set the GPU allocation is not supposed to be freed by
- * user space. The flag will remain set for the lifetime of JIT allocations.
- */
-#define KBASE_REG_NO_USER_FREE (1ul << 24)
+/* Bit 24 is currently unused and is available for use for a new flag */
 /* Memory has permanent kernel side mapping */
 #define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
@@ -652,6 +682,7 @@ struct kbase_va_region {
 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 int va_refcnt;
+ int no_user_free_refcnt;
 };
 /**
@@ -694,6 +725,23 @@ static inline bool kbase_is_region_invalid_or_free(struct kbase_va_region *reg)
 return (kbase_is_region_invalid(reg) || kbase_is_region_free(reg));
 }
+/**
+ * kbase_is_region_shrinkable - Check if a region is "shrinkable".
+ * A shrinkable region is one whose backing pages (reg->gpu_alloc->pages)
+ * can be freed at any point, even though the kbase_va_region structure itself
+ * may have been refcounted.
+ * Regions that aren't on a shrinker, but could be shrunk at any point in the future
+ * without warning, are still considered "shrinkable" (e.g. active JIT allocs).
+ *
+ * @reg: Pointer to region
+ *
+ * Return: true if the region is "shrinkable", false if not.
+ */
+static inline bool kbase_is_region_shrinkable(struct kbase_va_region *reg)
+{
+ return (reg->flags & KBASE_REG_DONT_NEED) || (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC);
+}
+
 void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg);
 static inline void kbase_region_refcnt_free(struct kbase_device *kbdev,
@@ -714,6 +762,7 @@ static inline struct kbase_va_region *kbase_va_region_alloc_get(
 lockdep_assert_held(&kctx->reg_lock);
 WARN_ON(!region->va_refcnt);
+ WARN_ON(region->va_refcnt == INT_MAX);
 /* non-atomic as kctx->reg_lock is held */
 dev_dbg(kctx->kbdev->dev, "va_refcnt %d before get %pK\n",
@@ -741,6 +790,69 @@ static inline struct kbase_va_region *kbase_va_region_alloc_put(
 return NULL;
 }
+/**
+ * kbase_va_region_is_no_user_free - Check if user free is forbidden for the region.
+ * A region that must not be freed by userspace indicates that it is owned by some other
+ * kbase subsystem, for example tiler heaps, JIT memory or CSF queues.
+ * Such regions must not be shrunk (i.e. have their backing pages freed), except by the
+ * current owner.
+ * Hence, callers cannot rely on this check alone to determine if a region might be shrunk
+ * by any part of kbase. Instead they should use kbase_is_region_shrinkable().
+ *
+ * @kctx: Pointer to kbase context.
+ * @region: Pointer to region.
+ *
+ * Return: true if userspace cannot free the region, false if userspace can free the region.
+ */
+static inline bool kbase_va_region_is_no_user_free(struct kbase_context *kctx,
+ struct kbase_va_region *region)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+ return region->no_user_free_refcnt > 0;
+}
+
+/**
+ * kbase_va_region_no_user_free_get - Increment "no user free" refcount for a region.
+ * Calling this function will prevent the region from being shrunk by parts of kbase that
+ * don't own the region (as long as the refcount stays above zero).
Refer to + * kbase_va_region_is_no_user_free() for more information. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + * + * Return: the pointer to the region passed as argument. + */ +static inline struct kbase_va_region * +kbase_va_region_no_user_free_get(struct kbase_context *kctx, struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(kbase_is_region_shrinkable(region)); + WARN_ON(region->no_user_free_refcnt == INT_MAX); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt++; + + return region; +} + +/** + * kbase_va_region_no_user_free_put - Decrement "no user free" refcount for a region. + * + * @kctx: Pointer to kbase context. + * @region: Pointer to region (not shrinkable). + */ +static inline void kbase_va_region_no_user_free_put(struct kbase_context *kctx, + struct kbase_va_region *region) +{ + lockdep_assert_held(&kctx->reg_lock); + + WARN_ON(!kbase_va_region_is_no_user_free(kctx, region)); + + /* non-atomic as kctx->reg_lock is held */ + region->no_user_free_refcnt--; +} + /* Common functions */ static inline struct tagged_addr *kbase_get_cpu_phy_pages( struct kbase_va_region *reg) diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 6ae1f05..f815144 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -36,6 +36,7 @@ #include <linux/cache.h> #include <linux/memory_group_manager.h> #include <linux/math64.h> +#include <linux/migrate.h> #include <mali_kbase.h> #include <mali_kbase_mem_linux.h> @@ -791,7 +792,11 @@ int kbase_mem_evictable_init(struct kbase_context *kctx) * struct shrinker does not define batch */ kctx->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&kctx->reclaim); +#else + register_shrinker(&kctx->reclaim, "mali-mem"); +#endif return 0; } @@ -855,6 +860,9 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) lockdep_assert_held(&kctx->reg_lock); + /* Memory is in the process of transitioning to the shrinker, and + * should ignore migration attempts + */ kbase_mem_shrink_cpu_mapping(kctx, gpu_alloc->reg, 0, gpu_alloc->nents); @@ -862,12 +870,17 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* This allocation can't already be on a list. */ WARN_ON(!list_empty(&gpu_alloc->evict_node)); - /* - * Add the allocation to the eviction list, after this point the shrink + /* Add the allocation to the eviction list, after this point the shrink * can reclaim it. */ list_add(&gpu_alloc->evict_node, &kctx->evict_list); atomic_add(gpu_alloc->nents, &kctx->evict_nents); + + /* Indicate to page migration that the memory can be reclaimed by the shrinker. + */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); + mutex_unlock(&kctx->jit_evict_lock); kbase_mem_evictable_mark_reclaim(gpu_alloc); @@ -919,6 +932,15 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) gpu_alloc->evicted, 0, mmu_sync_info); gpu_alloc->evicted = 0; + + /* Since the allocation is no longer evictable, and we ensure that + * it grows back to its pre-eviction size, we will consider the + * state of it to be ALLOCATED_MAPPED, as that is the only state + * in which a physical allocation could transition to NOT_MOVABLE + * from. 
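Note: the no_user_free get/put pair above replaces the old single-bit KBASE_REG_NO_USER_FREE with a counted claim, so several kbase subsystems can independently pin the same region. A hedged usage sketch follows; my_subsystem_* are hypothetical callers, not functions from the patch, and the locking mirrors the lockdep assertions in the helpers (kbase_gpu_vm_lock() takes kctx->reg_lock):

    /* Hypothetical owner pinning a region so userspace cannot free it. */
    static int my_subsystem_claim_region(struct kbase_context *kctx,
                                         struct kbase_va_region *reg)
    {
            int err = 0;

            kbase_gpu_vm_lock(kctx); /* holds kctx->reg_lock */

            /* Never pin a region whose backing pages may vanish. */
            if (kbase_is_region_shrinkable(reg))
                    err = -EINVAL;
            else
                    kbase_va_region_no_user_free_get(kctx, reg);

            kbase_gpu_vm_unlock(kctx);
            return err;
    }

    /* Balancing release: the region becomes user-freeable again only
     * once the last claimant has called the put.
     */
    static void my_subsystem_release_region(struct kbase_context *kctx,
                                            struct kbase_va_region *reg)
    {
            kbase_gpu_vm_lock(kctx);
            kbase_va_region_no_user_free_put(kctx, reg);
            kbase_gpu_vm_unlock(kctx);
    }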
+ */ + if (kbase_page_migration_enabled) + kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -977,7 +999,7 @@ int kbase_mem_flags_change(struct kbase_context *kctx, u64 gpu_addr, unsigned in * & GPU queue ringbuffer and none of them needs to be explicitly marked * as evictable by Userspace. */ - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; /* Is the region being transitioning between not needed and needed? */ @@ -1299,9 +1321,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, - alloc->group_id, mmu_sync_info); + kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, + kctx->as_nr, alloc->group_id, mmu_sync_info, NULL, true); if (err) goto bad_insert; @@ -1327,7 +1348,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx, bad_pad_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, kctx->as_nr); + alloc->nents, kctx->as_nr, true); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1356,7 +1377,7 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, int err; err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, kctx->as_nr); + alloc->pages, reg->nr_pages, kctx->as_nr, true); WARN_ON(err); } @@ -1885,9 +1906,9 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* validate found region */ if (kbase_is_region_invalid_or_free(aliasing_reg)) goto bad_handle; /* Not found/already free */ - if (aliasing_reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(aliasing_reg)) goto bad_handle; /* Ephemeral region */ - if (aliasing_reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, aliasing_reg)) goto bad_handle; /* JIT regions can't be * aliased. 
NO_USER_FREE flag * covers the entire lifetime @@ -2161,11 +2182,9 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, /* Map the new pages into the GPU */ phy_pages = kbase_get_gpu_phy_pages(reg); - ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, - reg->start_pfn + old_pages, - phy_pages + old_pages, delta, reg->flags, - kctx->as_nr, reg->gpu_alloc->group_id, - mmu_sync_info); + ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, + phy_pages + old_pages, delta, reg->flags, kctx->as_nr, + reg->gpu_alloc->group_id, mmu_sync_info, reg, false); return ret; } @@ -2194,7 +2213,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, kctx->as_nr); + alloc->pages + new_pages, delta, kctx->as_nr, false); return ret; } @@ -2259,10 +2278,10 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (atomic_read(®->cpu_alloc->kernel_mappings) > 0) goto out_unlock; - if (reg->flags & KBASE_REG_DONT_NEED) + if (kbase_is_region_shrinkable(reg)) goto out_unlock; - if (reg->flags & KBASE_REG_NO_USER_FREE) + if (kbase_va_region_is_no_user_free(kctx, reg)) goto out_unlock; #ifdef CONFIG_MALI_MEMORY_FULLY_BACKED @@ -2659,6 +2678,8 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, size_t size; int err = 0; + lockdep_assert_held(&kctx->reg_lock); + dev_dbg(kctx->kbdev->dev, "%s\n", __func__); size = (vma->vm_end - vma->vm_start); nr_pages = size >> PAGE_SHIFT; @@ -2772,7 +2793,6 @@ static int kbasep_reg_mmap(struct kbase_context *kctx, /* adjust down nr_pages to what we have physically */ *nr_pages = kbase_reg_current_backed_size(reg); - if (kbase_gpu_mmap(kctx, reg, vma->vm_start + *aligned_offset, reg->nr_pages, 1, mmu_sync_info) != 0) { dev_err(kctx->kbdev->dev, "%s:%d\n", __FILE__, __LINE__); @@ -3013,6 +3033,99 @@ void kbase_sync_mem_regions(struct kbase_context *kctx, } } +/** + * kbase_vmap_phy_pages_migrate_count_increment - Increment VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * @flags: Region flags. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is incremented by 1. Errors are handled by making pages + * not movable. Permanent kernel mappings will be marked as not movable, too. + */ +static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pages, + size_t page_count, unsigned long flags) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Mark permanent kernel mappings as NOT_MOVABLE because they're likely + * to stay mapped for a long time. However, keep on counting the number + * of mappings even for them: they don't represent an exception for the + * vmap_count. + * + * At the same time, errors need to be handled if a client tries to add + * too many mappings, hence a page may end up in the NOT_MOVABLE state + * anyway even if it's not a permanent kernel mapping. 
+ */ + if (flags & KBASE_REG_PERMANENT_KERNEL_MAPPING) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + if (page_md->vmap_count < U8_MAX) + page_md->vmap_count++; + else + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } +} + +/** + * kbase_vunmap_phy_pages_migrate_count_decrement - Decrement VMAP count for + * array of physical pages + * + * @pages: Array of pages. + * @page_count: Number of pages. + * + * This function is supposed to be called only if page migration support + * is enabled in the driver. + * + * The counter of kernel CPU mappings of the physical pages involved in a + * mapping operation is decremented by 1. Errors are handled by making pages + * not movable. + */ +static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *pages, + size_t page_count) +{ + size_t i; + + for (i = 0; i < page_count; i++) { + struct page *p = as_page(pages[i]); + struct kbase_page_metadata *page_md = kbase_page_private(p); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(pages[i]) || is_partial(pages[i])) + continue; + + spin_lock(&page_md->migrate_lock); + /* Decrement the number of mappings for all kinds of pages, including + * pages which are NOT_MOVABLE (e.g. permanent kernel mappings). + * However, errors still need to be handled if a client tries to remove + * more mappings than created. + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + else + page_md->vmap_count--; + spin_unlock(&page_md->migrate_lock); + } +} + static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_region *reg, u64 offset_bytes, size_t size, struct kbase_vmap_struct *map, kbase_vmap_flag vmap_flags) @@ -3085,6 +3198,13 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi */ cpu_addr = vmap(pages, page_count, VM_MAP, prot); + /* If page migration is enabled, increment the number of VMA mappings + * of all physical pages. In case of errors, e.g. too many mappings, + * make the page not movable to prevent trouble. + */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); + kfree(pages); if (!cpu_addr) @@ -3108,6 +3228,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi atomic_add(page_count, &kctx->permanent_mapped_pages); kbase_mem_phy_alloc_kernel_mapped(reg->cpu_alloc); + return 0; } @@ -3186,6 +3307,17 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, vunmap(addr); + /* If page migration is enabled, decrement the number of VMA mappings + * for all physical pages. Now is a good time to do it because references + * haven't been released yet. 
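Note: the vmap counter maintained here is what later lets kbase_page_isolate() refuse pages with live kernel mappings: a kernel vmap would keep pointing at the old physical page after a move, so it has to veto isolation. A condensed, illustrative predicate (a sketch only, not a function from the patch; the extra pool-side checks applied to MEM_POOL pages are omitted):

    static bool sketch_page_can_be_isolated(const struct kbase_page_metadata *md)
    {
            switch (PAGE_STATUS_GET(md->status)) {
            case ALLOCATED_MAPPED:
                    /* CPU and GPU mappings can be rewritten on migration,
                     * a kernel vmap cannot.
                     */
                    return md->vmap_count == 0;
            case PT_MAPPED:
                    return true;
            case MEM_POOL:
                    /* Pool pages pass further pool checks, omitted here. */
                    return true;
            case NOT_MOVABLE:
            default:
                    /* Pinned or transitory states are never isolated. */
                    return false;
            }
    }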
+ */ + if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + const size_t page_count = PFN_UP(map->offset_in_page + map->size); + struct tagged_addr *pages_array = map->cpu_pages; + + kbase_vunmap_phy_pages_migrate_count_decrement(pages_array, page_count); + } + if (map->flags & KBASE_VMAP_FLAG_SYNC_NEEDED) kbase_sync_mem_regions(kctx, map, KBASE_SYNC_TO_DEVICE); if (map->flags & KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING) { diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h index 5b12e18..6dda44b 100644 --- a/mali_kbase/mali_kbase_mem_linux.h +++ b/mali_kbase/mali_kbase_mem_linux.h @@ -284,7 +284,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *kctx, struct kbase_va_reg * have been released in the mean time. * * Or, it must have been refcounted with a call to kbase_va_region_alloc_get(), and the region * lock is now held again. - * * Or, @reg has had KBASE_REG_NO_USER_FREE set at creation time or under the region lock, and the + * * Or, @reg has had NO_USER_FREE set at creation time or under the region lock, and the * region lock is now held again. * * The acceptable @vmap_flags are those in %KBASE_VMAP_INPUT_FLAGS. diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c index dfa7025..8c62bd3 100644 --- a/mali_kbase/mali_kbase_mem_migrate.c +++ b/mali_kbase/mali_kbase_mem_migrate.c @@ -22,11 +22,11 @@ /** * DOC: Base kernel page migration implementation. */ - #include <linux/migrate.h> #include <mali_kbase.h> #include <mali_kbase_mem_migrate.h> +#include <mmu/mali_kbase_mmu.h> /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. @@ -36,7 +36,12 @@ int kbase_page_migration_enabled; module_param(kbase_page_migration_enabled, int, 0444); KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr) +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops; +#endif + +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id) { struct kbase_page_metadata *page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); @@ -48,17 +53,40 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a set_page_private(p, (unsigned long)page_md); page_md->dma_addr = dma_addr; page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATE_IN_PROGRESS); + page_md->vmap_count = 0; + page_md->group_id = group_id; spin_lock_init(&page_md->migrate_lock); lock_page(p); - if (kbdev->mem_migrate.mapping) - __SetPageMovable(p, kbdev->mem_migrate.mapping); +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(p, &movable_ops); +#else + /* In some corner cases, the driver may attempt to allocate memory pages + * even before the device file is open and the mapping for address space + * operations is created. In that case, it is impossible to assign address + * space operations to memory pages: simply pretend that they are movable, + * even if they are not. + * + * The page will go through all state transitions but it will never be + * actually considered movable by the kernel. This is due to the fact that + * the page cannot be marked as NOT_MOVABLE upon creation, otherwise the + * memory pool will always refuse to add it to the pool and schedule + * a worker thread to free it later. 
+ *
+ * Page metadata may seem redundant in this case, but it is not,
+ * because memory pools expect metadata to be present when page migration
+ * is enabled and because the pages may always return to memory pools and
+ * gain the movable property later on in their life cycle.
+ */
+ if (kbdev->mem_migrate.inode && kbdev->mem_migrate.inode->i_mapping)
+ __SetPageMovable(p, kbdev->mem_migrate.inode->i_mapping);
+#endif
 unlock_page(p);
 return true;
 }
-static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p)
+static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, u8 *group_id)
 {
 struct device *const dev = kbdev->dev;
 struct kbase_page_metadata *page_md;
@@ -68,6 +96,8 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p)
 if (!page_md)
 return;
+ if (group_id)
+ *group_id = page_md->group_id;
 dma_addr = kbase_dma_addr(p);
 dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -88,6 +118,7 @@ static void kbase_free_pages_worker(struct work_struct *work)
 spin_unlock(&mem_migrate->free_pages_lock);
 list_for_each_entry_safe(p, tmp, &free_list, lru) {
+ u8 group_id = 0;
 list_del_init(&p->lru);
 lock_page(p);
@@ -95,8 +126,8 @@ static void kbase_free_pages_worker(struct work_struct *work)
 __ClearPageMovable(p);
 unlock_page(p);
- kbase_free_page_metadata(kbdev, p);
- __free_pages(p, 0);
+ kbase_free_page_metadata(kbdev, p, &group_id);
+ kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0);
 }
 }
@@ -110,6 +141,135 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p)
 }
 /**
+ * kbasep_migrate_page_pt_mapped - Migrate a memory page that is mapped
+ * in a PGD of kbase_mmu_table.
+ *
+ * @old_page: Existing PGD page to remove
+ * @new_page: Destination for migrating the existing PGD page to
+ *
+ * Replace an existing PGD page with a new page by migrating its content. More specifically:
+ * the new page shall replace the existing PGD page in the MMU page table. Before returning,
+ * the new page shall be set as movable and not isolated, while the old page shall lose
+ * the movable property. The metadata attached to the PGD page is transferred to the
+ * new (replacement) page.
+ *
+ * Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure
+ * and the migration is aborted.
+ */
+static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new_page)
+{
+ struct kbase_page_metadata *page_md = kbase_page_private(old_page);
+ struct kbase_context *kctx = page_md->data.pt_mapped.mmut->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ dma_addr_t old_dma_addr = page_md->dma_addr;
+ dma_addr_t new_dma_addr;
+ int ret;
+
+ /* Create a new dma map for the new page */
+ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(kbdev->dev, new_dma_addr))
+ return -ENOMEM;
+
+ /* Lock context to protect access to the page in physical allocation.
+ * This blocks the CPU page fault handler from remapping pages.
+ * Only MCU's mmut is device wide, i.e. no corresponding kctx.
+ */ + kbase_gpu_vm_lock(kctx); + + ret = kbase_mmu_migrate_page( + as_tagged(page_to_phys(old_page)), as_tagged(page_to_phys(new_page)), old_dma_addr, + new_dma_addr, PGD_VPFN_LEVEL_GET_LEVEL(page_md->data.pt_mapped.pgd_vpfn_level)); + + if (ret == 0) { + dma_unmap_page(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + __ClearPageMovable(old_page); + ClearPagePrivate(old_page); + put_page(old_page); + +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); +#else + if (kbdev->mem_migrate.inode->i_mapping) + __SetPageMovable(new_page, kbdev->mem_migrate.inode->i_mapping); +#endif + SetPagePrivate(new_page); + get_page(new_page); + } else + dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/* + * kbasep_migrate_page_allocated_mapped - Migrate a memory page that is both + * allocated and mapped. + * + * @old_page: Page to remove. + * @new_page: Page to add. + * + * Replace an old page with a new page by migrating its content and all its + * CPU and GPU mappings. More specifically: the new page shall replace the + * old page in the MMU page table, as well as in the page array of the physical + * allocation, which is used to create CPU mappings. Before returning, the new + * page shall be set as movable and not isolated, while the old page shall lose + * the movable property. + */ +static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) +{ + struct kbase_page_metadata *page_md = kbase_page_private(old_page); + struct kbase_context *kctx = page_md->data.mapped.mmut->kctx; + dma_addr_t old_dma_addr, new_dma_addr; + int ret; + + old_dma_addr = page_md->dma_addr; + new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) + return -ENOMEM; + + /* Lock context to protect access to array of pages in physical allocation. + * This blocks the CPU page fault handler from remapping pages. + */ + kbase_gpu_vm_lock(kctx); + + /* Unmap the old physical range. */ + unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, + PAGE_SIZE, 1); + + ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), + as_tagged(page_to_phys(new_page)), old_dma_addr, new_dma_addr, + MIDGARD_MMU_BOTTOMLEVEL); + + if (ret == 0) { + dma_unmap_page(kctx->kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + SetPagePrivate(new_page); + get_page(new_page); + + /* Clear PG_movable from the old page and release reference. */ + ClearPagePrivate(old_page); + __ClearPageMovable(old_page); + put_page(old_page); + + /* Set PG_movable to the new page. */ +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) + __SetPageMovable(new_page, &movable_ops); +#else + if (kctx->kbdev->mem_migrate.inode->i_mapping) + __SetPageMovable(new_page, kctx->kbdev->mem_migrate.inode->i_mapping); +#endif + } else + dma_unmap_page(kctx->kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Page fault handler for CPU mapping unblocked. */ + kbase_gpu_vm_unlock(kctx); + + return ret; +} + +/** * kbase_page_isolate - Isolate a page for migration. * * @p: Pointer of the page struct of page to isolate. 
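Note: both kbasep_migrate_page_pt_mapped() and kbasep_migrate_page_allocated_mapped() above follow the same shape; the sketch below condenses it under that assumption (error paths, the unmap_mapping_range() step for CPU mappings, and the movable-flag handover are elided; names are from the patch):

    /* Condensed common shape of the two migration callbacks above. */
    static int sketch_migrate_one_page(struct kbase_device *kbdev,
                                       struct kbase_context *kctx,
                                       struct page *old_page,
                                       struct page *new_page, int mmu_level)
    {
            struct kbase_page_metadata *page_md = kbase_page_private(old_page);
            dma_addr_t new_dma_addr;
            int ret;

            /* 1. Give the destination page a DMA mapping up front. */
            new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE,
                                        DMA_BIDIRECTIONAL);
            if (dma_mapping_error(kbdev->dev, new_dma_addr))
                    return -ENOMEM;

            /* 2. Block the CPU page fault handler from remapping pages. */
            kbase_gpu_vm_lock(kctx);

            /* 3. Copy content and swap the GPU page table entry. */
            ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)),
                                         as_tagged(page_to_phys(new_page)),
                                         page_md->dma_addr, new_dma_addr,
                                         mmu_level);

            /* 4. On success, movability and the metadata reference travel
             * from old_page to new_page; on failure, undo the DMA map.
             */
            if (ret)
                    dma_unmap_page(kbdev->dev, new_dma_addr, PAGE_SIZE,
                                   DMA_BIDIRECTIONAL);

            kbase_gpu_vm_unlock(kctx);
            return ret;
    }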
@@ -127,6 +287,9 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) CSTD_UNUSED(mode); + if (!PageMovable(p) || !page_md) + return false; + if (!spin_trylock(&page_md->migrate_lock)) return false; @@ -146,17 +309,28 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) atomic_inc(&mem_pool->isolation_in_progress_cnt); break; case ALLOCATED_MAPPED: + /* Mark the page into isolated state, but only if it has no + * kernel CPU mappings + */ + if (page_md->vmap_count == 0) + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); + break; case PT_MAPPED: - /* Only pages in a memory pool can be isolated for now. */ + /* Mark the page into isolated state. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 1); break; case SPILL_IN_PROGRESS: case ALLOCATE_IN_PROGRESS: case FREE_IN_PROGRESS: - /* Transitory state: do nothing. */ + break; + case NOT_MOVABLE: + /* Opportunistically clear the movable property for these pages */ + __ClearPageMovable(p); break; default: /* State should always fall in one of the previous cases! - * Also notice that FREE_ISOLATED_IN_PROGRESS is impossible because + * Also notice that FREE_ISOLATED_IN_PROGRESS or + * FREE_PT_ISOLATED_IN_PROGRESS is impossible because * that state only applies to pages that are already isolated. */ page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); @@ -204,17 +378,31 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Return: 0 on success, error code otherwise. */ +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) static int kbase_page_migrate(struct address_space *mapping, struct page *new_page, struct page *old_page, enum migrate_mode mode) +#else +static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum migrate_mode mode) +#endif { int err = 0; bool status_mem_pool = false; + bool status_free_pt_isolated_in_progress = false; + bool status_free_isolated_in_progress = false; + bool status_pt_mapped = false; + bool status_mapped = false; + bool status_not_movable = false; struct kbase_page_metadata *page_md = kbase_page_private(old_page); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) CSTD_UNUSED(mapping); +#endif CSTD_UNUSED(mode); + if (!PageMovable(old_page) || !page_md) + return -EINVAL; + if (!spin_trylock(&page_md->migrate_lock)) return -EAGAIN; @@ -229,10 +417,22 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + status_mapped = true; + break; case PT_MAPPED: + status_pt_mapped = true; + break; case FREE_ISOLATED_IN_PROGRESS: - case MULTI_MAPPED: - /* So far, only pages in a memory pool can be migrated. */ + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; + break; + case NOT_MOVABLE: + status_not_movable = true; + break; default: /* State should always fall in one of the previous cases! 
*/ err = -EAGAIN; @@ -241,17 +441,27 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa spin_unlock(&page_md->migrate_lock); - if (status_mem_pool) { + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - kbase_free_page_metadata(kbdev, old_page); + kbase_free_page_metadata(kbdev, old_page, NULL); __ClearPageMovable(old_page); + put_page(old_page); /* Just free new page to avoid lock contention. */ INIT_LIST_HEAD(&new_page->lru); + get_page(new_page); set_page_private(new_page, 0); kbase_free_page_later(kbdev, new_page); queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } else if (status_not_movable) { + __ClearPageMovable(old_page); + err = -EINVAL; + } else if (status_mapped) { + err = kbasep_migrate_page_allocated_mapped(old_page, new_page); + } else if (status_pt_mapped) { + err = kbasep_migrate_page_pt_mapped(old_page, new_page); } return err; @@ -270,13 +480,17 @@ static int kbase_page_migrate(struct address_space *mapping, struct page *new_pa static void kbase_page_putback(struct page *p) { bool status_mem_pool = false; + bool status_free_isolated_in_progress = false; + bool status_free_pt_isolated_in_progress = false; struct kbase_page_metadata *page_md = kbase_page_private(p); - struct kbase_device *kbdev; + struct kbase_device *kbdev = NULL; spin_lock(&page_md->migrate_lock); - /* Page must have been isolated to reach here but metadata is incorrect. */ - WARN_ON(!IS_PAGE_ISOLATED(page_md->status)); + if (WARN_ON(!IS_PAGE_ISOLATED(page_md->status))) { + spin_unlock(&page_md->migrate_lock); + return; + } switch (PAGE_STATUS_GET(page_md->status)) { case MEM_POOL: @@ -284,11 +498,22 @@ static void kbase_page_putback(struct page *p) kbdev = page_md->data.mem_pool.kbdev; break; case ALLOCATED_MAPPED: + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; case PT_MAPPED: - case FREE_ISOLATED_IN_PROGRESS: - /* Only pages in a memory pool can be isolated for now. - * Therefore only pages in a memory pool can be 'putback'. + case NOT_MOVABLE: + /* Pages should no longer be isolated if they are in a stable state + * and used by the driver. */ + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + break; + case FREE_ISOLATED_IN_PROGRESS: + status_free_isolated_in_progress = true; + kbdev = page_md->data.free_isolated.kbdev; + break; + case FREE_PT_ISOLATED_IN_PROGRESS: + status_free_pt_isolated_in_progress = true; + kbdev = page_md->data.free_pt_isolated.kbdev; break; default: /* State should always fall in one of the previous cases! */ @@ -297,34 +522,57 @@ static void kbase_page_putback(struct page *p) spin_unlock(&page_md->migrate_lock); - /* If page was in a memory pool then just free it to avoid lock contention. */ - if (!WARN_ON(!status_mem_pool)) { - struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; - + /* If page was in a memory pool then just free it to avoid lock contention. The + * same is also true to status_free_pt_isolated_in_progress. 
+ */ + if (status_mem_pool || status_free_isolated_in_progress || + status_free_pt_isolated_in_progress) { __ClearPageMovable(p); - list_del_init(&p->lru); - kbase_free_page_later(kbdev, p); - queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + if (!WARN_ON_ONCE(!kbdev)) { + struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + + kbase_free_page_later(kbdev, p); + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } } } +#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) +static const struct movable_operations movable_ops = { + .isolate_page = kbase_page_isolate, + .migrate_page = kbase_page_migrate, + .putback_page = kbase_page_putback, +}; +#else static const struct address_space_operations kbase_address_space_ops = { .isolate_page = kbase_page_isolate, .migratepage = kbase_page_migrate, .putback_page = kbase_page_putback, }; +#endif +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + mutex_lock(&kbdev->fw_load_lock); + if (filp) { filp->f_inode->i_mapping->a_ops = &kbase_address_space_ops; - if (!kbdev->mem_migrate.mapping) - kbdev->mem_migrate.mapping = filp->f_inode->i_mapping; - else - WARN_ON(kbdev->mem_migrate.mapping != filp->f_inode->i_mapping); + if (!kbdev->mem_migrate.inode) { + kbdev->mem_migrate.inode = filp->f_inode; + /* This reference count increment is balanced by iput() + * upon termination. + */ + atomic_inc(&filp->f_inode->i_count); + } else { + WARN_ON(kbdev->mem_migrate.inode != filp->f_inode); + } } + + mutex_unlock(&kbdev->fw_load_lock); } +#endif void kbase_mem_migrate_init(struct kbase_device *kbdev) { @@ -336,6 +584,9 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + mem_migrate->inode = NULL; +#endif mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); @@ -347,4 +598,7 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev) if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) + iput(mem_migrate->inode); +#endif } diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h index 6610c0c..30d0803 100644 --- a/mali_kbase/mali_kbase_mem_migrate.h +++ b/mali_kbase/mali_kbase_mem_migrate.h @@ -41,6 +41,8 @@ extern int kbase_page_migration_enabled; * @kbdev: Pointer to kbase device. * @p: Page to assign metadata to. * @dma_addr: DMA address mapped to paged. + * @group_id: Memory group ID associated with the entity that is + * allocating the page metadata. * * This will allocate memory for the page's metadata, initialize it and * assign a reference to the page's private field. Importantly, once @@ -49,7 +51,8 @@ extern int kbase_page_migration_enabled; * * Return: true if successful or false otherwise. */ -bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr); +bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, + u8 group_id); /** * kbase_free_page_later - Defer freeing of given page. 
@@ -61,6 +64,7 @@ bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_a */ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); +#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) /* * kbase_mem_migrate_set_address_space_ops - Set address space operations * @@ -72,6 +76,7 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p); * add a reference to @kbdev. */ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp); +#endif /* * kbase_mem_migrate_init - Initialise kbase page migration diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c index dce066d..bede1f4 100644 --- a/mali_kbase/mali_kbase_mem_pool.c +++ b/mali_kbase/mali_kbase_mem_pool.c @@ -57,37 +57,59 @@ static bool kbase_mem_pool_is_empty(struct kbase_mem_pool *pool) return kbase_mem_pool_size(pool) == 0; } -static void set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, +static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page *p, struct list_head *page_list, size_t *list_size) { struct kbase_page_metadata *page_md = kbase_page_private(p); + bool not_movable = false; lockdep_assert_held(&pool->pool_lock); + /* Free the page instead of adding it to the pool if it's not movable. + * Only update page status and add the page to the memory pool if + * it is not isolated. + */ spin_lock(&page_md->migrate_lock); - /* Only update page status and add the page to the memory pool if it is not isolated */ - if (!WARN_ON(IS_PAGE_ISOLATED(page_md->status))) { + if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) { + not_movable = true; + } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) { page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL); page_md->data.mem_pool.pool = pool; page_md->data.mem_pool.kbdev = pool->kbdev; - list_move(&p->lru, page_list); + list_add(&p->lru, page_list); (*list_size)++; } spin_unlock(&page_md->migrate_lock); + + if (not_movable) { + kbase_free_page_later(pool->kbdev, p); + pool_dbg(pool, "skipping a not movable page\n"); + } + + return not_movable; } static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool, struct page *p) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); - if (!pool->order && kbase_page_migration_enabled) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); - else { + if (!pool->order && kbase_page_migration_enabled) { + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) + queue_work_to_free = true; + } else { list_add(&p->lru, &pool->page_list); pool->cur_size++; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added page\n"); } @@ -101,18 +123,29 @@ static void kbase_mem_pool_add(struct kbase_mem_pool *pool, struct page *p) static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool, struct list_head *page_list, size_t nr_pages) { + bool queue_work_to_free = false; + lockdep_assert_held(&pool->pool_lock); if (!pool->order && kbase_page_migration_enabled) { struct page *p, *tmp; - list_for_each_entry_safe(p, tmp, page_list, lru) - set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size); + list_for_each_entry_safe(p, tmp, page_list, lru) { + list_del_init(&p->lru); + if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size)) 
+ queue_work_to_free = true; + } } else { list_splice(page_list, &pool->page_list); pool->cur_size += nr_pages; } + if (queue_work_to_free) { + struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; + + queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); + } + pool_dbg(pool, "added %zu pages\n", nr_pages); } @@ -226,7 +259,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) /* Setup page metadata for 4KB pages when page migration is enabled */ if (!pool->order && kbase_page_migration_enabled) { INIT_LIST_HEAD(&p->lru); - if (!kbase_alloc_page_metadata(kbdev, p, dma_addr)) { + if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, pool->group_id, p, pool->order); @@ -460,7 +493,11 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool * struct shrinker does not define batch */ pool->reclaim.batch = 0; +#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE register_shrinker(&pool->reclaim); +#else + register_shrinker(&pool->reclaim, "mali-mem-pool"); +#endif pool_dbg(pool, "initialized\n"); @@ -636,10 +673,12 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages, /* Get pages from this pool */ kbase_mem_pool_lock(pool); nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool)); + while (nr_from_pool--) { int j; p = kbase_mem_pool_remove_locked(pool, ALLOCATE_IN_PROGRESS); + if (pool->order) { pages[i++] = as_tagged_tag(page_to_phys(p), HUGE_HEAD | HUGE_PAGE); @@ -867,7 +906,6 @@ void kbase_mem_pool_free_pages(struct kbase_mem_pool *pool, size_t nr_pages, pages[i] = as_tagged(0); continue; } - p = as_page(pages[i]); kbase_mem_pool_free_page(pool, p); diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index 063b29a..5f3dabd 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -41,6 +41,11 @@ #include <linux/version_compat_defs.h> #include <linux/workqueue.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* Hwcnt reader API version */ #define HWCNT_READER_API 1 diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index db20860..d1e4078 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -88,12 +88,11 @@ static void submit_work_pagefault(struct kbase_device *kbdev, u32 as_nr, * context's address space, when the page fault occurs for * MCU's address space. 
*/ - if (!queue_work(as->pf_wq, &as->work_pagefault)) - kbase_ctx_sched_release_ctx(kctx); - else { + if (!queue_work(as->pf_wq, &as->work_pagefault)) { dev_dbg(kbdev->dev, - "Page fault is already pending for as %u\n", - as_nr); + "Page fault is already pending for as %u", as_nr); + kbase_ctx_sched_release_ctx(kctx); + } else { atomic_inc(&kbdev->faults_pending); } } @@ -559,7 +558,7 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i) kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index c909cd0..e39c8ad 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <linux/dma-mapping.h> +#include <linux/migrate.h> #include <mali_kbase.h> #include <gpu/mali_kbase_gpu_fault.h> #include <gpu/mali_kbase_gpu_regmap.h> @@ -156,7 +157,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz } else if (op == KBASE_MMU_OP_FLUSH_MEM) { flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC; } else { - dev_warn(kbdev->dev, "Invalid flush request (op = %d)\n", op); + dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op); return; } @@ -167,7 +168,7 @@ static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, siz * perform a reset to recover */ dev_err(kbdev->dev, - "Flush for physical address range did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for physical address range did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -230,9 +231,8 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as */ dev_err(kbdev->dev, "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); - if (kbase_prepare_to_reset_gpu( - kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); + if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu_locked(kbdev); } spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -326,7 +326,7 @@ static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct * perform a reset to recover. */ dev_err(kbdev->dev, - "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover\n"); + "Flush for GPU page table update did not complete. Issuing GPU soft-reset to recover"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) kbase_reset_gpu(kbdev); @@ -420,6 +420,65 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, u64 vpfn, int level, enum kbase_mmu_op_type flush_op, u64 *dirty_pgds, struct list_head *free_pgds_list); + +static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) +{ + atomic_sub(1, &kbdev->memdev.used_pages); + + /* If MMU tables belong to a context then pages will have been accounted + * against it, so we must decrement the usage counts here. 
+ */ + if (mmut->kctx) { + kbase_process_page_usage_dec(mmut->kctx, 1); + atomic_sub(1, &mmut->kctx->used_pages); + } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); +} + +static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p) +{ + struct kbase_page_metadata *page_md = kbase_page_private(p); + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + if (!kbase_page_migration_enabled) + return false; + + spin_lock(&page_md->migrate_lock); + if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) { + WARN_ON_ONCE(!mmut->kctx); + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET(page_md->status, + FREE_PT_ISOLATED_IN_PROGRESS); + page_md->data.free_pt_isolated.kbdev = kbdev; + page_is_isolated = true; + } else { + page_md->status = + PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); + } + } else { + WARN_ON_ONCE(mmut->kctx); + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); + } + spin_unlock(&page_md->migrate_lock); + + if (unlikely(page_is_isolated)) { + /* Do the CPU cache flush and accounting here for the isolated + * PGD page, which is done inside kbase_mmu_free_pgd() for the + * PGD page that did not get isolated. + */ + dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE, + DMA_BIDIRECTIONAL); + kbase_mmu_account_freed_pgd(kbdev, mmut); + } + + return page_is_isolated; +} + /** * kbase_mmu_free_pgd() - Free memory of the page directory * @@ -441,17 +500,7 @@ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true); - atomic_sub(1, &kbdev->memdev.used_pages); - - /* If MMU tables belong to a context then pages will have been accounted - * against it, so we must decrement the usage counts here. 
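Note: kbase_mmu_handle_isolated_pgd_page() exists because a PGD page that the migration core has isolated cannot be freed synchronously; teardown instead records a handover and lets kbase_page_migrate() or kbase_page_putback() complete the free. A condensed view of the decision, extracted from the function above (not a drop-in snippet):

    /* Condensed from kbase_mmu_handle_isolated_pgd_page(). */
    spin_lock(&page_md->migrate_lock);
    if (IS_PAGE_ISOLATED(page_md->status)) {
            /* Migration core holds the page: tag it so the free is
             * finished from the migration callbacks.
             */
            page_md->status = PAGE_STATUS_SET(page_md->status,
                                              FREE_PT_ISOLATED_IN_PROGRESS);
            page_md->data.free_pt_isolated.kbdev = kbdev;
    } else {
            page_md->status = PAGE_STATUS_SET(page_md->status,
                                              FREE_IN_PROGRESS);
    }
    spin_unlock(&page_md->migrate_lock);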
- */ - if (mmut->kctx) { - kbase_process_page_usage_dec(mmut->kctx, 1); - atomic_sub(1, &mmut->kctx->used_pages); - } - - kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); + kbase_mmu_account_freed_pgd(kbdev, mmut); } /** @@ -482,6 +531,20 @@ static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mm mutex_unlock(&mmut->mmu_lock); } +static void kbase_mmu_add_to_free_pgds_list(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, + struct page *p, struct list_head *free_pgds_list) +{ + bool page_is_isolated = false; + + lockdep_assert_held(&mmut->mmu_lock); + + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + list_add(&p->lru, free_pgds_list); +} + /** * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to * a region on a GPU page fault @@ -509,7 +572,7 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, if (!multiple) { dev_warn( kbdev->dev, - "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n", + "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW", ((unsigned long long)reg->start_pfn) << PAGE_SHIFT); return minimum_extra; } @@ -917,7 +980,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, */ dev_warn( kctx->kbdev->dev, - "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available\n", + "Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available", new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced, total_mempools_free_4k); *pages_to_grow = 0; @@ -985,9 +1048,8 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) as_no = faulting_as->number; kbdev = container_of(faulting_as, struct kbase_device, as[as_no]); - dev_dbg(kbdev->dev, - "Entering %s %pK, fault_pfn %lld, as_no %d\n", - __func__, (void *)data, fault_pfn, as_no); + dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data, + fault_pfn, as_no); /* Grab the context that was already refcounted in kbase_mmu_interrupt() * Therefore, it cannot be scheduled out of this AS until we explicitly @@ -1010,8 +1072,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); goto fault_done; } #endif @@ -1206,8 +1267,7 @@ page_fault_retry: /* cap to max vsize */ new_pages = min(new_pages, region->nr_pages - current_backed_size); - dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault\n", - new_pages); + dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages); if (new_pages == 0) { struct kbase_mmu_hw_op_param op_param; @@ -1284,11 +1344,10 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. 
*/ - err = kbase_mmu_insert_pages_no_flush(kbdev, &kctx->mmu, - region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], - new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds); + err = kbase_mmu_insert_pages_no_flush( + kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, false); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -1314,16 +1373,11 @@ page_fault_retry: if (region->threshold_pages && kbase_reg_current_backed_size(region) > region->threshold_pages) { - - dev_dbg(kctx->kbdev->dev, - "%zu pages exceeded IR threshold %zu\n", - new_pages + current_backed_size, - region->threshold_pages); + dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu", + new_pages + current_backed_size, region->threshold_pages); if (kbase_mmu_switch_to_ir(kctx, region) >= 0) { - dev_dbg(kctx->kbdev->dev, - "Get region %pK for IR\n", - (void *)region); + dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region); kbase_va_region_alloc_get(kctx, region); } } @@ -1441,7 +1495,7 @@ page_fault_retry: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Page allocation failure", fault); } else { - dev_dbg(kbdev->dev, "Try again after pool_grow\n"); + dev_dbg(kbdev->dev, "Try again after pool_grow"); goto page_fault_retry; } } @@ -1468,7 +1522,7 @@ fault_done: release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); - dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK\n", (void *)data); + dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data); } static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, @@ -1532,11 +1586,10 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * u64 *dirty_pgds) { u64 *page; + u64 pgd_vpfn = vpfn; phys_addr_t target_pgd; struct page *p; - KBASE_DEBUG_ASSERT(*pgd); - lockdep_assert_held(&mmut->mmu_lock); /* @@ -1549,7 +1602,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * p = pfn_to_page(PFN_DOWN(*pgd)); page = kmap(p); if (page == NULL) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; } @@ -1559,8 +1612,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * target_pgd = kbase_mmu_alloc_pgd(kbdev, mmut); if (target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS) { - dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure\n", - __func__); + dev_dbg(kbdev->dev, "%s: kbase_mmu_alloc_pgd failure", __func__); kunmap(p); return -ENOMEM; } @@ -1585,9 +1637,32 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * * GPU cache is still needed. For explanation, please refer * the comment in kbase_mmu_insert_pages_no_flush(). 
*/ - kbase_mmu_sync_pgd(kbdev, mmut->kctx, *pgd + (vpfn * sizeof(u64)), - kbase_dma_addr(p) + (vpfn * sizeof(u64)), sizeof(u64), - KBASE_MMU_OP_FLUSH_PT); + kbase_mmu_sync_pgd(kbdev, mmut->kctx, + *pgd + (vpfn * sizeof(u64)), + kbase_dma_addr(p) + (vpfn * sizeof(u64)), + sizeof(u64), KBASE_MMU_OP_FLUSH_PT); + + /* Update the new target_pgd page to its stable state */ + if (kbase_page_migration_enabled) { + struct kbase_page_metadata *page_md = + kbase_page_private(phys_to_page(target_pgd)); + + spin_lock(&page_md->migrate_lock); + + WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS || + IS_PAGE_ISOLATED(page_md->status)); + + if (mmut->kctx) { + page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED); + page_md->data.pt_mapped.mmut = mmut; + page_md->data.pt_mapped.pgd_vpfn_level = + PGD_VPFN_LEVEL_SET(pgd_vpfn, level); + } else { + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + } + + spin_unlock(&page_md->migrate_lock); + } } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( @@ -1618,9 +1693,8 @@ static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_tab mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l, newly_created_pgd, dirty_pgds); /* Handle failure condition */ if (err) { - dev_dbg(kbdev->dev, - "%s: mmu_get_next_pgd failure at level %d\n", - __func__, l); + dev_dbg(kbdev->dev, "%s: mmu_get_next_pgd failure at level %d", __func__, + l); return err; } } @@ -1640,7 +1714,8 @@ static int mmu_get_bottom_pgd(struct kbase_device *kbdev, struct kbase_mmu_table static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 from_vpfn, u64 to_vpfn, u64 *dirty_pgds, - struct list_head *free_pgds_list) + struct list_head *free_pgds_list, + struct tagged_addr *phys, bool ignore_page_migration) { u64 vpfn = from_vpfn; struct kbase_mmu_mode const *mmu_mode; @@ -1693,8 +1768,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, pcount = count; break; default: - dev_warn(kbdev->dev, "%sNo support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level); goto next; } @@ -1713,7 +1787,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, if (!num_of_valid_entries) { kunmap(p); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, KBASE_MMU_OP_NONE, dirty_pgds, @@ -1734,6 +1808,27 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, next: vpfn += count; } + + /* If page migration is enabled: the only way to recover from failure + * is to mark all pages as not movable. It is not predictable what's + * going to happen to these pages at this stage. They might return + * movable once they are returned to a memory pool. 
+ */ + if (kbase_page_migration_enabled && !ignore_page_migration && phys) { + const u64 num_pages = to_vpfn - from_vpfn + 1; + u64 i; + + for (i = 0; i < num_pages; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + } } static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, @@ -1806,6 +1901,20 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, if (nr == 0) return 0; + /* If page migration is enabled, pages involved in multiple GPU mappings + * are always treated as not movable. + */ + if (kbase_page_migration_enabled) { + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + if (page_md) { + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + spin_unlock(&page_md->migrate_lock); + } + } + mutex_lock(&kctx->mmu.mmu_lock); while (remain) { @@ -1842,15 +1951,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, mutex_lock(&kctx->mmu.mmu_lock); } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure\n", - __func__); + dev_warn(kbdev->dev, "%s: mmu_get_bottom_pgd failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } goto fail_unlock; } @@ -1858,14 +1967,15 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (recover_required) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, &kctx->mmu, start_vpfn, start_vpfn + recover_count, - &dirty_pgds, &free_pgds_list); + &dirty_pgds, &free_pgds_list, + NULL, true); } err = -ENOMEM; goto fail_unlock; @@ -1931,6 +2041,85 @@ fail_unlock: return err; } +static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, + struct kbase_va_region *reg, + struct kbase_mmu_table *mmut, const u64 vpfn) +{ + struct page *phys_page = as_page(phys); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + spin_lock(&page_md->migrate_lock); + + /* If no GPU va region is given: the metadata provided are + * invalid. + * + * If the page is already allocated and mapped: this is + * an additional GPU mapping, probably to create a memory + * alias, which means it is no longer possible to migrate + * the page easily because tracking all the GPU mappings + * would be too costly. + * + * In any case: the page becomes not movable. It is kept + * alive, but attempts to migrate it will fail. The page + * will be freed if it is still not movable when it returns + * to a memory pool. Notice that the movable flag is not + * cleared because that would require taking the page lock. 
+ */ + if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE); + } else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) { + page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED); + page_md->data.mapped.reg = reg; + page_md->data.mapped.mmut = mmut; + page_md->data.mapped.vpfn = vpfn; + } + + spin_unlock(&page_md->migrate_lock); +} + +static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, + struct tagged_addr *phys, size_t requested_nr) +{ + size_t i; + + for (i = 0; i < requested_nr; i++) { + struct page *phys_page = as_page(phys[i]); + struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + + /* Skip the 4KB page that is part of a large page, as the large page is + * excluded from the migration process. + */ + if (is_huge(phys[i]) || is_partial(phys[i])) + continue; + + if (page_md) { + u8 status; + + spin_lock(&page_md->migrate_lock); + status = PAGE_STATUS_GET(page_md->status); + + if (status == ALLOCATED_MAPPED) { + if (IS_PAGE_ISOLATED(page_md->status)) { + page_md->status = PAGE_STATUS_SET( + page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS); + page_md->data.free_isolated.kbdev = kbdev; + /* At this point, we still have a reference + * to the page via its page migration metadata, + * and any page with the FREE_ISOLATED_IN_PROGRESS + * status will subsequently be freed in either + * kbase_page_migrate() or kbase_page_putback() + */ + phys[i] = as_tagged(0); + } else + page_md->status = PAGE_STATUS_SET(page_md->status, + (u8)FREE_IN_PROGRESS); + } + + spin_unlock(&page_md->migrate_lock); + } + } +} + u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, struct tagged_addr const phy, unsigned long const flags, int const level, int const group_id) @@ -1944,7 +2133,8 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds) + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2006,14 +2196,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } while (!err); if (err) { - dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure\n", __func__); + dev_warn(kbdev->dev, "%s: mmu_get_pgd_at_level failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } goto fail_unlock; } @@ -2021,15 +2212,15 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu p = pfn_to_page(PFN_DOWN(pgd)); pgd_page = kmap(p); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", - __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); if (insert_vpfn != start_vpfn) { /* Invalidate the pages we have partially * completed */ mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds, - &free_pgds_list); + &free_pgds_list, phys, + ignore_page_migration); } err = -ENOMEM; goto fail_unlock; @@ -2060,6 +2251,14 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu *target = kbase_mmu_create_ate(kbdev, phys[i], 
flags, cur_level, group_id); + + /* If page migration is enabled, this is the right time + * to update the status of the page. + */ + if (kbase_page_migration_enabled && !ignore_page_migration && + !is_huge(phys[i]) && !is_partial(phys[i])) + kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, + insert_vpfn + i); } num_of_valid_entries += count; } @@ -2104,8 +2303,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu fail_unlock: mutex_unlock(&mmut->mmu_lock); - mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, *dirty_pgds, - CALLER_MMU_ASYNC); + mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, + dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC); kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2115,11 +2314,10 @@ fail_unlock: * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. */ -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info) +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration) { int err; u64 dirty_pgds = 0; @@ -2130,7 +2328,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, return 0; err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds); + &dirty_pgds, reg, ignore_page_migration); if (err) return err; @@ -2285,7 +2483,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_pgd + (index * sizeof(u64)), sizeof(u64), flush_op); - list_add(&p->lru, free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, free_pgds_list); } else { current_valid_entries--; @@ -2361,11 +2559,12 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * @mmut: Pointer to GPU MMU page table. * @vpfn: Start page frame number of the GPU virtual pages to unmap. * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is only used for GPU cache - * maintenance. + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. * @nr: Number of pages to unmap. * @as_nr: Address space number, for GPU cache maintenance operations * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. * * We actually discard the ATE and free the page table pages if no valid entries * exist in PGD. @@ -2384,10 +2583,11 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, * Return: 0 on success, otherwise an error code. 
*/ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr) + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration) { + const size_t requested_nr = nr; u64 start_vpfn = vpfn; - size_t requested_nr = nr; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; struct kbase_mmu_mode const *mmu_mode; struct kbase_mmu_hw_op_param op_param; @@ -2478,9 +2678,8 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table switch (level) { case MIDGARD_MMU_LEVEL(0): case MIDGARD_MMU_LEVEL(1): - dev_warn(kbdev->dev, - "%s: No support for ATEs at level %d\n", - __func__, level); + dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, + level); kunmap(p); goto out; case MIDGARD_MMU_LEVEL(2): @@ -2488,9 +2687,10 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table if (count >= 512) { pcount = 1; } else { - dev_warn(kbdev->dev, - "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down\n", - __func__, count); + dev_warn( + kbdev->dev, + "%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down", + __func__, count); pcount = 0; } break; @@ -2499,9 +2699,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pcount = count; break; default: - dev_err(kbdev->dev, - "%s: found non-mapped memory, early out\n", - __func__); + dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__); vpfn += count; nr -= count; continue; @@ -2530,7 +2728,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table pgd + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); - list_add(&p->lru, &free_pgds_list); + kbase_mmu_add_to_free_pgds_list(kbdev, mmut, p, &free_pgds_list); kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level, flush_op, &dirty_pgds, @@ -2553,7 +2751,6 @@ next: } err = 0; out: - mutex_unlock(&mmut->mmu_lock); /* Set up MMU operation parameters. See above about MMU cache flush strategy. */ op_param = (struct kbase_mmu_hw_op_param){ .vpfn = start_vpfn, @@ -2566,6 +2763,16 @@ out: }; mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, &op_param); + /* If page migration is enabled: the status of all physical pages involved + * shall be updated, unless they are not movable. Their status shall be + * updated before releasing the lock to protect against concurrent + * requests to migrate the pages, if they have been isolated. + */ + if (kbase_page_migration_enabled && phys && !ignore_page_migration) + kbase_mmu_progress_migration_on_teardown(kbdev, phys, requested_nr); + + mutex_unlock(&mmut->mmu_lock); + kbase_mmu_free_pgds_list(kbdev, mmut, &free_pgds_list); return err; @@ -2737,6 +2944,353 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, return err; } +static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = true; +} + +static void mmu_page_migration_transaction_end(struct kbase_device *kbdev) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress); + kbdev->mmu_page_migrate_in_progress = false; + /* Invoke the PM state machine, as the MMU page migration session + * may have deferred a transition in L2 state machine. 
+ */ + kbase_pm_update_state(kbdev); +} + +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level) +{ + struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys)); + struct kbase_mmu_hw_op_param op_param; + struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + page_md->data.mapped.mmut : + page_md->data.pt_mapped.mmut; + struct kbase_device *kbdev; + phys_addr_t pgd; + u64 *old_page, *new_page, *pgd_page, *target, vpfn; + int index, check_state, ret = 0; + unsigned long hwaccess_flags = 0; + unsigned int num_of_valid_entries; + u8 vmap_count = 0; + + /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, + * here we skip the no kctx case, which is only used with MCU's mmut. + */ + if (!mmut->kctx) + return -EINVAL; + + if (level > MIDGARD_MMU_BOTTOMLEVEL) + return -EINVAL; + else if (level == MIDGARD_MMU_BOTTOMLEVEL) + vpfn = page_md->data.mapped.vpfn; + else + vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level); + + kbdev = mmut->kctx->kbdev; + index = (vpfn >> ((3 - level) * 9)) & 0x1FF; + + /* Create all mappings before copying content. + * This is done as early as possible because it is the only operation that may + * fail. It is possible to do this before taking any locks because the + * pages to migrate are not going to change and even the parent PGD is not + * going to be affected by any other concurrent operation, since the page + * has been isolated before migration and therefore it cannot disappear in + * the middle of this function. + */ + old_page = kmap(as_page(old_phys)); + if (!old_page) { + dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); + ret = -EINVAL; + goto old_page_map_error; + } + + new_page = kmap(as_page(new_phys)); + if (!new_page) { + dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); + ret = -EINVAL; + goto new_page_map_error; + } + + /* GPU cache maintenance affects both memory content and page table, + * but at two different stages. A single virtual memory page is affected + * by the migration. + * + * Notice that the MMU maintenance is done in the following steps: + * + * 1) The MMU region is locked without performing any other operation. + * This lock must cover the entire migration process, in order to + * prevent any GPU access to the virtual page whose physical page + * is being migrated. + * 2) Immediately after locking: the MMU region content is flushed via + * GPU control while the lock is taken and without unlocking. + * The region must stay locked for the duration of the whole page + * migration procedure. + * This is necessary to make sure that pending writes to the old page + * are finalized before copying content to the new page. + * 3) Before unlocking: changes to the page table are flushed. + * Finer-grained GPU control operations are used if possible, otherwise + * the whole GPU cache shall be flushed again. + * This is necessary to make sure that the GPU accesses the new page + * after migration. + * 4) The MMU region is unlocked.
+ */ +#define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1)) + op_param.mmu_sync_info = CALLER_MMU_ASYNC; + op_param.kctx_id = mmut->kctx->id; + op_param.vpfn = vpfn & PGD_VPFN_MASK(level); + op_param.nr = 1 << ((3 - level) * 9); + op_param.op = KBASE_MMU_OP_FLUSH_PT; + /* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */ + op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ? + pgd_level_to_skip_flush(1ULL << level) : + pgd_level_to_skip_flush(3ULL << level); + + mutex_lock(&mmut->mmu_lock); + + /* The state was evaluated before entering this function, but it could + * have changed before the mmu_lock was taken. However, the state + * transitions which are possible at this point are only two, and in both + * cases it is a stable state progressing to a "free in progress" state. + * + * After taking the mmu_lock the state can no longer change: read it again + * and make sure that it hasn't changed before continuing. + */ + spin_lock(&page_md->migrate_lock); + check_state = PAGE_STATUS_GET(page_md->status); + if (level == MIDGARD_MMU_BOTTOMLEVEL) + vmap_count = page_md->vmap_count; + spin_unlock(&page_md->migrate_lock); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + if (check_state != ALLOCATED_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort page migration", __func__, + check_state, ALLOCATED_MAPPED); + ret = -EAGAIN; + goto page_state_change_out; + } else if (vmap_count > 0) { + dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration", + __func__); + ret = -EAGAIN; + goto page_state_change_out; + } + } else { + if (check_state != PT_MAPPED) { + dev_dbg(kbdev->dev, + "%s: state changed to %d (was %d), abort PGD page migration", + __func__, check_state, PT_MAPPED); + WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS); + ret = -EAGAIN; + goto page_state_change_out; + } + } + + ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd, NULL, NULL); + if (ret) { + dev_warn(kbdev->dev, "%s: failed to find PGD for old page.", __func__); + goto get_pgd_at_level_error; + } + + pgd_page = kmap(phys_to_page(pgd)); + if (!pgd_page) { + dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); + ret = -EINVAL; + goto pgd_page_map_error; + } + + mutex_lock(&kbdev->pm.lock); + mutex_lock(&kbdev->mmu_hw_mutex); + + /* Lock MMU region and flush GPU cache by using GPU control, + * in order to keep MMU region locked. + */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) { + /* Defer the migration as L2 is in a transitional phase */ + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__); + ret = -EAGAIN; + goto l2_state_defer_out; + } + /* Prevent transitional phases in L2 by starting the transaction */ + mmu_page_migration_transaction_begin(kbdev); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param); + if (!ret) { + ret = kbase_gpu_cache_flush_and_busy_wait( + kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); + } + if (ret) + mmu_page_migration_transaction_end(kbdev); + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + if (ret < 0) { + dev_err(kbdev->dev, + "%s: failed to lock MMU region or flush GPU cache. Issuing GPU soft-reset to recover.", + __func__); + goto gpu_reset; + } + + /* Copy memory content. + * + * It is necessary to claim the ownership of the DMA buffer for the old + * page before performing the copy, to make sure of reading a consistent + * version of its content, before copying. After the copy, ownership of + * the DMA buffer for the new page is given to the GPU in order to make + * the content visible to potential GPU access that may happen as soon as + * this function releases the lock on the MMU region. + */ + dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + memcpy(new_page, old_page, PAGE_SIZE); + dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + + /* Remap GPU virtual page. + * + * This code rests on the assumption that page migration is only enabled + * for 4 kB pages, which necessarily live in the bottom level of the MMU + * page table. For this reason, the PGD level tells us unequivocally + * whether the page being migrated is a "content page" or another PGD + * of the page table: + * + * - Bottom level implies ATE (Address Translation Entry) + * - Any other level implies PTE (Page Table Entry) + * + * The current implementation doesn't handle the case of a level 0 PGD, + * that is: the root PGD of the page table. + */ + target = &pgd_page[index]; + + /* Certain entries of a page table page encode the count of valid entries + * present in that page. So we need to save & restore the count information + * when updating the PTE/ATE to point to the new page. + */ + num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page); + + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + WARN_ON_ONCE((*target & 1UL) == 0); + *target = + kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags, + level, page_md->data.mapped.reg->gpu_alloc->group_id); + } else { + u64 managed_pte; + +#ifdef CONFIG_MALI_DEBUG + /* The PTE should be pointing to the page being migrated */ + WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr( + kbdev->mgm_dev->ops.mgm_pte_to_original_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index]))); +#endif + kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys)); + *target = kbdev->mgm_dev->ops.mgm_update_gpu_pte( + kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte); + } + + kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries); + + /* This function always updates a single entry inside an existing PGD, + * therefore cache maintenance is necessary and affects a single entry. + */ + kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)), + kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64), + KBASE_MMU_OP_FLUSH_PT); + + /* Unlock MMU region. + * + * Notice that GPUs which don't issue flush commands via GPU control + * still need an additional GPU cache flush here, this time only + * for the page table, because the function call above to sync PGDs + * won't have any effect on them.
+ */ + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + int as_nr = mmut->kctx->as_nr; + struct kbase_as *as = &kbdev->as[as_nr]; + + if (mmu_flush_cache_on_gpu_ctrl(kbdev)) { + ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param); + } else { + ret = kbase_gpu_cache_flush_and_busy_wait(kbdev, + GPU_COMMAND_CACHE_CLN_INV_L2); + if (!ret) + ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param); + } + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + /* Releasing locks before checking the migration transaction error state */ + mutex_unlock(&kbdev->mmu_hw_mutex); + mutex_unlock(&kbdev->pm.lock); + + spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); + /* Release the transition prevention in L2 by ending the transaction */ + mmu_page_migration_transaction_end(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); + + /* Checking the final migration transaction error state */ + if (ret < 0) { + dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__); + goto gpu_reset; + } + + /* Undertaking metadata transfer, while we are holding the mmu_lock */ + spin_lock(&page_md->migrate_lock); + if (level == MIDGARD_MMU_BOTTOMLEVEL) { + size_t page_array_index = + page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn; + + WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED); + + /* Replace page in array of pages of the physical allocation. */ + page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys; + } + /* Update the new page dma_addr with the transferred metadata from the old_page */ + page_md->dma_addr = new_dma_addr; + page_md->status = PAGE_ISOLATE_SET(page_md->status, 0); + spin_unlock(&page_md->migrate_lock); + set_page_private(as_page(new_phys), (unsigned long)page_md); + /* Old page metadata pointer cleared as it is now owned by the new page */ + set_page_private(as_page(old_phys), 0); + +l2_state_defer_out: + kunmap(phys_to_page(pgd)); +pgd_page_map_error: +get_pgd_at_level_error: +page_state_change_out: + mutex_unlock(&mmut->mmu_lock); + + kunmap(as_page(new_phys)); +new_page_map_error: + kunmap(as_page(old_phys)); +old_page_map_error: + return ret; + +gpu_reset: + /* Unlock the MMU table and undo the mappings before resetting the GPU. + */ + mutex_unlock(&mmut->mmu_lock); + kunmap(phys_to_page(pgd)); + kunmap(as_page(new_phys)); + kunmap(as_page(old_phys)); + + /* Reset the GPU because of an unrecoverable error in locking or flushing. */ + if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) + kbase_reset_gpu(kbdev); + + return ret; +} + static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd, int level) @@ -2746,12 +3300,14 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev; struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode; u64 *pgd_page_buffer = NULL; + bool page_is_isolated = false; + struct page *p = phys_to_page(pgd); lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kmap_atomic(p); /* kmap_atomic should NEVER fail.
*/ - if (WARN_ON(pgd_page == NULL)) + if (WARN_ON_ONCE(pgd_page == NULL)) return; if (level < MIDGARD_MMU_BOTTOMLEVEL) { /* Copy the page to our preallocated buffer so that we can minimize @@ -2761,6 +3317,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE); } + /* When page migration is enabled, kbase_region_tracker_term() would ensure + * there are no pages left mapped on the GPU for a context. Hence the count + * of valid entries is expected to be zero here. + */ + if (kbase_page_migration_enabled && mmut->kctx) + WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); kunmap_atomic(pgd_page); @@ -2779,7 +3341,12 @@ static void mmu_teardown_level(struct kbase_device *kbdev, } } - kbase_mmu_free_pgd(kbdev, mmut, pgd); + /* Top level PGD page is excluded from migration process. */ + if (level != MIDGARD_MMU_TOPLEVEL) + page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p); + + if (likely(!page_is_isolated)) + kbase_mmu_free_pgd(kbdev, mmut, pgd); } int kbase_mmu_init(struct kbase_device *const kbdev, @@ -2836,6 +3403,10 @@ void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut) { int level; + WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID), + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables", + mmut->kctx->tgid, mmut->kctx->id); + if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) { mutex_lock(&mmut->mmu_lock); mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL); @@ -2881,7 +3452,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { - dev_warn(kbdev->dev, "%s: kmap failure\n", __func__); + dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; } @@ -3035,8 +3606,7 @@ void kbase_mmu_bus_fault_worker(struct work_struct *data) #ifdef CONFIG_MALI_ARBITER_SUPPORT /* check if we still have GPU */ if (unlikely(kbase_is_gpu_removed(kbdev))) { - dev_dbg(kbdev->dev, - "%s: GPU has been removed\n", __func__); + dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__); release_ctx(kbdev, kctx); atomic_dec(&kbdev->faults_pending); return; diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 5330306..602a3f9 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -29,6 +29,7 @@ struct kbase_context; struct kbase_mmu_table; +struct kbase_va_region; /** * enum kbase_caller_mmu_sync_info - MMU-synchronous caller info. 
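The region-lock arithmetic used by kbase_mmu_migrate_page() above follows from the four-level Midgard/Valhall page table layout, in which each level below the top resolves 9 bits of the virtual page frame number (VPFN). A minimal user-space sketch of that arithmetic, assuming MIDGARD_MMU_BOTTOMLEVEL is level 3 and mirroring the PGD_VPFN_MASK macro, the entry index, and the op_param.nr computation from the hunks above:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the macro defined in kbase_mmu_migrate_page(): clear the VPFN bits
 * that are resolved below the given page table level (9 bits per level).
 */
#define PGD_VPFN_MASK(level) (~((((uint64_t)1) << ((3 - (level)) * 9)) - 1))

int main(void)
{
	const uint64_t vpfn = 0x12345678ULL; /* arbitrary example VPFN */

	for (int level = 0; level <= 3; level++) {
		/* Index of the entry inside the PGD page at this level */
		const unsigned int index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
		/* Number of 4 kB pages covered by one entry at this level */
		const uint64_t nr = (uint64_t)1 << ((3 - level) * 9);

		printf("level %d: lock base VPFN 0x%llx, index %u, covers %llu pages\n",
		       level, (unsigned long long)(vpfn & PGD_VPFN_MASK(level)),
		       index, (unsigned long long)nr);
	}
	return 0;
}

At the bottom level the mask keeps the full VPFN and one entry covers a single page, which is why op_param.nr is 1 for an ATE migration, while a migrated level-2 PGD locks a 512-page (2 MB) aligned range.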
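The insert-time status transitions above are easier to follow in isolation. The sketch below is a toy model of the logic in kbase_mmu_progress_migration_on_insert(), assuming a hypothetical status encoding (low bits for the state, one high bit for isolation); the real PAGE_STATUS_GET/SET, IS_PAGE_ISOLATED and state definitions live in driver headers that are not part of this commit:

#include <assert.h>
#include <stdint.h>

/* Hypothetical encoding for illustration only: low 7 bits hold the status,
 * bit 7 marks isolation.
 */
enum page_status {
	ALLOCATE_IN_PROGRESS,
	ALLOCATED_MAPPED,
	PT_MAPPED,
	NOT_MOVABLE,
	FREE_IN_PROGRESS,
	FREE_ISOLATED_IN_PROGRESS,
};

#define PAGE_STATUS_GET(s) ((uint8_t)((s) & 0x7F))
#define PAGE_STATUS_SET(s, v) ((uint8_t)(((s) & ~0x7F) | ((v) & 0x7F)))
#define IS_PAGE_ISOLATED(s) (((s) >> 7) & 1)

/* Transition applied on insert, following the branch structure of
 * kbase_mmu_progress_migration_on_insert() above.
 */
static uint8_t on_insert(uint8_t status, int has_reg)
{
	if (!has_reg || PAGE_STATUS_GET(status) == ALLOCATED_MAPPED)
		return PAGE_STATUS_SET(status, NOT_MOVABLE); /* alias or no region */
	if (PAGE_STATUS_GET(status) == ALLOCATE_IN_PROGRESS)
		return PAGE_STATUS_SET(status, ALLOCATED_MAPPED);
	return status;
}

int main(void)
{
	uint8_t s = PAGE_STATUS_SET(0, ALLOCATE_IN_PROGRESS);

	s = on_insert(s, 1); /* first mapping: page becomes migratable */
	assert(PAGE_STATUS_GET(s) == ALLOCATED_MAPPED);
	s = on_insert(s, 1); /* second mapping: page pinned as NOT_MOVABLE */
	assert(PAGE_STATUS_GET(s) == NOT_MOVABLE);
	assert(!IS_PAGE_ISOLATED(s));
	return 0;
}

The one-way nature of the transition matches the comments in the hunks above: once a page is NOT_MOVABLE the MMU code never makes it movable again; it can only become movable after returning to a memory pool.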
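kbase_mmu_teardown_pages() above also reworks the warning for partial 2 MB teardowns. A level-2 entry maps 512 contiguous 4 kB pages, so a request covering only part of that range cannot be honoured by invalidating the single entry. A small sketch of the decision, assuming count is the number of requested pages remaining within the current level-2 entry (that computation is an assumption here, as it is not shown in this hunk):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t vpfn = 0x200; /* hypothetical start VPFN, 2 MB aligned */
	size_t nr = 300;             /* pages requested: less than a full entry */
	/* Assumed: pages left in the current level-2 entry, capped by the request */
	size_t count = 512 - (vpfn & 0x1FF);

	if (count > nr)
		count = nr;

	if (count >= 512) {
		/* Whole 2 MB entry covered: invalidate exactly one PTE (pcount = 1) */
		printf("tear down 1 level-2 entry\n");
	} else {
		/* Partial teardown of a 2 MB entry is not supported:
		 * warn and skip it (pcount = 0), as the hunk above does.
		 */
		printf("need 512, but have %zu to tear down -> skip\n", count);
	}
	return 0;
}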
@@ -132,24 +133,56 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int group_id, u64 *dirty_pgds); -int kbase_mmu_insert_pages(struct kbase_device *kbdev, - struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info); + unsigned long flags, int group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); +int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, + int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg, bool ignore_page_migration); int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr, int as_nr); + struct tagged_addr *phys, size_t nr, int as_nr, + bool ignore_page_migration); int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int const group_id); /** + * kbase_mmu_migrate_page - Migrate GPU mappings and content between memory pages + * + * @old_phys: Old physical page to be replaced. + * @new_phys: New physical page used to replace old physical page. + * @old_dma_addr: DMA address of the old page. + * @new_dma_addr: DMA address of the new page. + * @level: MMU page table level of the provided PGD. + * + * The page migration process is made of 2 big steps: + * + * 1) Copy the content of the old page to the new page. + * 2) Remap the virtual page, that is: replace either the ATE (if the old page + * was a regular page) or the PTE (if the old page was used as a PGD) in the + * MMU page table with the new page. + * + * During the process, the MMU region is locked to prevent GPU access to the + * virtual memory page that is being remapped. + * + * Before copying the content of the old page to the new page and while the + * MMU region is locked, a GPU cache flush is performed to make sure that + * pending GPU writes are finalized to the old page before copying. + * That is necessary because otherwise there's a risk that GPU writes might + * be finalized to the old page, and not new page, after migration. + * The MMU region is unlocked only at the end of the migration operation. + * + * Return: 0 on success, otherwise an error code. + */ +int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys, + dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level); + +/** * kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt. * * @kbdev: Pointer to the kbase device for which bus fault was reported. diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h index 09b3fa8..63277bc 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw.h +++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h @@ -105,6 +105,22 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as); /** + * kbase_mmu_hw_do_lock - Issue LOCK command to the MMU and program + * the LOCKADDR register. + * + * @kbdev: Kbase device to issue the MMU operation on. 
+ * @as: Address space to issue the MMU operation on. + * @op_param: Pointer to struct containing information about the MMU + * operation to perform. + * + * hwaccess_lock needs to be held when calling this function. + * + * Return: 0 if issuing the command was successful, otherwise an error code. + */ +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param); + +/** * kbase_mmu_hw_do_unlock_no_addr - Issue UNLOCK command to the MMU without * programming the LOCKADDR register and wait * for it to complete before returning. @@ -114,6 +130,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, * @op_param: Pointer to struct containing information about the MMU * operation to perform. * + * This function should be called for GPUs where a GPU command is used to flush + * the cache(s) instead of an MMU command. + * * Return: 0 if issuing the command was successful, otherwise an error code. */ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, @@ -145,7 +164,7 @@ int kbase_mmu_hw_do_unlock(struct kbase_device *kbdev, struct kbase_as *as, * GPUs where MMU command to flush the cache(s) is deprecated. * mmu_hw_mutex needs to be held when calling this function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -164,7 +183,7 @@ int kbase_mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as, * Both mmu_hw_mutex and hwaccess_lock need to be held when calling this * function. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); @@ -181,7 +200,7 @@ int kbase_mmu_hw_do_flush_locked(struct kbase_device *kbdev, struct kbase_as *as * specified inside @op_param. GPU command is used to flush the cache(s) * instead of the MMU command. * - * Return: Zero if the operation was successful, non-zero otherwise. + * Return: 0 if the operation was successful, non-zero otherwise. */ int kbase_mmu_hw_do_flush_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param); diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c index 527588e..68bc697 100644 --- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c +++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c @@ -424,6 +424,14 @@ static int mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, return ret; } +int kbase_mmu_hw_do_lock(struct kbase_device *kbdev, struct kbase_as *as, + const struct kbase_mmu_hw_op_param *op_param) +{ + lockdep_assert_held(&kbdev->hwaccess_lock); + + return mmu_hw_do_lock(kbdev, as, op_param); +} + int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *as, const struct kbase_mmu_hw_op_param *op_param) { diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h index c4c713c..3f68efa 100644 --- a/mali_kbase/tests/include/kutf/kutf_helpers.h +++ b/mali_kbase/tests/include/kutf/kutf_helpers.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,7 @@ */ #include <kutf/kutf_suite.h> +#include <linux/device.h> /** * kutf_helper_pending_input() - Check any pending lines sent by user space @@ -81,4 +82,28 @@ int kutf_helper_input_enqueue(struct kutf_context *context, */ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context); +/** + * kutf_helper_ignore_dmesg() - Write message in dmesg to instruct parser + * to ignore errors, until the counterpart + * is written to dmesg to stop ignoring errors. + * @dev: Device pointer used when writing to dmesg. + * + * This function writes "Start ignoring dmesg warnings" to dmesg, which + * the parser reads and then stops logging errors. Only to be used in cases + * where an error is expected in dmesg but should not be flagged as an error. + */ +void kutf_helper_ignore_dmesg(struct device *dev); + +/** + * kutf_helper_stop_ignoring_dmesg() - Write message in dmesg to instruct parser + * to stop ignoring errors. + * @dev: Device pointer used when writing to dmesg. + * + * This function writes "Stop ignoring dmesg warnings" to dmesg, which + * the parser reads and then resumes logging errors. Counterpart to + * kutf_helper_ignore_dmesg(). + */ +void kutf_helper_stop_ignoring_dmesg(struct device *dev); + #endif /* _KERNEL_UTF_HELPERS_H_ */ diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c index d207d1c..4273619 100644 --- a/mali_kbase/tests/kutf/kutf_helpers.c +++ b/mali_kbase/tests/kutf/kutf_helpers.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2017, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -127,3 +127,15 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context) { kutf_helper_input_enqueue(context, NULL, 0); } + +void kutf_helper_ignore_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Start ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_ignore_dmesg); + +void kutf_helper_stop_ignoring_dmesg(struct device *dev) +{ + dev_info(dev, "KUTF: Stop ignoring dmesg warnings\n"); +} +EXPORT_SYMBOL(kutf_helper_stop_ignoring_dmesg); diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c index 644d69b..359d063 100644 --- a/mali_kbase/tl/mali_kbase_timeline_io.c +++ b/mali_kbase/tl/mali_kbase_timeline_io.c @@ -30,6 +30,11 @@ #include <linux/version_compat_defs.h> #include <linux/anon_inodes.h> +/* Explicitly include epoll header for old kernels. Not required from 4.16. */ +#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE +#include <uapi/linux/eventpoll.h> +#endif + /* The timeline stream file operations functions. */ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer, size_t size, loff_t *f_pos);
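The widened prototypes in mali_kbase_mmu.h above ripple out to every call site: mapping paths now pass the owning VA region (so page metadata can be bound to it) plus a page-migration opt-out flag, and teardown takes the same flag along with the physical page array. A hypothetical call site, sketched only to show the new parameter order; the function itself and the choice of CALLER_MMU_ASYNC are assumptions, not code from this commit:

#include <mali_kbase.h>

/* Hypothetical caller, for illustration only. Real call sites live in the
 * memory management paths of the driver, outside this diff.
 */
static int example_map_then_unmap(struct kbase_device *kbdev, struct kbase_context *kctx,
				  struct kbase_va_region *reg, struct tagged_addr *phys,
				  size_t nr_pages)
{
	int err;

	/* reg lets the MMU code bind page metadata to the region;
	 * ignore_page_migration = false keeps these pages migratable.
	 */
	err = kbase_mmu_insert_pages(kbdev, &kctx->mmu, reg->start_pfn, phys, nr_pages,
				     reg->flags, kctx->as_nr, reg->gpu_alloc->group_id,
				     CALLER_MMU_ASYNC, reg, false);
	if (err)
		return err;

	/* phys is passed to teardown as well, so the page migration state
	 * machine can be progressed for each unmapped page.
	 */
	return kbase_mmu_teardown_pages(kbdev, &kctx->mmu, reg->start_pfn, phys, nr_pages,
					kctx->as_nr, false);
}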
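The new KUTF helpers at the end of the diff exist so that tests which deliberately provoke kernel warnings can tell the dmesg parser to stand down. A sketch of the intended bracketing, with the test body and device pointer as placeholders:

#include <kutf/kutf_helpers.h>

/* Hypothetical KUTF test step that expects a warning in dmesg. */
static void example_expected_warning_step(struct device *dev)
{
	/* Logs "KUTF: Start ignoring dmesg warnings": the parser stops
	 * flagging dmesg errors from this point on.
	 */
	kutf_helper_ignore_dmesg(dev);

	/* ... exercise the path that legitimately warns in dmesg ... */

	/* Logs "KUTF: Stop ignoring dmesg warnings": normal error
	 * flagging resumes.
	 */
	kutf_helper_stop_ignoring_dmesg(dev);
}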