author     Siddharth Kapoor <ksiddharth@google.com>   2022-03-02 14:51:29 +0800
committer  Siddharth Kapoor <ksiddharth@google.com>   2022-03-02 14:51:29 +0800
commit     88d7d984fed1c2a4358ce2bbc334e82d71e3a391 (patch)
tree       18f20402a0ed15ae9fe62b29a9957922ebcc2ada /mali_kbase
parent     0207d6c3b7a2002f15c60d08617e956faf5ba90c (diff)
download   gpu-88d7d984fed1c2a4358ce2bbc334e82d71e3a391.tar.gz
Mali Valhall Android DDK r36p0 KMD
Provenance: 9f72c118d9 (ipdelivery/EAC/v_r36p0)
VX504X08X-BU-00000-r36p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r36p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r36p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r36p0-01eac0 - Valhall Android Renderscript AOSP parts

Signed-off-by: Siddharth Kapoor <ksiddharth@google.com>
Change-Id: I4a63b707fedc68d7b7d046596c7098da47a139cb
Diffstat (limited to 'mali_kbase')
-rw-r--r--  mali_kbase/Kbuild  5
-rw-r--r--  mali_kbase/Mconfig  8
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_defs.h  4
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_interface.h  34
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.c  51
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.h  6
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c  21
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c  2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c  10
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_defs.h  3
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_hw.c  77
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c  33
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_internal.h  2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c  6
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c  5
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.h  3
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_error_generator.c  9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.c  13
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_always_on.c  7
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c  28
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c  46
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c  134
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h  47
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_metrics.c  4
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c  6
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c  53
-rw-r--r--  mali_kbase/build.bp  3
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_jm.c  7
-rw-r--r--  mali_kbase/context/mali_kbase_context.c  6
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c  22
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c  123
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h  9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c  10
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.c  28
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h  73
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_event.h  6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c  327
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h  8
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.c  6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c  116
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c  10
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c  130
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h  22
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_protected_memory.c  62
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_protected_memory.h  10
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_registers.h  1524
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c  185
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c  372
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h  9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c  53
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h  6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h  14
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_timeout.c  3
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c  8
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.h  6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.c  8
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c  11
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c  8
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_jm.c  8
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c  104
-rw-r--r--  mali_kbase/device/mali_kbase_device.c  8
-rw-r--r--  mali_kbase/device/mali_kbase_device.h  14
-rw-r--r--  mali_kbase/device/mali_kbase_device_hw.c  43
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c  2
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h  368
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h  293
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_fault.h  6
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_regmap.h  559
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h  5
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c  10
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c  15
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c  4
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_debugfs.c  10
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_simple.c  6
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_defs.h  45
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_js.h  25
-rw-r--r--  mali_kbase/jm/mali_kbase_js_defs.h  78
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h  1
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h  17
-rw-r--r--  mali_kbase/mali_kbase.h  135
-rw-r--r--  mali_kbase/mali_kbase_as_fault_debugfs.c  3
-rw-r--r--  mali_kbase/mali_kbase_as_fault_debugfs.h  1
-rw-r--r--  mali_kbase/mali_kbase_caps.h  17
-rw-r--r--  mali_kbase/mali_kbase_ccswe.h  5
-rw-r--r--  mali_kbase/mali_kbase_config.h  9
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h  127
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c  687
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.h  8
-rw-r--r--  mali_kbase/mali_kbase_debug_job_fault.h  31
-rw-r--r--  mali_kbase/mali_kbase_debug_mem_view.c  139
-rw-r--r--  mali_kbase/mali_kbase_debugfs_helper.c  22
-rw-r--r--  mali_kbase/mali_kbase_debugfs_helper.h  46
-rw-r--r--  mali_kbase/mali_kbase_defs.h  168
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.c  4
-rw-r--r--  mali_kbase/mali_kbase_dma_fence.h  4
-rw-r--r--  mali_kbase/mali_kbase_dummy_job_wa.c  4
-rw-r--r--  mali_kbase/mali_kbase_fence.h  16
-rw-r--r--  mali_kbase/mali_kbase_gator.h  2
-rw-r--r--  mali_kbase/mali_kbase_gpu_memory_debugfs.c  13
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c  2
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.h  4
-rw-r--r--  mali_kbase/mali_kbase_gwt.c  6
-rw-r--r--  mali_kbase/mali_kbase_gwt.h  8
-rw-r--r--  mali_kbase/mali_kbase_hw.c  2
-rw-r--r--  mali_kbase/mali_kbase_hw.h  8
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_gpuprops.h  4
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_jm.h  10
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_pm.h  69
-rw-r--r--  mali_kbase/mali_kbase_hwcnt.c  6
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf.c  6
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c  2
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.c  6
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c  821
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.h  65
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.c  196
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_types.h  138
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c  47
-rw-r--r--  mali_kbase/mali_kbase_jd.c  63
-rw-r--r--  mali_kbase/mali_kbase_jd_debugfs.c  6
-rw-r--r--  mali_kbase/mali_kbase_jm.c  7
-rw-r--r--  mali_kbase/mali_kbase_js.c  11
-rw-r--r--  mali_kbase/mali_kbase_js_ctx_attr.c  38
-rw-r--r--  mali_kbase/mali_kbase_js_ctx_attr.h  23
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.c  12
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.c  394
-rw-r--r--  mali_kbase/mali_kbase_kinstr_prfcnt.h  45
-rw-r--r--  mali_kbase/mali_kbase_mem.c  399
-rw-r--r--  mali_kbase/mali_kbase_mem.h  189
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c  116
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.h  20
-rw-r--r--  mali_kbase/mali_kbase_mem_lowlevel.h  20
-rw-r--r--  mali_kbase/mali_kbase_mem_pool.c  2
-rw-r--r--  mali_kbase/mali_kbase_mem_pool_group.h  52
-rw-r--r--  mali_kbase/mali_kbase_mem_profile_debugfs.c  10
-rw-r--r--  mali_kbase/mali_kbase_mem_profile_debugfs.h  15
-rw-r--r--  mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h  8
-rw-r--r--  mali_kbase/mali_kbase_mipe_gen_header.h  2
-rw-r--r--  mali_kbase/mali_kbase_pbha.c  75
-rw-r--r--  mali_kbase/mali_kbase_pbha.h  10
-rw-r--r--  mali_kbase/mali_kbase_pm.h  71
-rw-r--r--  mali_kbase/mali_kbase_regs_history_debugfs.c  16
-rw-r--r--  mali_kbase/mali_kbase_regs_history_debugfs.h  8
-rw-r--r--  mali_kbase/mali_kbase_reset_gpu.h  7
-rw-r--r--  mali_kbase/mali_kbase_smc.h  44
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c  15
-rw-r--r--  mali_kbase/mali_kbase_sync.h  29
-rw-r--r--  mali_kbase/mali_kbase_sync_android.c  20
-rw-r--r--  mali_kbase/mali_kbase_sync_file.c  2
-rw-r--r--  mali_kbase/mali_kbase_trace_gpu_mem.c  6
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c  6
-rw-r--r--  mali_kbase/mali_malisw.h  11
-rw-r--r--  mali_kbase/mali_power_gpu_frequency_trace.h  4
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_jm.c  8
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c  239
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.h  45
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw.h  4
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c  26
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_internal.h  4
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c  2
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_config_platform.c  2
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_config_platform.h  8
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c  14
-rw-r--r--  mali_kbase/platform/vexpress/mali_kbase_config_platform.h  6
-rw-r--r--  mali_kbase/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h  6
-rw-r--r--  mali_kbase/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h  6
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_helpers.h  13
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_suite.h  45
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_utils.h  6
-rw-r--r--  mali_kbase/tests/kutf/kutf_helpers.c  45
-rw-r--r--  mali_kbase/tests/kutf/kutf_helpers_user.c  2
-rw-r--r--  mali_kbase/tests/kutf/kutf_resultset.c  2
-rw-r--r--  mali_kbase/tests/kutf/kutf_suite.c  35
-rw-r--r--  mali_kbase/tests/kutf/kutf_utils.c  4
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c  26
-rw-r--r--  mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h  15
-rw-r--r--  mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c  10
-rw-r--r--  mali_kbase/thirdparty/mali_kbase_mmap.c  74
-rw-r--r--  mali_kbase/tl/backend/mali_kbase_timeline_csf.c  2
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.c  3
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline.h  4
-rw-r--r--  mali_kbase/tl/mali_kbase_timeline_io.c  11
-rw-r--r--  mali_kbase/tl/mali_kbase_tl_serialize.h  21
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c  1261
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h  2081
184 files changed, 10059 insertions, 3991 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index afc0f83..c42e087 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -71,7 +71,7 @@ endif
#
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r35p0-01eac0"'
+MALI_RELEASE_NAME ?= '"r36p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
@@ -165,6 +165,7 @@ mali_kbase-y := \
mali_kbase_hwcnt_gpu_narrow.o \
mali_kbase_hwcnt_types.o \
mali_kbase_hwcnt_virtualizer.o \
+ mali_kbase_hwcnt_watchdog_if_timer.o \
mali_kbase_softjobs.o \
mali_kbase_hw.o \
mali_kbase_debug.o \
@@ -202,12 +203,12 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \
ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
mali_kbase-y += \
mali_kbase_hwcnt_backend_csf.o \
- mali_kbase_hwcnt_watchdog_if_timer.o \
mali_kbase_hwcnt_backend_csf_if_fw.o
else
mali_kbase-y += \
mali_kbase_jm.o \
mali_kbase_hwcnt_backend_jm.o \
+ mali_kbase_hwcnt_backend_jm_watchdog.o \
mali_kbase_dummy_job_wa.o \
mali_kbase_debug_job_fault.o \
mali_kbase_event.o \
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index 1b66978..0f8f273 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -47,6 +47,14 @@ config MALI_REAL_HW
default y
default n if NO_MALI
+config MALI_PLATFORM_DT_PIN_RST
+ bool "Enable Juno GPU Pin reset"
+ depends on MALI_MIDGARD
+ default n
+ default y if BUSLOG
+ help
+ Enables support for GPU pin reset on Juno platforms.
+
config MALI_CSF_SUPPORT
bool "Enable Mali CSF based GPU support"
depends on MALI_MIDGARD
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
index 65cfc7b..1c4901b 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Mali structures define to support arbitration feature
+ * DOC: Mali structures define to support arbitration feature
*/
#ifndef _MALI_KBASE_ARBITER_DEFS_H_
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
index 3c60878..a0ca1cc 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_interface.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,14 +20,14 @@
*/
/**
- * Defines the Mali arbiter interface
+ * DOC: Defines the Mali arbiter interface
*/
#ifndef _MALI_KBASE_ARBITER_INTERFACE_H_
#define _MALI_KBASE_ARBITER_INTERFACE_H_
/**
- * Mali arbiter interface version
+ * DOC: Mali arbiter interface version
*
* This specifies the current version of the configuration interface. Whenever
* the arbiter interface changes, so that integration effort is required, the
@@ -44,7 +44,7 @@
#define MALI_KBASE_ARBITER_INTERFACE_VERSION 5
/**
- * NO_FREQ is used in case platform doesn't support reporting frequency
+ * DOC: NO_FREQ is used in case platform doesn't support reporting frequency
*/
#define NO_FREQ 0
@@ -53,14 +53,6 @@ struct arbiter_if_dev;
/**
* struct arbiter_if_arb_vm_ops - Interface to communicate messages to VM
*
- * This struct contains callbacks used to deliver messages
- * from the arbiter to the corresponding VM.
- *
- * Note that calls into these callbacks may have synchronous calls back into
- * the arbiter arbiter_if_vm_arb_ops callbacks below.
- * For example vm_arb_gpu_stopped() may be called as a side effect of
- * arb_vm_gpu_stop() being called here.
- *
* @arb_vm_gpu_stop: Callback to ask VM to stop using GPU.
* dev: The arbif kernel module device.
*
@@ -94,6 +86,13 @@ struct arbiter_if_dev;
* freq: GPU clock frequency value reported from arbiter
*
* Informs KBase that the GPU clock frequency has been updated.
+ *
+ * This struct contains callbacks used to deliver messages
+ * from the arbiter to the corresponding VM.
+ * Note that calls into these callbacks may have synchronous calls back into
+ * the arbiter arbiter_if_vm_arb_ops callbacks below.
+ * For example vm_arb_gpu_stopped() may be called as a side effect of
+ * arb_vm_gpu_stop() being called here.
*/
struct arbiter_if_arb_vm_ops {
void (*arb_vm_gpu_stop)(struct device *dev);
@@ -107,12 +106,6 @@ struct arbiter_if_arb_vm_ops {
/**
* struct arbiter_if_vm_arb_ops - Interface to communicate messages to arbiter
*
- * This struct contains callbacks used to request operations
- * from the VM to the arbiter
- *
- * Note that we must not make any synchronous calls back in to the VM
- * (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
- *
* @vm_arb_register_dev: Callback to register VM device driver callbacks.
* arbif_dev: The arbiter interface to register
* with for device callbacks
@@ -142,6 +135,11 @@ struct arbiter_if_arb_vm_ops {
* using the GPU
* arbif_dev: The arbiter interface device to notify.
* gpu_required: The GPU is still needed to do more work.
+ *
+ * This struct contains callbacks used to request operations
+ * from the VM to the arbiter.
+ * Note that we must not make any synchronous calls back in to the VM
+ * (via arbiter_if_arb_vm_ops above) in the context of these callbacks.
*/
struct arbiter_if_vm_arb_ops {
int (*vm_arb_register_dev)(struct arbiter_if_dev *arbif_dev,
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
index 5425f2b..d813a04 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Mali arbiter power manager state machine and APIs
+ * DOC: Mali arbiter power manager state machine and APIs
*/
#include <mali_kbase.h>
@@ -394,6 +394,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Install interrupts and set the interrupt_install flag to true.
+ *
+ * Return: 0 if success, or a Linux error code
*/
int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev)
{
@@ -619,18 +621,6 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
case KBASE_VM_STATE_SUSPEND_PENDING:
/* Suspend finishes with a stop so nothing else to do */
break;
- case KBASE_VM_STATE_INITIALIZING:
- case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
- /*
- * Case stop() is received when in a GPU REQUESTED state, it
- * means that the granted() was missed so the GPU needs to be
- * requested again.
- */
- dev_dbg(kbdev->dev,
- "GPU stop while already stopped with GPU requested");
- kbase_arbif_gpu_stopped(kbdev, true);
- start_request_timer(kbdev);
- break;
default:
dev_warn(kbdev->dev, "GPU_STOP when not expected - state %s\n",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state));
@@ -668,19 +658,8 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
break;
case KBASE_VM_STATE_SUSPENDED:
case KBASE_VM_STATE_STOPPED:
- dev_dbg(kbdev->dev, "GPU lost while already stopped");
- break;
- case KBASE_VM_STATE_INITIALIZING:
case KBASE_VM_STATE_STOPPED_GPU_REQUESTED:
- /*
- * Case lost() is received when in a GPU REQUESTED state, it
- * means that the granted() and stop() were missed so the GPU
- * needs to be requested again. Very unlikely to happen.
- */
- dev_dbg(kbdev->dev,
- "GPU lost while already stopped with GPU requested");
- kbase_arbif_gpu_request(kbdev);
- start_request_timer(kbdev);
+ dev_dbg(kbdev->dev, "GPU lost while already stopped");
break;
case KBASE_VM_STATE_SUSPEND_WAIT_FOR_GRANT:
dev_dbg(kbdev->dev, "GPU lost while waiting to suspend");
@@ -947,6 +926,8 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Checks if the virtual machine holds VM state lock.
+ *
+ * Return: true if GPU is assigned, else false.
*/
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
struct kbase_device *kbdev)
@@ -1067,14 +1048,14 @@ void kbase_arbiter_pm_update_gpu_freq(struct kbase_arbiter_freq *arb_freq,
}
/**
- * enumerate_arb_gpu_clk() - Enumerate a GPU clock on the given index
+ * get_arb_gpu_clk() - Enumerate a GPU clock on the given index
* @kbdev: kbase_device pointer
* @index: GPU clock index
*
- * Returns pointer to structure holding GPU clock frequency data reported from
+ * Return: Pointer to structure holding GPU clock frequency data reported from
* arbiter, only index 0 is valid.
*/
-static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
+static void *get_arb_gpu_clk(struct kbase_device *kbdev,
unsigned int index)
{
if (index == 0)
@@ -1084,10 +1065,10 @@ static void *enumerate_arb_gpu_clk(struct kbase_device *kbdev,
/**
* get_arb_gpu_clk_rate() - Get the current rate of GPU clock frequency value
- * @kbdev: kbase_device pointer
- * @index: GPU clock index
+ * @kbdev: kbase_device pointer
+ * @gpu_clk_handle: Handle unique to the enumerated GPU clock
*
- * Returns the GPU clock frequency value saved when gpu is granted from arbiter
+ * Return: The GPU clock frequency value saved when gpu is granted from arbiter
*/
static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
void *gpu_clk_handle)
@@ -1109,10 +1090,10 @@ static unsigned long get_arb_gpu_clk_rate(struct kbase_device *kbdev,
* @gpu_clk_handle: Handle unique to the enumerated GPU clock
* @nb: notifier block containing the callback function pointer
*
- * Returns 0 on success, negative error code otherwise.
- *
* This function registers a callback function that is invoked whenever the
* frequency of the clock corresponding to @gpu_clk_handle changes.
+ *
+ * Return: 0 on success, negative error code otherwise.
*/
static int arb_gpu_clk_notifier_register(struct kbase_device *kbdev,
void *gpu_clk_handle, struct notifier_block *nb)
@@ -1154,7 +1135,7 @@ static void arb_gpu_clk_notifier_unregister(struct kbase_device *kbdev,
struct kbase_clk_rate_trace_op_conf arb_clk_rate_trace_ops = {
.get_gpu_clk_rate = get_arb_gpu_clk_rate,
- .enumerate_gpu_clk = enumerate_arb_gpu_clk,
+ .enumerate_gpu_clk = get_arb_gpu_clk,
.gpu_clk_notifier_register = arb_gpu_clk_notifier_register,
.gpu_clk_notifier_unregister = arb_gpu_clk_notifier_unregister
};
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
index 091b431..f863f88 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Mali arbiter power manager state machine and APIs
+ * DOC: Mali arbiter power manager state machine and APIs
*/
#ifndef _MALI_KBASE_ARBITER_PM_H_
@@ -101,6 +101,8 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev);
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Install interrupts and set the interrupt_install flag to true.
+ *
+ * Return: 0 if success, or a Linux error code
*/
int kbase_arbiter_pm_install_interrupts(struct kbase_device *kbdev);
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index a7110b3..00b32b9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -155,15 +155,16 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
/* Regulators and clocks work in pairs: every clock has a regulator,
* and we never expect to have more regulators than clocks.
*
- * We always need to increase the voltage before increasing
- * the frequency of a regulator/clock pair, otherwise the clock
- * wouldn't have enough power to perform the transition.
+ * We always need to increase the voltage before increasing the number
+ * of shader cores and the frequency of a regulator/clock pair,
+ * otherwise the clock wouldn't have enough power to perform
+ * the transition.
*
- * It's always safer to decrease the frequency before decreasing
- * voltage of a regulator/clock pair, otherwise the clock could have
- * problems operating if it is deprived of the necessary power
- * to sustain its current frequency (even if that happens for a short
- * transition interval).
+ * It's always safer to decrease the number of shader cores and
+ * the frequency before decreasing voltage of a regulator/clock pair,
+ * otherwise the clock could have problematic operation if it is
+ * deprived of the necessary power to sustain its current frequency
+ * (even if that happens for a short transition interval).
*/
for (i = 0; i < kbdev->nr_clocks; i++) {
if (kbdev->regulators[i] &&
@@ -202,6 +203,8 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
}
}
+ kbase_devfreq_set_core_mask(kbdev, core_mask);
+
#if IS_ENABLED(CONFIG_REGULATOR)
for (i = 0; i < kbdev->nr_clocks; i++) {
if (kbdev->regulators[i] &&
@@ -222,8 +225,6 @@ kbase_devfreq_target(struct device *dev, unsigned long *target_freq, u32 flags)
}
#endif
- kbase_devfreq_set_core_mask(kbdev, core_mask);
-
*target_freq = nominal_freq;
kbdev->current_nominal_freq = nominal_freq;
kbdev->current_core_mask = core_mask;
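
The ordering rule described in the comment above amounts to the following sketch. It is illustrative only, assuming a single regulator/clock pair; dvfs_transition() and set_core_mask() are placeholder names, not functions from this driver.

#include <linux/clk.h>
#include <linux/regulator/consumer.h>
#include <linux/types.h>

/* Illustration of the rule: raise the voltage before scaling up the clock
 * and the shader core mask, and lower it only after both have been scaled
 * down.
 */
static int dvfs_transition(struct regulator *reg, struct clk *clk,
			   unsigned long cur_freq, unsigned long new_freq,
			   int new_volt_uv, u64 new_core_mask,
			   void (*set_core_mask)(u64 mask))
{
	int err = 0;

	if (new_freq > cur_freq)
		err = regulator_set_voltage(reg, new_volt_uv, new_volt_uv);
	if (err)
		return err;

	err = clk_set_rate(clk, new_freq);
	if (err)
		return err;

	set_core_mask(new_core_mask);

	if (new_freq < cur_freq)
		err = regulator_set_voltage(reg, new_volt_uv, new_volt_uv);

	return err;
}

This mirrors why the patch moves kbase_devfreq_set_core_mask() ahead of the regulator down-scaling loop: the core mask is now reduced while the higher voltage is still applied.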
diff --git a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
index 268a888..0ea14bc 100644
--- a/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_gpuprops_backend.c
@@ -33,7 +33,7 @@ int kbase_backend_gpuprops_get(struct kbase_device *kbdev,
struct kbase_gpuprops_regdump *regdump)
{
int i;
- struct kbase_gpuprops_regdump registers;
+ struct kbase_gpuprops_regdump registers = { 0 };
/* Fill regdump with the content of the relevant registers */
registers.gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 1691a87..0ece571 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -421,12 +421,12 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
/* Use the build time option for the override default. */
#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
- kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY;
+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
- kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY;
+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
#else
/* Default to primary */
- kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
#endif
#endif
return 0;
@@ -446,8 +446,8 @@ void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
*
* Valid inputs are the values accepted bythe SET_SELECT bits of the
* PRFCNT_CONFIG register as defined in the architecture specification.
- */
- debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR,
+ */
+ debugfs_create_u8("hwcnt_set_select", 0644,
kbdev->mali_debugfs_directory,
(u8 *)&kbdev->hwcnt.backend.override_counter_set);
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
index 3ce3903..136aa52 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -48,6 +48,7 @@ struct rb_entry {
/**
* SLOT_RB_TAG_KCTX() - a function-like macro for converting a pointer to a
* u64 for serving as tagged value.
+ * @kctx: Pointer to kbase context.
*/
#define SLOT_RB_TAG_KCTX(kctx) (u64)((uintptr_t)(kctx))
/**
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index ec3b906..526c720 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -425,6 +425,8 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
JOB_SLOT_REG(i, JS_STATUS));
if (completion_code == BASE_JD_EVENT_STOPPED) {
+ u64 job_head;
+
KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(
kbdev, NULL,
i, 0, TL_JS_EVENT_SOFT_STOP);
@@ -441,6 +443,21 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
((u64)kbase_reg_read(kbdev,
JOB_SLOT_REG(i, JS_TAIL_HI))
<< 32);
+ job_head = (u64)kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_HEAD_LO)) |
+ ((u64)kbase_reg_read(kbdev,
+ JOB_SLOT_REG(i, JS_HEAD_HI))
+ << 32);
+ /* For a soft-stopped job chain js_tail should
+ * be the same as the js_head, but if not then the
+ * job chain was incorrectly marked as
+ * soft-stopped. In such case we should not
+ * be resuming the job chain from js_tail and
+ * report the completion_code as UNKNOWN.
+ */
+ if (job_tail != job_head)
+ completion_code = BASE_JD_EVENT_UNKNOWN;
+
} else if (completion_code ==
BASE_JD_EVENT_NOT_STARTED) {
/* PRLAM-10673 can cause a TERMINATED
@@ -922,33 +939,12 @@ void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
JS_COMMAND_SOFT_STOP | sw_flags);
}
-/**
- * kbase_job_slot_softstop - Soft-stop the specified job slot
- * @kbdev: The kbase device
- * @js: The job slot to soft-stop
- * @target_katom: The job that should be soft-stopped (or NULL for any job)
- * Context:
- * The job slot lock must be held when calling this function.
- * The job slot must not already be in the process of being soft-stopped.
- *
- * Where possible any job in the next register is evicted before the soft-stop.
- */
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom)
{
kbase_job_slot_softstop_swflags(kbdev, js, target_katom, 0u);
}
-/**
- * kbase_job_slot_hardstop - Hard-stop the specified job slot
- * @kctx: The kbase context that contains the job(s) that should
- * be hard-stopped
- * @js: The job slot to hard-stop
- * @target_katom: The job that should be hard-stopped (or NULL for all
- * jobs from the context)
- * Context:
- * The job slot lock must be held when calling this function.
- */
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom)
{
@@ -961,26 +957,6 @@ void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
CSTD_UNUSED(stopped);
}
-/**
- * kbase_job_check_enter_disjoint - potentiall enter disjoint mode
- * @kbdev: kbase device
- * @action: the event which has occurred
- * @core_reqs: core requirements of the atom
- * @target_katom: the atom which is being affected
- *
- * For a certain soft-stop action, work out whether to enter disjoint
- * state.
- *
- * This does not register multiple disjoint events if the atom has already
- * started a disjoint period
- *
- * @core_reqs can be supplied as 0 if the atom had not started on the hardware
- * (and so a 'real' soft/hard-stop was not required, but it still interrupted
- * flow, perhaps on another context)
- *
- * kbase_job_check_leave_disjoint() should be used to end the disjoint
- * state when the soft/hard-stop action is complete
- */
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom)
{
@@ -1002,14 +978,6 @@ void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
kbase_disjoint_state_up(kbdev);
}
-/**
- * kbase_job_check_enter_disjoint - potentially leave disjoint state
- * @kbdev: kbase device
- * @target_katom: atom which is finishing
- *
- * Work out whether to leave disjoint state when finishing an atom that was
- * originated by kbase_job_check_enter_disjoint().
- */
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
struct kbase_jd_atom *target_katom)
{
@@ -1340,8 +1308,7 @@ static void kbasep_try_reset_gpu_early(struct kbase_device *kbdev)
* This function soft-stops all the slots to ensure that as many jobs as
* possible are saved.
*
- * Return:
- * The function returns a boolean which should be interpreted as follows:
+ * Return: boolean which should be interpreted as follows:
* true - Prepared for reset, kbase_reset_gpu_locked should be called.
* false - Another thread is performing a reset, kbase_reset_gpu should
* not be called.
@@ -1518,9 +1485,9 @@ static u64 kbasep_apply_limited_core_mask(const struct kbase_device *kbdev,
#ifdef CONFIG_MALI_DEBUG
dev_dbg(kbdev->dev,
"Limiting affinity due to BASE_JD_REQ_LIMITED_CORE_MASK from 0x%lx to 0x%lx (mask is 0x%lx)\n",
- (unsigned long int)affinity,
- (unsigned long int)result,
- (unsigned long int)limited_core_mask);
+ (unsigned long)affinity,
+ (unsigned long)result,
+ (unsigned long)limited_core_mask);
#else
CSTD_UNUSED(kbdev);
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index 0f2f296..e1a298b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,14 +37,23 @@
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-/* Return whether the specified ringbuffer is empty. HW access lock must be
- * held
+/**
+ * SLOT_RB_EMPTY - Return whether the specified ringbuffer is empty.
+ *
+ * @rb: ring buffer
+ *
+ * Note: HW access lock must be held
*/
#define SLOT_RB_EMPTY(rb) (rb->write_idx == rb->read_idx)
-/* Return number of atoms currently in the specified ringbuffer. HW access lock
- * must be held
+
+/**
+ * SLOT_RB_ENTRIES - Return number of atoms currently in the specified ringbuffer.
+ *
+ * @rb: ring buffer
+ *
+ * Note: HW access lock must be held
*/
-#define SLOT_RB_ENTRIES(rb) (int)(s8)(rb->write_idx - rb->read_idx)
+#define SLOT_RB_ENTRIES(rb) ((int)(s8)(rb->write_idx - rb->read_idx))
static void kbase_gpu_release_atom(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
@@ -304,10 +313,10 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
[katom->slot_nr]);
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+ fallthrough;
case KBASE_ATOM_GPU_RB_READY:
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_FOR_CORE_AVAILABLE:
break;
@@ -367,13 +376,13 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
}
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_PROTECTED_MODE_PREV:
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+ fallthrough;
case KBASE_ATOM_GPU_RB_WAITING_BLOCKED:
/* ***FALLTHROUGH: TRANSITION TO LOWER STATE*** */
-
+ fallthrough;
case KBASE_ATOM_GPU_RB_RETURN_TO_JS:
break;
}
@@ -1813,7 +1822,7 @@ void kbase_gpu_dump_slots(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- dev_info(kbdev->dev, "kbase_gpu_dump_slots:\n");
+ dev_info(kbdev->dev, "%s:\n", __func__);
for (js = 0; js < kbdev->gpu_props.num_job_slots; js++) {
int idx;
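
The (int)(s8) cast in SLOT_RB_ENTRIES works because the slot ringbuffer indices are free-running 8-bit counters, so the subtraction stays correct across wraparound. A small stand-alone demonstration (demo_rb is a stand-in for the driver's ringbuffer type, not taken from this patch):

#include <stdint.h>
#include <stdio.h>

typedef uint8_t u8;   /* kernel-style aliases, for the demo only */
typedef int8_t s8;

struct demo_rb { u8 write_idx; u8 read_idx; };

#define SLOT_RB_EMPTY(rb)   ((rb)->write_idx == (rb)->read_idx)
#define SLOT_RB_ENTRIES(rb) ((int)(s8)((rb)->write_idx - (rb)->read_idx))

int main(void)
{
	/* write_idx has wrapped past 255: entries 254, 255, 0 and 1 are in use */
	struct demo_rb rb = { .write_idx = 2, .read_idx = 254 };

	printf("empty=%d entries=%d\n", SLOT_RB_EMPTY(&rb), SLOT_RB_ENTRIES(&rb));
	/* prints: empty=0 entries=4 */
	return 0;
}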
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_internal.h b/mali_kbase/backend/gpu/mali_kbase_js_internal.h
index 5284288..4f7c371 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_js_internal.h
@@ -62,7 +62,7 @@ void kbase_backend_timer_suspend(struct kbase_device *kbdev);
* scheduling timer
* @kbdev: Device pointer
*
- * This function should be called on resume. Note that is is not guaranteed to
+ * This function should be called on resume. Note that it is not guaranteed to
* re-start the timer, only evalute whether it should be re-started.
*
* Caller must hold runpool_mutex.
diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
index c2d7a26..9ce5075 100644
--- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
+++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -121,9 +121,9 @@ int kbase_set_mmu_quirks(struct kbase_device *kbdev)
if (kbdev->system_coherency == COHERENCY_ACE) {
/* Allow memory configuration disparity to be ignored,
- * we optimize the use of shared memory and thus we
- * expect some disparity in the memory configuration.
- */
+ * we optimize the use of shared memory and thus we
+ * expect some disparity in the memory configuration.
+ */
kbdev->hw_quirks_mmu |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index ccf0e7c..603ffcf 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -1470,9 +1470,8 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
pr_debug("JS_IRQ_MASK being read %x", *value);
}
#else /* !MALI_USE_CSF */
- else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK)) {
- /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
- }
+ else if (addr == JOB_CONTROL_REG(JOB_IRQ_MASK))
+ ; /* ignore JOB_IRQ_MASK as it is handled by CSFFW */
#endif /* !MALI_USE_CSF */
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
index e092134..87690f4 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -143,7 +143,6 @@ void midgard_model_destroy(void *h);
u8 midgard_model_write_reg(void *h, u32 addr, u32 value);
u8 midgard_model_read_reg(void *h, u32 addr,
u32 * const value);
-void gpu_generate_error(void);
void midgard_set_error(int job_slot);
int job_atom_inject_error(struct kbase_error_params *params);
int gpu_model_control(void *h,
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
index dfa7f62..3440460 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_error_generator.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,7 +39,11 @@ unsigned int error_probability = 50; /* to be set between 0 and 100 */
/* probability to have multiple error give that there is an error */
unsigned int multiple_error_probability = 50;
-void gpu_generate_error(void)
+#ifdef CONFIG_MALI_ERROR_INJECT_RANDOM
+/**
+ * gpu_generate_error - Generate GPU error
+ */
+static void gpu_generate_error(void)
{
unsigned int errors_num = 0;
@@ -94,6 +98,7 @@ void gpu_generate_error(void)
}
}
}
+#endif
int job_atom_inject_error(struct kbase_error_params *params)
{
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
index ed5d4ce..7887cb2 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010, 2012-2015, 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -135,8 +135,12 @@ void gpu_device_raise_irq(void *model,
default:
dev_warn(kbdev->dev, "Unknown IRQ");
kmem_cache_free(kbdev->irq_slab, data);
+ data = NULL;
+ break;
}
- queue_work(kbdev->irq_workq, &data->work);
+
+ if (data != NULL)
+ queue_work(kbdev->irq_workq, &data->work);
}
void kbase_reg_write(struct kbase_device *kbdev, u32 offset, u32 value)
@@ -248,6 +252,11 @@ int kbase_gpu_device_create(struct kbase_device *kbdev)
return 0;
}
+/**
+ * kbase_gpu_device_destroy - Destroy GPU device
+ *
+ * @kbdev: kbase device
+ */
void kbase_gpu_device_destroy(struct kbase_device *kbdev)
{
midgard_model_destroy(kbdev->model);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c b/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c
index 077c234..bbf6290 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_always_on.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2015, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -41,6 +41,11 @@ static void always_on_init(struct kbase_device *kbdev)
CSTD_UNUSED(kbdev);
}
+/**
+ * always_on_term - Term callback function for always-on power policy
+ *
+ * @kbdev: kbase device
+ */
static void always_on_term(struct kbase_device *kbdev)
{
CSTD_UNUSED(kbdev);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index 8711a6c..2d52eca 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -101,9 +101,8 @@ int kbase_pm_runtime_init(struct kbase_device *kbdev)
void kbase_pm_runtime_term(struct kbase_device *kbdev)
{
- if (kbdev->pm.callback_power_runtime_term) {
+ if (kbdev->pm.callback_power_runtime_term)
kbdev->pm.callback_power_runtime_term(kbdev);
- }
}
void kbase_pm_register_access_enable(struct kbase_device *kbdev)
@@ -202,6 +201,13 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
kbase_pm_hwcnt_disable_worker);
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
+ kbdev->pm.backend.gpu_sleep_supported =
+ kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_GPU_SLEEP) &&
+ !kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TURSEHW_1997) &&
+ kbdev->pm.backend.callback_power_runtime_gpu_active &&
+ kbdev->pm.backend.callback_power_runtime_gpu_idle;
+#endif
if (IS_ENABLED(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED)) {
kbdev->pm.backend.l2_always_on = false;
@@ -288,7 +294,7 @@ static void pm_handle_power_off(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
- if (kbdev->pm.backend.gpu_wakeup_override ) {
+ if (kbdev->pm.backend.gpu_wakeup_override) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return;
}
@@ -362,11 +368,6 @@ static void kbase_pm_gpu_poweroff_wait_wq(struct work_struct *data)
kbase_pm_lock(kbdev);
-#ifdef CONFIG_MALI_ARBITER_SUPPORT
- if (kbase_pm_is_gpu_lost(kbdev))
- backend->poweron_required = false;
-#endif
-
pm_handle_power_off(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -683,6 +684,13 @@ void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
+/**
+ * is_gpu_powered_down - Check whether GPU is powered down
+ *
+ * @kbdev: kbase device
+ *
+ * Return: true if GPU is powered down, false otherwise
+ */
static bool is_gpu_powered_down(struct kbase_device *kbdev)
{
bool ret;
@@ -882,7 +890,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
lockdep_assert_held(&kbdev->pm.lock);
if (kbase_dummy_job_wa_enabled(kbdev)) {
- dev_warn(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
+ dev_warn_once(kbdev->dev, "Change of core mask not supported for slot 0 as dummy job WA is enabled");
new_core_mask_js0 = kbdev->pm.debug_core_mask[0];
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index 803ba4d..7d14be9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -55,6 +55,9 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
{
struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
unsigned long flags;
+#if MALI_USE_CSF
+ u64 old_core_mask = 0;
+#endif
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -65,6 +68,8 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
core_mask, kbdev->pm.debug_core_mask);
goto unlock;
}
+
+ old_core_mask = pm_backend->ca_cores_enabled;
#else
if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
@@ -73,20 +78,53 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
}
if (kbase_dummy_job_wa_enabled(kbdev)) {
- dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
+ dev_err_once(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
goto unlock;
}
#endif /* MALI_USE_CSF */
-
pm_backend->ca_cores_enabled = core_mask;
kbase_pm_update_state(kbdev);
-
-unlock:
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF
+ /* Check if old_core_mask contained the undesired cores and wait
+ * for those cores to get powered down
+ */
+ if ((core_mask & old_core_mask) != old_core_mask) {
+ bool can_wait;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ can_wait = kbdev->pm.backend.gpu_ready && kbase_pm_is_mcu_desired(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* This check is ideally not required, the wait function can
+ * deal with the GPU power down. But it has been added to
+ * address the scenario where a down-scaling request comes from
+ * the platform specific code soon after the GPU power down
+ * and at the same time an application thread tries to
+ * power up the GPU (on the flush of GPU queue).
+ * The platform specific @ref callback_power_on that gets
+ * invoked on power up does not return until down-scaling
+ * request is complete. The check mitigates the race caused by
+ * the problem in platform specific code.
+ */
+ if (likely(can_wait)) {
+ if (kbase_pm_wait_for_desired_state(kbdev)) {
+ dev_warn(kbdev->dev,
+ "Wait for update of core_mask from %llx to %llx failed",
+ old_core_mask, core_mask);
+ }
+ }
+ }
+#endif
+
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
pm_backend->ca_cores_enabled);
+
+ return;
+unlock:
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask);
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index 81c922f..240c31a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -101,6 +101,8 @@ static u64 kbase_pm_get_state(
enum kbase_pm_core_type core_type,
enum kbasep_pm_action action);
+static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev);
+
#if MALI_USE_CSF
bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev)
{
@@ -655,6 +657,35 @@ static void kbase_pm_enable_mcu_db_notification(struct kbase_device *kbdev)
}
#endif
+
+/**
+ * kbasep_pm_toggle_power_interrupt - Toggles the IRQ mask for power interrupts
+ * from the firmware
+ *
+ * @kbdev: Pointer to the device
+ * @enable: boolean indicating to enable interrupts or not
+ *
+ * The POWER_CHANGED_ALL and POWER_CHANGED_SINGLE interrupts can be disabled
+ * after L2 has been turned on when FW is controlling the power for the shader
+ * cores. Correspondingly, the interrupts can be re-enabled after the MCU has
+ * been disabled before the power down of L2.
+ */
+static void kbasep_pm_toggle_power_interrupt(struct kbase_device *kbdev, bool enable)
+{
+ u32 irq_mask;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+
+ if (enable)
+ irq_mask |= POWER_CHANGED_ALL | POWER_CHANGED_SINGLE;
+ else
+ irq_mask &= ~(POWER_CHANGED_ALL | POWER_CHANGED_SINGLE);
+
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), irq_mask);
+}
+
static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -698,6 +729,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
kbase_pm_ca_get_core_mask(kbdev);
kbase_csf_firmware_global_reinit(kbdev,
backend->shaders_desired_mask);
+ if (!kbdev->csf.firmware_hctl_core_pwr)
+ kbasep_pm_toggle_power_interrupt(kbdev, false);
backend->mcu_state =
KBASE_MCU_ON_GLB_REINIT_PEND;
}
@@ -906,6 +939,8 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_PEND_OFF:
/* wait synchronously for the MCU to get disabled */
kbase_csf_firmware_disable_mcu_wait(kbdev);
+ if (!kbdev->csf.firmware_hctl_core_pwr)
+ kbasep_pm_toggle_power_interrupt(kbdev, true);
backend->mcu_state = KBASE_MCU_OFF;
break;
#ifdef KBASE_PM_RUNTIME
@@ -924,6 +959,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
backend->mcu_state = KBASE_MCU_IN_SLEEP;
kbase_pm_enable_db_mirror_interrupt(kbdev);
kbase_csf_scheduler_reval_idleness_post_sleep(kbdev);
+ /* Enable PM interrupt, after MCU has been put
+ * to sleep, for the power down of L2.
+ */
+ if (!kbdev->csf.firmware_hctl_core_pwr)
+ kbasep_pm_toggle_power_interrupt(kbdev, true);
}
break;
@@ -934,6 +974,11 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
kbdev, kbase_backend_get_cycle_cnt(kbdev));
kbase_pm_enable_mcu_db_notification(kbdev);
kbase_pm_disable_db_mirror_interrupt(kbdev);
+ /* Disable PM interrupt after L2 has been
+ * powered up for the wakeup of MCU.
+ */
+ if (!kbdev->csf.firmware_hctl_core_pwr)
+ kbasep_pm_toggle_power_interrupt(kbdev, false);
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
}
break;
@@ -1017,6 +1062,18 @@ static void kbase_pm_l2_clear_backend_slot_submit_kctx(struct kbase_device *kbde
}
#endif
+static bool can_power_down_l2(struct kbase_device *kbdev)
+{
+#if MALI_USE_CSF
+ /* Due to the HW issue GPU2019-3878, need to prevent L2 power off
+ * whilst MMU command is in progress.
+ */
+ return !kbdev->mmu_hw_operation_in_progress;
+#else
+ return true;
+#endif
+}
+
static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -1258,9 +1315,8 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
}
backend->hwcnt_desired = false;
- if (!backend->hwcnt_disabled) {
+ if (!backend->hwcnt_disabled)
kbase_pm_trigger_hwcnt_disable(kbdev);
- }
#endif
if (backend->hwcnt_disabled) {
@@ -1297,27 +1353,31 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
break;
case KBASE_L2_POWER_DOWN:
- if (!backend->l2_always_on)
- /* Powering off the L2 will also power off the
- * tiler.
- */
- kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
- l2_present,
- ACTION_PWROFF);
- else
- /* If L2 cache is powered then we must flush it
- * before we power off the GPU. Normally this
- * would have been handled when the L2 was
- * powered off.
- */
- kbase_gpu_start_cache_clean_nolock(
- kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
+ if (kbase_pm_is_l2_desired(kbdev))
+ backend->l2_state = KBASE_L2_PEND_ON;
+ else if (can_power_down_l2(kbdev)) {
+ if (!backend->l2_always_on)
+ /* Powering off the L2 will also power off the
+ * tiler.
+ */
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+ l2_present,
+ ACTION_PWROFF);
+ else
+ /* If L2 cache is powered then we must flush it
+ * before we power off the GPU. Normally this
+ * would have been handled when the L2 was
+ * powered off.
+ */
+ kbase_gpu_start_cache_clean_nolock(
+ kbdev, GPU_COMMAND_CACHE_CLN_INV_L2);
#if !MALI_USE_CSF
- KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
#else
- KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
#endif
- backend->l2_state = KBASE_L2_PEND_OFF;
+ backend->l2_state = KBASE_L2_PEND_OFF;
+ }
break;
case KBASE_L2_PEND_OFF:
@@ -1803,12 +1863,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
- if (kbase_pm_is_l2_desired(kbdev) &&
- kbdev->pm.backend.l2_state != KBASE_L2_ON)
- in_desired_state = false;
- else if (!kbase_pm_is_l2_desired(kbdev) &&
- kbdev->pm.backend.l2_state != KBASE_L2_OFF)
- in_desired_state = false;
+ in_desired_state = kbase_pm_l2_is_in_desired_state(kbdev);
#if !MALI_USE_CSF
if (kbdev->pm.backend.shaders_desired &&
@@ -1818,13 +1873,7 @@ static bool kbase_pm_is_in_desired_state_nolock(struct kbase_device *kbdev)
kbdev->pm.backend.shaders_state != KBASE_SHADERS_OFF_CORESTACK_OFF)
in_desired_state = false;
#else
- if (kbase_pm_is_mcu_desired(kbdev) &&
- kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
- in_desired_state = false;
- else if (!kbase_pm_is_mcu_desired(kbdev) &&
- (kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) &&
- (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP))
- in_desired_state = false;
+ in_desired_state = kbase_pm_mcu_is_in_desired_state(kbdev);
#endif
return in_desired_state;
@@ -2077,11 +2126,13 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
-/* Timeout for kbase_pm_wait_for_desired_state when wait_event_killable has
- * aborted due to a fatal signal. If the time spent waiting has exceeded this
- * threshold then there is most likely a hardware issue.
+#if !MALI_USE_CSF
+/* Timeout in milliseconds for GPU Power Management to reach the desired
+ * Shader and L2 state. If the time spent waiting has exceeded this threshold
+ * then there is most likely a hardware issue.
*/
#define PM_TIMEOUT_MS (5000) /* 5s */
+#endif
static void kbase_pm_timed_out(struct kbase_device *kbdev)
{
@@ -2156,7 +2207,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#if MALI_USE_CSF
- timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
+ timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
#else
timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
#endif
@@ -2188,7 +2239,7 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
unsigned long flags;
long remaining;
#if MALI_USE_CSF
- long timeout = kbase_csf_timeout_in_jiffies(PM_TIMEOUT_MS);
+ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
#else
long timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
#endif
@@ -2285,6 +2336,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
{
lockdep_assert_held(&kbdev->pm.lock);
+ mutex_lock(&kbdev->csf.reg_lock);
if (kbdev->csf.mali_file_inode) {
/* This would zap the pte corresponding to the mapping of User
* register page for all the Kbase contexts.
@@ -2293,6 +2345,7 @@ static void update_user_reg_page_mapping(struct kbase_device *kbdev)
BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
PAGE_SIZE, 1);
}
+ mutex_unlock(&kbdev->csf.reg_lock);
}
#endif
@@ -2358,6 +2411,7 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
update_user_reg_page_mapping(kbdev);
#endif
+
if (reset_required) {
/* GPU state was lost, reset GPU to ensure it is in a
* consistent state
@@ -2659,8 +2713,8 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
{
struct device_node *np = kbdev->dev->of_node;
const u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
- const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
- GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+ const u32 prod_id =
+ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
int error = 0;
kbdev->hw_quirks_gpu = 0;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index 97e8607..68ded7d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -301,6 +301,8 @@ void kbase_pm_update_state(struct kbase_device *kbdev);
* kbase_pm_state_machine_init - Initialize the state machines, primarily the
* shader poweroff timer
* @kbdev: Device pointer
+ *
+ * Return: 0 on success, or a negative error code on failure
*/
int kbase_pm_state_machine_init(struct kbase_device *kbdev);
@@ -453,6 +455,8 @@ void kbase_pm_wait_for_gpu_power_down(struct kbase_device *kbdev);
* Setup the power management callbacks and initialize/enable the runtime-pm
* for the Mali GPU platform device, using the callback function. This must be
* called before the kbase_pm_register_access_enable() function.
+ *
+ * Return: 0 on success, or a negative error code on failure
*/
int kbase_pm_runtime_init(struct kbase_device *kbdev);
@@ -810,9 +814,50 @@ static inline bool kbase_pm_no_mcu_core_pwroff(struct kbase_device *kbdev)
return kbdev->pm.backend.csf_pm_sched_flags &
CSF_DYNAMIC_PM_CORE_KEEP_ON;
}
+
+/**
+ * kbase_pm_mcu_is_in_desired_state - Check if MCU is in stable ON/OFF state.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: true if MCU is in stable ON/OFF state.
+ */
+static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev)
+{
+ bool in_desired_state = true;
+
+ if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
+ in_desired_state = false;
+ else if (!kbase_pm_is_mcu_desired(kbdev) &&
+ (kbdev->pm.backend.mcu_state != KBASE_MCU_OFF) &&
+ (kbdev->pm.backend.mcu_state != KBASE_MCU_IN_SLEEP))
+ in_desired_state = false;
+
+ return in_desired_state;
+}
+
#endif
/**
+ * kbase_pm_l2_is_in_desired_state - Check if L2 is in stable ON/OFF state.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: true if L2 is in stable ON/OFF state.
+ */
+static inline bool kbase_pm_l2_is_in_desired_state(struct kbase_device *kbdev)
+{
+ bool in_desired_state = true;
+
+ if (kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_ON)
+ in_desired_state = false;
+ else if (!kbase_pm_is_l2_desired(kbdev) && kbdev->pm.backend.l2_state != KBASE_L2_OFF)
+ in_desired_state = false;
+
+ return in_desired_state;
+}
+
+/**
* kbase_pm_lock - Lock all necessary mutexes to perform PM actions
*
* @kbdev: Device pointer
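
A minimal caller sketch for the two helpers added above, kbase_pm_mcu_is_in_desired_state() and kbase_pm_l2_is_in_desired_state(); this is illustrative only (MALI_USE_CSF build assumed) and mirrors how kbase_pm_is_in_desired_state_nolock() now combines them.

/* Hypothetical sketch: combining the new helpers under hwaccess_lock,
 * as kbase_pm_is_in_desired_state_nolock() does on the CSF path.
 */
static bool pm_state_settled_sketch(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	return kbase_pm_l2_is_in_desired_state(kbdev) &&
	       kbase_pm_mcu_is_in_desired_state(kbdev);
}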
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index 69e8dd3..f85b466 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -235,6 +235,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
* difference.
*/
u64 margin_ns = diff_ns >> 6;
+
if (gpu_active_counter > (diff_ns + margin_ns)) {
dev_info(
kbdev->dev,
@@ -488,8 +489,7 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
BASE_JD_REQ_SPECIFIC_COHERENT_GROUP)
? katom->device_nr : 0;
if (!WARN_ON(device_nr >= 2))
- kbdev->pm.backend.metrics.
- active_cl_ctx[device_nr] = 1;
+ kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1;
} else {
kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
trace_sysgraph(SGR_ACTIVE, 0, js);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index bc05bd7..3a4022e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -180,9 +180,8 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
shaders_desired = kbdev->pm.backend.pm_current_policy->shaders_needed(kbdev);
- if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) {
+ if (shaders_desired && kbase_pm_is_l2_desired(kbdev))
kbase_pm_update_state(kbdev);
- }
#endif
}
@@ -249,9 +248,8 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_policy);
#if MALI_USE_CSF
static int policy_change_wait_for_L2_off(struct kbase_device *kbdev)
{
-#define WAIT_DURATION_MS (3000)
long remaining;
- long timeout = kbase_csf_timeout_in_jiffies(WAIT_DURATION_MS);
+ long timeout = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
int err = 0;
/* Wait for L2 becoming off, by which the MCU is also implicitly off
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index 51812ee..a83206a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -113,39 +113,60 @@ unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
*/
u64 timeout, nr_cycles = 0;
+ /* Default value to mean 'no cap' */
+ u64 timeout_cap = U64_MAX;
u64 freq_khz = kbdev->lowest_gpu_freq_khz;
+ /* Only for debug messages, safe default in case it's mis-maintained */
+ const char *selector_str = "(unknown)";
WARN_ON(!freq_khz);
switch (selector) {
- /* use Firmware timeout if invalid selection */
+ case KBASE_TIMEOUT_SELECTOR_COUNT:
default:
#if !MALI_USE_CSF
WARN(1, "Invalid timeout selector used! Using default value");
- timeout = JM_DEFAULT_TIMEOUT_CYCLES;
- CSTD_UNUSED(nr_cycles);
+ nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
+ break;
#else
+ /* Use Firmware timeout if invalid selection */
WARN(1,
"Invalid timeout selector used! Using CSF Firmware timeout");
fallthrough;
case CSF_FIRMWARE_TIMEOUT:
+ selector_str = "CSF_FIRMWARE_TIMEOUT";
nr_cycles = CSF_FIRMWARE_TIMEOUT_CYCLES;
- timeout = div_u64(nr_cycles, freq_khz);
- /* cap CSF FW timeout to FIRMWARE_PING_INTERVAL_MS
- * if calculated timeout exceeds it. This should be adapted to a
- * direct timeout comparison once the FIRMWARE_PING_INTERVAL_MS
- * option is added to this timeout function. A compile-time check
- * such as BUILD_BUG_ON can also be done once the firmware ping
- * interval in cycles becomes available as a macro.
+ /* Set up a cap on the CSF FW timeout at FIRMWARE_PING_INTERVAL_MS,
+ * in case the calculated timeout exceeds it. This should be adapted to
+ * a direct timeout comparison once the
+ * FIRMWARE_PING_INTERVAL_MS option is added to this timeout
+ * function. A compile-time check such as BUILD_BUG_ON can also
+ * be done once the firmware ping interval in cycles becomes
+ * available as a macro.
*/
- if (timeout > FIRMWARE_PING_INTERVAL_MS) {
- dev_dbg(kbdev->dev, "Capped CSF_FIRMWARE_TIMEOUT %llu to %d",
- timeout, FIRMWARE_PING_INTERVAL_MS);
- timeout = FIRMWARE_PING_INTERVAL_MS;
- }
-#endif
+ timeout_cap = FIRMWARE_PING_INTERVAL_MS;
+ break;
+ case CSF_PM_TIMEOUT:
+ selector_str = "CSF_PM_TIMEOUT";
+ nr_cycles = CSF_PM_TIMEOUT_CYCLES;
break;
+ case CSF_GPU_RESET_TIMEOUT:
+ selector_str = "CSF_GPU_RESET_TIMEOUT";
+ nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
+ break;
+#endif
+ }
+
+ timeout = div_u64(nr_cycles, freq_khz);
+ if (timeout > timeout_cap) {
+ dev_dbg(kbdev->dev, "Capped %s %llu to %llu", selector_str,
+ (unsigned long long)timeout, (unsigned long long)timeout_cap);
+ timeout = timeout_cap;
}
+ if (WARN(timeout > UINT_MAX,
+ "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
+ (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
+ timeout = UINT_MAX;
return (unsigned int)timeout;
}
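
Since freq_khz is expressed in cycles per millisecond, dividing the cycle budget by it yields the timeout in milliseconds directly. Below is a self-contained sketch of the conversion and capping performed above; the numeric values are purely illustrative and not the real *_TIMEOUT_CYCLES macros.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: convert a cycle budget to milliseconds at the
 * lowest GPU frequency and apply an optional cap, mirroring the flow
 * in kbase_get_timeout_ms(). The constants below are made up.
 */
int main(void)
{
	const uint64_t nr_cycles = 2500000000ULL; /* hypothetical cycle budget */
	const uint64_t freq_khz = 100000ULL;      /* 100 MHz lowest frequency */
	const uint64_t timeout_cap = 12000ULL;    /* e.g. a ping-interval cap */
	uint64_t timeout = nr_cycles / freq_khz;  /* kHz == cycles per ms */

	if (timeout > timeout_cap)
		timeout = timeout_cap;

	printf("timeout = %llu ms\n", (unsigned long long)timeout);
	return 0;
}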
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index 030af9d..5dd5fd5 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -34,6 +34,9 @@ bob_defaults {
"CONFIG_MALI_NO_MALI_DEFAULT_GPU={{.gpu}}",
],
},
+ mali_platform_dt_pin_rst: {
+ kbuild_options: ["CONFIG_MALI_PLATFORM_DT_PIN_RST=y"],
+ },
gpu_has_csf: {
kbuild_options: ["CONFIG_MALI_CSF_SUPPORT=y"],
},
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 8ce81e7..6b65bab 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -110,6 +110,11 @@ static void kbase_context_flush_jobs(struct kbase_context *kctx)
flush_workqueue(kctx->jctx.job_done_wq);
}
+/**
+ * kbase_context_free - Free kcontext at its destruction
+ *
+ * @kctx: kcontext to be freed
+ */
static void kbase_context_free(struct kbase_context *kctx)
{
kbase_timeline_post_kbase_context_destroy(kctx);
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 9eaf69a..f5258bd 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -152,6 +152,7 @@ int kbase_context_common_init(struct kbase_context *kctx)
init_waitqueue_head(&kctx->event_queue);
atomic_set(&kctx->event_count, 0);
+
#if !MALI_USE_CSF
atomic_set(&kctx->event_closed, false);
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
@@ -159,6 +160,11 @@ int kbase_context_common_init(struct kbase_context *kctx)
#endif
#endif
+#if MALI_USE_CSF
+ atomic64_set(&kctx->num_fixable_allocs, 0);
+ atomic64_set(&kctx->num_fixed_allocs, 0);
+#endif
+
bitmap_copy(kctx->cookies, &cookies_mask, BITS_PER_LONG);
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
index 546e18d..a56b689 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -61,7 +61,7 @@
/*
* Maximum number of loops polling the GPU before we assume the GPU has hung.
*/
-#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)8000000)
+#define IPA_INACTIVE_MAX_LOOPS (8000000U)
/*
* Number of bits used to configure a performance counter in SELECT registers.
@@ -356,9 +356,8 @@ void kbase_ipa_control_init(struct kbase_device *kbdev)
spin_lock_init(&ipa_ctrl->lock);
ipa_ctrl->num_active_sessions = 0;
- for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++)
ipa_ctrl->sessions[i].active = false;
- }
listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data),
GFP_KERNEL);
@@ -523,8 +522,10 @@ int kbase_ipa_control_register(
struct kbase_ipa_control_session *session = NULL;
unsigned long flags;
- if (WARN_ON(kbdev == NULL) || WARN_ON(perf_counters == NULL) ||
- WARN_ON(client == NULL) ||
+ if (WARN_ON(unlikely(kbdev == NULL)))
+ return -ENODEV;
+
+ if (WARN_ON(perf_counters == NULL) || WARN_ON(client == NULL) ||
WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
return -EINVAL;
@@ -706,7 +707,10 @@ int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
unsigned long flags;
bool new_config = false, valid_session = false;
- if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL)) {
+ if (WARN_ON(unlikely(kbdev == NULL)))
+ return -ENODEV;
+
+ if (WARN_ON(client == NULL)) {
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
return -EINVAL;
}
@@ -788,8 +792,10 @@ int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
unsigned long flags;
bool gpu_ready;
- if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL) ||
- WARN_ON(values == NULL)) {
+ if (WARN_ON(unlikely(kbdev == NULL)))
+ return -ENODEV;
+
+ if (WARN_ON(client == NULL) || WARN_ON(values == NULL)) {
dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
return -EINVAL;
}
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 8b70349..dd15287 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -27,7 +27,7 @@
#include <linux/export.h>
#include <linux/priority_control_manager.h>
#include <linux/shmem_fs.h>
-#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
#include "mali_kbase_csf_tiler_heap.h"
#include <mmu/mali_kbase_mmu.h>
#include "mali_kbase_csf_timeout.h"
@@ -561,6 +561,10 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->sync_ptr = 0;
queue->sync_value = 0;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ queue->saved_cmd_ptr = 0;
+#endif
+
queue->sb_status = 0;
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
@@ -572,6 +576,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
list_add(&queue->link, &kctx->csf.queue_list);
+ queue->extract_ofs = 0;
+
region->flags |= KBASE_REG_NO_USER_FREE;
region->user_data = queue;
@@ -621,13 +627,13 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
return -EINVAL;
/* Validate the cs_trace configuration parameters */
- if (reg->ex_buffer_size &&
- ((reg->ex_event_size > max_size) ||
- (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
- (reg->ex_buffer_size < min_buf_size)))
- return -EINVAL;
+ if (reg->ex_buffer_size &&
+ ((reg->ex_event_size > max_size) ||
+ (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
+ (reg->ex_buffer_size < min_buf_size)))
+ return -EINVAL;
- return csf_queue_register_internal(kctx, NULL, reg);
+ return csf_queue_register_internal(kctx, NULL, reg);
}
static void unbind_queue(struct kbase_context *kctx,
@@ -1195,7 +1201,7 @@ static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
}
s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
- nr_pages);
+ nr_pages, true);
if (s_buf->pma == NULL) {
err = -ENOMEM;
goto pma_alloc_failed;
@@ -1229,7 +1235,7 @@ mmu_insert_failed:
mutex_unlock(&kbdev->csf.reg_lock);
add_va_region_failed:
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+ kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
pma_alloc_failed:
kfree(phys);
phy_alloc_failed:
@@ -1479,7 +1485,7 @@ static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
kbase_remove_va_region(kbdev, s_buf->reg);
mutex_unlock(&kbdev->csf.reg_lock);
- kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+ kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages, true);
s_buf->pma = NULL;
kfree(s_buf->reg);
s_buf->reg = NULL;
@@ -1925,7 +1931,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* This function will handle the OoM event request from the firmware for the
* CS. It will retrieve the address of heap context and heap's
* statistics (like number of render passes in-flight) from the CS's kernel
- * kernel output page and pass them to the tiler heap function to allocate a
+ * output page and pass them to the tiler heap function to allocate a
* new chunk.
* It will also update the CS's kernel input page with the address
* of a new chunk that was allocated.
@@ -2521,8 +2527,24 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
}
}
- if (protm_pend)
- queue_work(group->kctx->csf.wq, &group->protm_event_work);
+ if (protm_pend) {
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ u32 current_protm_pending_seq =
+ scheduler->tick_protm_pending_seq;
+
+ if (current_protm_pending_seq > group->scan_seq_num) {
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
+ }
+
+ if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
+ clear_bit(group->csg_nr,
+ scheduler->csg_slots_idle_mask);
+ dev_dbg(kbdev->dev,
+ "Group-%d on slot %d de-idled by protm request",
+ group->handle, group->csg_nr);
+ }
+ }
}
/**
@@ -2593,6 +2615,10 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
+
+ /* SYNC_UPDATE events shall invalidate GPU idle event */
+ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
+
kbase_csf_event_signal_cpu_only(group->kctx);
}
@@ -2609,15 +2635,25 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
group->handle, csg_nr);
- /* Check if the scheduling tick can be advanced */
- if (kbase_csf_scheduler_all_csgs_idle(kbdev)) {
- if (!scheduler->gpu_idle_fw_timer_enabled)
- kbase_csf_scheduler_advance_tick_nolock(kbdev);
- } else if (atomic_read(&scheduler->non_idle_offslot_grps)) {
+ if (atomic_read(&scheduler->non_idle_offslot_grps)) {
/* If there are non-idle CSGs waiting for a slot, fire
* a tock for a replacement.
*/
mod_delayed_work(scheduler->wq, &scheduler->tock_work, 0);
+ } else {
+ u32 current_protm_pending_seq =
+ scheduler->tick_protm_pending_seq;
+
+ if ((current_protm_pending_seq !=
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) &&
+ (group->scan_seq_num < current_protm_pending_seq)) {
+ /* If the protm enter was prevented due to the
+ * group's priority, then fire a tock for the scheduler
+ * to re-examine the case.
+ */
+ mod_delayed_work(scheduler->wq,
+ &scheduler->tock_work, 0);
+ }
}
}
@@ -2803,20 +2839,29 @@ static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
unsigned long flags;
- u32 remaining = val;
+ u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
lockdep_assert_held(&kbdev->hwaccess_lock);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
+ if (csg_interrupts != 0) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ while (csg_interrupts != 0) {
+ int const csg_nr = ffs(csg_interrupts) - 1;
+
+ process_csg_interrupts(kbdev, csg_nr);
+ csg_interrupts &= ~(1 << csg_nr);
+ }
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ }
+
if (val & JOB_IRQ_GLOBAL_IF) {
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
kbdev->csf.interrupt_received = true;
- remaining &= ~JOB_IRQ_GLOBAL_IF;
if (!kbdev->csf.firmware_reloaded)
kbase_csf_firmware_reload_completed(kbdev);
@@ -2837,31 +2882,12 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
/* Handle IDLE Hysteresis notification event */
if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
- int non_idle_offslot_grps;
- bool can_suspend_on_idle;
-
dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
kbase_csf_firmware_global_input_mask(
global_iface, GLB_REQ, glb_ack,
GLB_REQ_IDLE_EVENT_MASK);
- non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
- can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
- ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
-
- if (!non_idle_offslot_grps) {
- if (can_suspend_on_idle)
- queue_work(system_highpri_wq,
- &scheduler->gpu_idle_work);
- } else {
- /* Advance the scheduling tick to get
- * the non-idle suspended groups loaded
- * soon.
- */
- kbase_csf_scheduler_advance_tick_nolock(
- kbdev);
- }
+ kbase_csf_scheduler_process_gpu_idle_event(kbdev);
}
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
@@ -2873,22 +2899,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
*/
kbase_pm_update_state(kbdev);
}
-
- if (!remaining) {
- wake_up_all(&kbdev->csf.event_wait);
- KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
- return;
- }
- }
-
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- while (remaining != 0) {
- int const csg_nr = ffs(remaining) - 1;
-
- process_csg_interrupts(kbdev, csg_nr);
- remaining &= ~(1 << csg_nr);
}
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
wake_up_all(&kbdev->csf.event_wait);
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
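
The reworked handler walks the per-CSG interrupt bits with ffs() before handling the global interrupt. A small standalone sketch of that set-bit iteration pattern (illustrative only, user-space ffs() from <strings.h>):

#include <stdio.h>
#include <strings.h> /* ffs() */

/* Illustrative only: iterate over the set bits of a 32-bit interrupt
 * word, lowest bit first, as the CSG interrupt loop above does.
 */
int main(void)
{
	unsigned int csg_interrupts = 0x25; /* hypothetical: CSG slots 0, 2 and 5 */

	while (csg_interrupts != 0) {
		int csg_nr = ffs(csg_interrupts) - 1; /* index of lowest set bit */

		printf("servicing CSG slot %d\n", csg_nr);
		csg_interrupts &= ~(1u << csg_nr);
	}
	return 0;
}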
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index e3db81d..46a0529 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -40,7 +40,12 @@
*/
#define KBASEP_USER_DB_NR_INVALID ((s8)-1)
-#define FIRMWARE_PING_INTERVAL_MS (8000) /* 8 seconds */
+/* Indicates an invalid value for the scan out sequence number, used to
+ * signify there is no group that has protected mode execution pending.
+ */
+#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
+
+#define FIRMWARE_PING_INTERVAL_MS (12000) /* 12 seconds */
#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
@@ -312,7 +317,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
/**
* kbase_csf_free_dummy_user_reg_page - Free the dummy page that was used
- * used to replace the User register page
+ * to replace the User register page
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
index 26637bf..516a33f 100644
--- a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
@@ -54,7 +54,7 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
mutex_lock(&kctx->csf.lock);
if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
- seq_printf(file, "Dump request already started! (try again)\n");
+ seq_puts(file, "Dump request already started! (try again)\n");
mutex_unlock(&kctx->csf.lock);
return -EBUSY;
}
@@ -64,7 +64,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
kbase_event_wakeup(kctx);
mutex_unlock(&kctx->csf.lock);
- seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION);
+ seq_puts(file,
+ "CPU Queues table (version:v" __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n");
wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp,
msecs_to_jiffies(3000));
@@ -79,9 +80,8 @@ static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
kfree(kctx->csf.cpu_queue.buffer);
kctx->csf.cpu_queue.buffer = NULL;
kctx->csf.cpu_queue.buffer_size = 0;
- }
- else
- seq_printf(file, "Dump error! (time out)\n");
+ } else
+ seq_puts(file, "Dump error! (time out)\n");
atomic_set(&kctx->csf.cpu_queue.dump_req_status,
BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
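
The seq_printf()-to-seq_puts() conversion relies on __stringify() to fold the version number into the string at compile time. A small standalone sketch of the same stringification idiom; the macros and the version value below are stand-ins, not the kernel's <linux/stringify.h> or the real version.

#include <stdio.h>

/* Stand-ins for the kernel's __stringify(): a two-level macro so the
 * argument is macro-expanded before being stringized.
 */
#define __stringify_1(x) #x
#define __stringify(x)   __stringify_1(x)

#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0 /* placeholder value */

int main(void)
{
	/* Adjacent string literals are concatenated at compile time, so no
	 * runtime formatting is needed, which is why seq_puts() suffices.
	 */
	fputs("CPU Queues table (version:v"
	      __stringify(MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION) "):\n", stdout);
	return 0;
}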
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index 40bee79..2075797 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -172,16 +172,18 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
cs_active = addr[CS_ACTIVE/4];
#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \
- "Bind Idx, Ringbuf addr, Prio, Insert offset, Extract offset, Active, Doorbell\n"
+ "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n"
- seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %4u, %16llx, %16llx, %6u, %8d\n",
- queue->csi_index, queue->base_addr, queue->priority,
- cs_insert, cs_extract, cs_active, queue->doorbell_nr);
+ seq_printf(file, KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO "%8d, %16llx, %8x, %4u, %16llx, %16llx, %6u, %8d\n",
+ queue->csi_index, queue->base_addr,
+ queue->size,
+ queue->priority, cs_insert, cs_extract, cs_active, queue->doorbell_nr);
/* Print status information for blocked group waiting for sync object. For on-slot queues,
* if cs_trace is enabled, dump the interface's cs_trace configuration.
*/
if (kbase_csf_scheduler_group_get_slot(queue->group) < 0) {
+ seq_printf(file, "SAVED_CMD_PTR: 0x%llx\n", queue->saved_cmd_ptr);
if (CS_STATUS_WAIT_SYNC_WAIT_GET(queue->status_wait)) {
wait_status = queue->status_wait;
wait_sync_value = queue->sync_value;
@@ -268,17 +270,13 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
seq_puts(file, "\n");
}
-/* Waiting timeout for STATUS_UPDATE acknowledgment, in milliseconds */
-#define CSF_STATUS_UPDATE_TO_MS (100)
-
static void update_active_group_status(struct seq_file *file,
struct kbase_queue_group *const group)
{
struct kbase_device *const kbdev = group->kctx->kbdev;
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
- long remaining =
- kbase_csf_timeout_in_jiffies(CSF_STATUS_UPDATE_TO_MS);
+ long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
unsigned long flags;
/* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
@@ -327,6 +325,7 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
struct kbase_device *const kbdev = group->kctx->kbdev;
u32 ep_c, ep_r;
char exclusive;
+ char idle = 'N';
struct kbase_csf_cmd_stream_group_info const *const ginfo =
&kbdev->csf.global_iface.groups[group->csg_nr];
u8 slot_priority =
@@ -345,8 +344,12 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
else
exclusive = '0';
- seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive\n");
- seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c\n",
+ if (kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_STATE) &
+ CSG_STATUS_STATE_IDLE_MASK)
+ idle = 'Y';
+
+ seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
+ seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
group->handle,
group->csg_nr,
slot_priority,
@@ -358,7 +361,8 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
- exclusive);
+ exclusive,
+ idle);
/* Wait for the User doorbell ring to take effect */
if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index 0712648..07b5874 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -252,6 +252,24 @@ enum kbase_queue_group_priority {
KBASE_QUEUE_GROUP_PRIORITY_COUNT
};
+/**
+ * enum kbase_timeout_selector - The choice of which timeout to get scaled
+ * using the lowest GPU frequency.
+ * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
+ * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
+ * Shader, L2 and MCU state.
+ * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
+ * the enum.
+ */
+enum kbase_timeout_selector {
+ CSF_FIRMWARE_TIMEOUT,
+ CSF_PM_TIMEOUT,
+ CSF_GPU_RESET_TIMEOUT,
+
+ /* Must be the last in the enum */
+ KBASE_TIMEOUT_SELECTOR_COUNT
+};
/**
* struct kbase_csf_notification - Event or error generated as part of command
@@ -333,6 +351,13 @@ struct kbase_csf_notification {
* @cs_fatal_info: Records additional information about the CS fatal event.
* @cs_fatal: Records information about the CS fatal event.
* @pending: Indicating whether the queue has new submitted work.
+ * @extract_ofs: The current EXTRACT offset; this is updated during certain
+ * events such as GPU idle IRQ in order to help detect a
+ * queue's true idle status.
+ * @saved_cmd_ptr: The command pointer value for the GPU queue, saved when the
+ * group to which the queue is bound is suspended.
+ * This can be useful in certain cases to know the point up to
+ * which execution had reached in the linear command buffer.
*/
struct kbase_queue {
struct kbase_context *kctx;
@@ -367,6 +392,10 @@ struct kbase_queue {
u64 cs_fatal_info;
u32 cs_fatal;
atomic_t pending;
+ u64 extract_ofs;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ u64 saved_cmd_ptr;
+#endif
};
/**
@@ -851,11 +880,14 @@ struct kbase_csf_csg_slot {
* This pointer being set doesn't necessarily indicates
* that GPU is in protected mode, kbdev->protected_mode
* needs to be checked for that.
- * @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activiated the
- * firmware idle hysteresis timer for preparing a
- * GPU suspend on idle.
+ * @idle_wq: Workqueue for executing GPU idle notification
+ * handler.
* @gpu_idle_work: Work item for facilitating the scheduler to bring
* the GPU to a low-power mode on becoming idle.
+ * @gpu_no_longer_idle: Effective only when the GPU idle worker has been
+ * queued for execution, this indicates whether the
+ * GPU has become non-idle since the last time the
+ * idle notification was received.
* @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during
* the scheduler active phase in a tick. It then
* tracks the count of non-idle groups across all the
@@ -876,6 +908,12 @@ struct kbase_csf_csg_slot {
* when scheduling tick needs to be advanced from
* interrupt context, without actually deactivating
* the @tick_timer first and then enqueing @tick_work.
+ * @tick_protm_pending_seq: Scan out sequence number of the group that has
+ * protected mode execution pending for the queue(s)
+ * bound to it, and that will be considered first for
+ * protected mode execution compared to other such
+ * groups. It is updated on every tick/tock.
+ * @interrupt_lock is used to serialize the access.
*/
struct kbase_csf_scheduler {
struct mutex lock;
@@ -907,13 +945,15 @@ struct kbase_csf_scheduler {
struct kbase_queue_group *top_grp;
bool tock_pending_request;
struct kbase_queue_group *active_protm_grp;
- bool gpu_idle_fw_timer_enabled;
+ struct workqueue_struct *idle_wq;
struct work_struct gpu_idle_work;
+ atomic_t gpu_no_longer_idle;
atomic_t non_idle_offslot_grps;
u32 non_idle_scanout_grps;
u32 pm_active_count;
unsigned int csg_scheduling_period_ms;
bool tick_timer_active;
+ u32 tick_protm_pending_seq;
};
/*
@@ -1050,8 +1090,7 @@ struct kbase_ipa_control_prfcnt_config {
*
*/
struct kbase_ipa_control_prfcnt_block {
- struct kbase_ipa_control_prfcnt_config
- select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
+ struct kbase_ipa_control_prfcnt_config select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
size_t num_available_counters;
};
@@ -1074,8 +1113,7 @@ struct kbase_ipa_control_prfcnt_block {
*/
struct kbase_ipa_control {
struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM];
- struct kbase_ipa_control_session
- sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
+ struct kbase_ipa_control_session sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
spinlock_t lock;
void *rtm_listener_data;
size_t num_active_sessions;
@@ -1089,8 +1127,15 @@ struct kbase_ipa_control {
* @node: Interface objects are on the kbase_device:csf.firmware_interfaces
* list using this list_head to link them
* @phys: Array of the physical (tagged) addresses making up this interface
+ * @reuse_pages: Flag used to identify if the FW interface entry reuses
+ * physical pages allocated for another FW interface entry.
+ * @is_small_page: Flag used to identify if small pages are used for
+ * the FW interface entry.
* @name: NULL-terminated string naming the interface
* @num_pages: Number of entries in @phys and @pma (and length of the interface)
+ * @num_pages_aligned: Same as @num_pages, except when @is_small_page is false
+ * and @reuse_pages is false, in which case the value is
+ * aligned to NUM_4K_PAGES_IN_2MB_PAGE.
* @virtual: Starting GPU virtual address this interface is mapped at
* @flags: bitmask of CSF_FIRMWARE_ENTRY_* conveying the interface attributes
* @data_start: Offset into firmware image at which the interface data starts
@@ -1102,8 +1147,11 @@ struct kbase_ipa_control {
struct kbase_csf_firmware_interface {
struct list_head node;
struct tagged_addr *phys;
+ bool reuse_pages;
+ bool is_small_page;
char *name;
u32 num_pages;
+ u32 num_pages_aligned;
u32 virtual;
u32 flags;
u32 data_start;
@@ -1177,7 +1225,7 @@ struct kbase_csf_hwcnt {
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
- * of hardware doorbell pages, assignment of CSGs,
+ * hardware doorbell pages, assignment of CSGs,
* sending global requests.
* @event_wait: Wait queue to wait for receiving csf events, i.e.
* the interrupt from CSF firmware, or scheduler state
@@ -1200,6 +1248,10 @@ struct kbase_csf_hwcnt {
* in GPU reset has completed.
* @firmware_reload_needed: Flag for indicating that the firmware needs to be
* reloaded as part of the GPU reset action.
+ * @firmware_full_reload_needed: Flag for indicating that the firmware needs to
+ * be fully re-loaded. This may be set when the
+ * boot or re-init of MCU fails after a successful
+ * soft reset.
 * @firmware_hctl_core_pwr: Flag for indicating that the host driver is in
* charge of the shader core's power transitions, and
* the mcu_core_pwroff timeout feature is disabled
@@ -1259,6 +1311,7 @@ struct kbase_csf_device {
bool firmware_inited;
bool firmware_reloaded;
bool firmware_reload_needed;
+ bool firmware_full_reload_needed;
bool firmware_hctl_core_pwr;
struct work_struct firmware_reload_work;
bool glb_init_request_pending;
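
The new @gpu_no_longer_idle flag is documented as meaningful only while the GPU idle worker is queued. A purely hypothetical consumer sketch follows (not code from this patch, only an illustration of the documented intent):

/* Hypothetical sketch of how @gpu_no_longer_idle could be consumed:
 * bail out of the idle handling if the GPU became non-idle again after
 * the idle notification that queued this work (for example because a
 * SYNC_UPDATE interrupt set the flag in process_csg_interrupts()).
 */
static void gpu_idle_worker_sketch(struct kbase_csf_scheduler *scheduler)
{
	if (atomic_read(&scheduler->gpu_no_longer_idle))
		return; /* GPU is busy again; skip the low-power transition */

	/* ... proceed with bringing the GPU to a low-power state ... */
}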
diff --git a/mali_kbase/csf/mali_kbase_csf_event.h b/mali_kbase/csf/mali_kbase_csf_event.h
index 1270ef6..4c853b5 100644
--- a/mali_kbase/csf/mali_kbase_csf_event.h
+++ b/mali_kbase/csf/mali_kbase_csf_event.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -110,9 +110,9 @@ static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
/**
* kbase_csf_event_init - Initialize event object
*
- * This function initializes the event object.
- *
* @kctx: The kbase context whose event object will be initialized.
+ *
+ * This function initializes the event object.
*/
void kbase_csf_event_init(struct kbase_context *const kctx);
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 202c677..bf7cdf4 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,7 @@
#include "mali_kbase_csf_trace_buffer.h"
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
+#include "mali_kbase_mem_pool_group.h"
#include "mali_kbase_reset_gpu.h"
#include "mali_kbase_ctx_sched.h"
#include "mali_kbase_csf_scheduler.h"
@@ -35,7 +36,7 @@
#include "mali_kbase_csf_tl_reader.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
-#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -50,7 +51,6 @@
#include <asm/arch_timer.h>
#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
-#define ACK_TIMEOUT_MILLISECONDS 1000
static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
@@ -105,9 +105,9 @@ MODULE_PARM_DESC(fw_debug,
#define CSF_MAX_FW_STOP_LOOPS (100000)
-#define CSF_GLB_REQ_CFG_MASK \
- (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
- GLB_REQ_CFG_PWROFF_TIMER_MASK)
+#define CSF_GLB_REQ_CFG_MASK \
+ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
+ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
@@ -190,11 +190,11 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
return -EINVAL;
reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
- interface->num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
- interface->virtual, interface->num_pages, 1);
+ interface->virtual, interface->num_pages_aligned, 1);
mutex_unlock(&kbdev->csf.reg_lock);
if (ret)
kfree(reg);
@@ -423,7 +423,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
}
}
-static int reload_fw_data_sections(struct kbase_device *kbdev)
+static int reload_fw_image(struct kbase_device *kbdev)
{
const u32 magic = FIRMWARE_HEADER_MAGIC;
struct kbase_csf_firmware_interface *interface;
@@ -451,24 +451,79 @@ static int reload_fw_data_sections(struct kbase_device *kbdev)
}
list_for_each_entry(interface, &kbdev->csf.firmware_interfaces, node) {
- /* Skip reload of text & read only data sections */
- if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
- !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
- continue;
+ /* Don't skip re-loading any section if a full reload was requested */
+ if (!kbdev->csf.firmware_full_reload_needed) {
+ /* Skip reload of text & read only data sections */
+ if ((interface->flags & CSF_FIRMWARE_ENTRY_EXECUTE) ||
+ !(interface->flags & CSF_FIRMWARE_ENTRY_WRITE))
+ continue;
+ }
load_fw_image_section(kbdev, firmware->data, interface->phys,
interface->num_pages, interface->flags,
interface->data_start, interface->data_end);
}
- kbase_csf_firmware_reload_trace_buffers_data(kbdev);
+ kbdev->csf.firmware_full_reload_needed = false;
+ kbase_csf_firmware_reload_trace_buffers_data(kbdev);
out:
release_firmware(firmware);
return ret;
}
/**
+ * entry_find_large_page_to_reuse() - Find if the large page of a previously
+ * parsed FW interface entry can be reused to
+ * store the contents of a new FW interface entry.
+ *
+ * @kbdev: Kbase device structure
+ * @virtual_start: Start of the virtual address range required for an entry allocation
+ * @virtual_end: End of the virtual address range required for an entry allocation
+ * @phys: Pointer to the array of physical (tagged) addresses making up the new
+ * FW interface entry. It is an output parameter which would be made to
+ * point to an already existing array allocated for the previously parsed
+ * FW interface entry using large page(s). If no appropriate entry is
+ * found it is set to NULL.
+ * @pma: Pointer to a protected memory allocation. It is an output parameter
+ * which would be made to point to the protected memory allocation of a
+ * previously parsed FW interface entry using large page(s) from protected
+ * memory. If no appropriate entry is found it is set to NULL.
+ * @num_pages: Number of pages requested.
+ * @num_pages_aligned: This is an output parameter used to carry the number of 4KB pages
+ * within the 2MB page aligned allocation.
+ * @is_small_page: This is an output flag used to select between the small and large page
+ * to be used for the FW entry allocation.
+ *
+ * Go through all the already initialized interfaces and find if a previously
+ * allocated large page can be used to store the contents of a new FW interface entry.
+ *
+ * Return: true if a large page can be reused, false otherwise.
+ */
+static inline bool entry_find_large_page_to_reuse(
+ struct kbase_device *kbdev, const u32 virtual_start, const u32 virtual_end,
+ struct tagged_addr **phys, struct protected_memory_allocation ***pma,
+ u32 num_pages, u32 *num_pages_aligned, bool *is_small_page)
+{
+ struct kbase_csf_firmware_interface *interface = NULL;
+ struct kbase_csf_firmware_interface *target_interface = NULL;
+ u32 virtual_diff_min = U32_MAX;
+ bool reuse_large_page = false;
+
+ CSTD_UNUSED(interface);
+ CSTD_UNUSED(target_interface);
+ CSTD_UNUSED(virtual_diff_min);
+
+ *num_pages_aligned = num_pages;
+ *is_small_page = true;
+ *phys = NULL;
+ *pma = NULL;
+
+
+ return reuse_large_page;
+}
+
+/**
* parse_memory_setup_entry() - Process an "interface memory setup" section
*
* @kbdev: Kbase device structure
@@ -493,6 +548,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
const u32 data_start = entry[3];
const u32 data_end = entry[4];
u32 num_pages;
+ u32 num_pages_aligned;
char *name;
struct tagged_addr *phys = NULL;
struct kbase_csf_firmware_interface *interface = NULL;
@@ -500,6 +556,8 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
unsigned long mem_flags = 0;
u32 cache_mode = 0;
struct protected_memory_allocation **pma = NULL;
+ bool reuse_pages = false;
+ bool is_small_page = true;
if (data_end < data_start) {
dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
@@ -542,23 +600,37 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
num_pages = (virtual_end - virtual_start)
>> PAGE_SHIFT;
- phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
+ reuse_pages = entry_find_large_page_to_reuse(
+ kbdev, virtual_start, virtual_end, &phys, &pma,
+ num_pages, &num_pages_aligned, &is_small_page);
+ if (!reuse_pages)
+ phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
+
if (!phys)
return -ENOMEM;
if (protected_mode) {
- pma = kbase_csf_protected_memory_alloc(kbdev, phys, num_pages);
+ if (!reuse_pages) {
+ pma = kbase_csf_protected_memory_alloc(
+ kbdev, phys, num_pages_aligned, is_small_page);
+ }
- if (pma == NULL) {
+ if (!pma)
ret = -ENOMEM;
- goto out;
- }
} else {
- ret = kbase_mem_pool_alloc_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
- if (ret < 0)
- goto out;
+ if (!reuse_pages) {
+ ret = kbase_mem_pool_alloc_pages(
+ kbase_mem_pool_group_select(
+ kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
+ num_pages_aligned, phys, false);
+ }
+ }
+
+ if (ret < 0) {
+ dev_err(kbdev->dev,
+ "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n",
+ num_pages_aligned, virtual_start);
+ goto out;
}
allocated_pages = true;
@@ -584,7 +656,10 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
interface->name = name;
interface->phys = phys;
+ interface->reuse_pages = reuse_pages;
+ interface->is_small_page = is_small_page;
interface->num_pages = num_pages;
+ interface->num_pages_aligned = num_pages_aligned;
interface->virtual = virtual_start;
interface->kernel_map = NULL;
interface->flags = flags;
@@ -645,15 +720,17 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
list_add(&interface->node, &kbdev->csf.firmware_interfaces);
- ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
- virtual_start >> PAGE_SHIFT, phys, num_pages, mem_flags,
- KBASE_MEM_GROUP_CSF_FW);
+ if (!reuse_pages) {
+ ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
+ virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags,
+ KBASE_MEM_GROUP_CSF_FW);
- if (ret != 0) {
- dev_err(kbdev->dev, "Failed to insert firmware pages\n");
- /* The interface has been added to the list, so cleanup will
- * be handled by firmware unloading
- */
+ if (ret != 0) {
+ dev_err(kbdev->dev, "Failed to insert firmware pages\n");
+ /* The interface has been added to the list, so cleanup will
+ * be handled by firmware unloading
+ */
+ }
}
dev_dbg(kbdev->dev, "Processed section '%s'", name);
@@ -662,16 +739,22 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
out:
if (allocated_pages) {
- if (protected_mode) {
- kbase_csf_protected_memory_free(kbdev, pma, num_pages);
- } else {
- kbase_mem_pool_free_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false, false);
+ if (!reuse_pages) {
+ if (protected_mode) {
+ kbase_csf_protected_memory_free(
+ kbdev, pma, num_pages_aligned, is_small_page);
+ } else {
+ kbase_mem_pool_free_pages(
+ kbase_mem_pool_group_select(
+ kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
+ num_pages_aligned, phys, false, false);
+ }
}
}
- kfree(phys);
+ if (!reuse_pages)
+ kfree(phys);
+
kfree(interface);
return ret;
}
@@ -994,11 +1077,10 @@ static int parse_capabilities(struct kbase_device *kbdev)
iface->group_stride = shared_info[GLB_GROUP_STRIDE/4];
iface->prfcnt_size = shared_info[GLB_PRFCNT_SIZE/4];
- if (iface->version >= kbase_csf_interface_version(1, 1, 0)) {
+ if (iface->version >= kbase_csf_interface_version(1, 1, 0))
iface->instr_features = shared_info[GLB_INSTR_FEATURES / 4];
- } else {
+ else
iface->instr_features = 0;
- }
if ((GROUP_CONTROL_0 +
(unsigned long)iface->group_num * iface->group_stride) >
@@ -1378,16 +1460,28 @@ static void set_timeout_global(
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
}
+static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+ kbdev->csf.gpu_idle_dur_count);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+ GLB_REQ_IDLE_ENABLE_MASK);
+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+ kbdev->csf.gpu_idle_dur_count);
+}
+
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
- u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
- GLB_ACK_IRQ_MASK_PING_MASK |
- GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
- GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
- GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
- GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
+ u32 const ack_irq_mask =
+ GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK | GLB_ACK_IRQ_MASK_PING_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK | GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK | GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK | GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1401,6 +1495,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
+ /* The GPU idle timer is always enabled for simplicity. Checks will be
+ * done before scheduling the GPU idle worker to see if it is
+ * appropriate for the current power policy.
+ */
+ enable_gpu_idle_timer(kbdev);
+
/* Unmask the interrupts */
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -1507,7 +1607,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING(kbdev, kbase_backend_get_cycle_cnt(kbdev));
/* Reload just the data sections from firmware binary image */
- err = reload_fw_data_sections(kbdev);
+ err = reload_fw_image(kbdev);
if (err)
return;
@@ -1598,7 +1698,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
{
- return kbdev->csf.gpu_idle_hysteresis_ms;
+ unsigned long flags;
+ u32 dur;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ dur = kbdev->csf.gpu_idle_hysteresis_ms;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return dur;
}
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -1606,11 +1713,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
unsigned long flags;
const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+ /* The 'fw_load_lock' is taken to synchronize against the deferred
+ * loading of FW, where the idle timer will be enabled.
+ */
+ mutex_lock(&kbdev->fw_load_lock);
+ if (unlikely(!kbdev->csf.firmware_inited)) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ mutex_unlock(&kbdev->fw_load_lock);
+ goto end;
+ }
+ mutex_unlock(&kbdev->fw_load_lock);
+
+ kbase_csf_scheduler_pm_active(kbdev);
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ dev_err(kbdev->dev,
+ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
+ kbase_csf_scheduler_pm_idle(kbdev);
+ return kbdev->csf.gpu_idle_dur_count;
+ }
+
+ /* The 'reg_lock' is also taken and is held until the update is
+ * complete, to ensure that updates of the idle timer value by multiple
+ * Users get serialized.
+ */
+ mutex_lock(&kbdev->csf.reg_lock);
+ /* The firmware only reads the new idle timer value when the timer is
+ * disabled.
+ */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Ensure that the request has taken effect */
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+ mutex_unlock(&kbdev->csf.reg_lock);
+ kbase_csf_scheduler_pm_idle(kbdev);
+
+end:
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
hysteresis_val);
@@ -1711,7 +1860,7 @@ static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
long ack_timeout;
ack_timeout = kbase_csf_timeout_in_jiffies(
- ACK_TIMEOUT_MILLISECONDS);
+ kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT));
/* write enable request to global input */
kbase_csf_firmware_global_input_mask(
@@ -1748,6 +1897,20 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
+ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev))
+ kbdev->csf.gpu_idle_hysteresis_ms /=
+ FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
+ kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
+ kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
+
+ kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
+ kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
+ kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
+
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
@@ -1786,20 +1949,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
return ret;
}
- kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
-#ifdef KBASE_PM_RUNTIME
- if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_ms /=
- FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
-#endif
- WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
- kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
-
- kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
- kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
- kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
-
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev,
@@ -1992,17 +2141,25 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
list_del(&interface->node);
vunmap(interface->kernel_map);
- if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
- kbase_csf_protected_memory_free(kbdev, interface->pma,
- interface->num_pages);
- } else {
- kbase_mem_pool_free_pages(
- &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- interface->num_pages, interface->phys,
- true, false);
+
+ if (!interface->reuse_pages) {
+ if (interface->flags & CSF_FIRMWARE_ENTRY_PROTECTED) {
+ kbase_csf_protected_memory_free(
+ kbdev, interface->pma, interface->num_pages_aligned,
+ interface->is_small_page);
+ } else {
+ kbase_mem_pool_free_pages(
+ kbase_mem_pool_group_select(
+ kbdev, KBASE_MEM_GROUP_CSF_FW,
+ interface->is_small_page),
+ interface->num_pages_aligned,
+ interface->phys,
+ true, false);
+ }
+
+ kfree(interface->phys);
}
- kfree(interface->phys);
kfree(interface);
}
@@ -2034,29 +2191,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- const u32 glb_req =
- kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
/* The scheduler is assumed to only call the enable when its internal
* state indicates that the idle timer has previously been disabled. So
* on entry the expected field values are:
* 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
* 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
*/
-
if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
- kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
- kbdev->csf.gpu_idle_dur_count);
-
- kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
- GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
- dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
- kbdev->csf.gpu_idle_dur_count);
+ enable_gpu_idle_timer(kbdev);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
@@ -2120,6 +2267,8 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, kbdev);
+
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
set_global_request(global_iface, GLB_REQ_PROTM_ENTER_MASK);
dev_dbg(kbdev->dev, "Sending request to enter protected mode");
@@ -2134,6 +2283,8 @@ void kbase_csf_wait_protected_mode_enter(struct kbase_device *kbdev)
if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(kbdev);
}
+
+ KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, kbdev);
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index f4ce33c..74bae39 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,7 @@
#define _KBASE_CSF_FIRMWARE_H_
#include "device/mali_kbase_device.h"
-#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
/*
* PAGE_KERNEL_RO was only defined on 32bit ARM in 4.19 in:
@@ -75,7 +75,7 @@
#define MAX_SUPPORTED_CSGS 31
/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */
#define MIN_SUPPORTED_STREAMS_PER_GROUP 8
-/* Maximum CSs per csg. */
+/* MAX_SUPPORTED_STREAMS_PER_GROUP: Maximum CSs per csg. */
#define MAX_SUPPORTED_STREAMS_PER_GROUP 32
struct kbase_device;
@@ -777,7 +777,7 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
/**
* kbase_csf_interface_version - Helper function to build the full firmware
* interface version in a format compatible with
- * with GLB_VERSION register
+ * GLB_VERSION register
*
* @major: major version of csf interface
* @minor: minor version of csf interface
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
index 70bf26a..b114817 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
@@ -67,9 +67,9 @@ struct firmware_config {
.mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
}
-static FW_CFG_ATTR(min, S_IRUGO);
-static FW_CFG_ATTR(max, S_IRUGO);
-static FW_CFG_ATTR(cur, S_IRUGO | S_IWUSR);
+static FW_CFG_ATTR(min, 0444);
+static FW_CFG_ATTR(max, 0444);
+static FW_CFG_ATTR(cur, 0644);
static void fw_cfg_kobj_release(struct kobject *kobj)
{
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 6f61631..0fd848f 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -101,7 +101,7 @@ struct dummy_firmware_interface {
#define CSF_GLB_REQ_CFG_MASK \
(GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
- GLB_REQ_CFG_PWROFF_TIMER_MASK)
+ GLB_REQ_CFG_PWROFF_TIMER_MASK | GLB_REQ_IDLE_ENABLE_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
@@ -193,9 +193,8 @@ static int invent_cmd_stream_group_info(struct kbase_device *kbdev,
ginfo->stream_stride = 0;
ginfo->streams = kcalloc(ginfo->stream_num, sizeof(*ginfo->streams), GFP_KERNEL);
- if (ginfo->streams == NULL) {
+ if (ginfo->streams == NULL)
return -ENOMEM;
- }
for (sid = 0; sid < ginfo->stream_num; ++sid) {
struct kbase_csf_cmd_stream_info *stream = &ginfo->streams[sid];
@@ -241,9 +240,8 @@ static int invent_capabilities(struct kbase_device *kbdev)
iface->group_stride = 0;
iface->groups = kcalloc(iface->group_num, sizeof(*iface->groups), GFP_KERNEL);
- if (iface->groups == NULL) {
+ if (iface->groups == NULL)
return -ENOMEM;
- }
for (gid = 0; gid < iface->group_num; ++gid) {
int err;
@@ -619,6 +617,20 @@ static void set_timeout_global(
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
}
+static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+ kbdev->csf.gpu_idle_dur_count);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
+ GLB_REQ_IDLE_ENABLE_MASK);
+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+ kbdev->csf.gpu_idle_dur_count);
+}
+
static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
@@ -628,7 +640,8 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK |
GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
- GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
+ GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK |
+ GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -642,6 +655,12 @@ static void global_init(struct kbase_device *const kbdev, u64 core_mask)
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
+ /* The GPU idle timer is always enabled for simplicity. Checks will be
+ * done before scheduling the GPU idle worker to see if it is
+ * appropriate for the current power policy.
+ */
+ enable_gpu_idle_timer(kbdev);
+
/* Unmask the interrupts */
kbase_csf_firmware_global_input(global_iface,
GLB_ACK_IRQ_MASK, ack_irq_mask);
@@ -809,7 +828,14 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
{
- return kbdev->csf.gpu_idle_hysteresis_ms;
+ unsigned long flags;
+ u32 dur;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ dur = kbdev->csf.gpu_idle_hysteresis_ms;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ return dur;
}
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
@@ -817,11 +843,53 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
unsigned long flags;
const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+ /* The 'fw_load_lock' is taken to synchronize against the deferred
+ * loading of FW, where the idle timer will be enabled.
+ */
+ mutex_lock(&kbdev->fw_load_lock);
+ if (unlikely(!kbdev->csf.firmware_inited)) {
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ mutex_unlock(&kbdev->fw_load_lock);
+ goto end;
+ }
+ mutex_unlock(&kbdev->fw_load_lock);
+
+ kbase_csf_scheduler_pm_active(kbdev);
+ if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ dev_err(kbdev->dev,
+ "Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
+ kbase_csf_scheduler_pm_idle(kbdev);
+ return kbdev->csf.gpu_idle_dur_count;
+ }
+
+ /* The 'reg_lock' is also taken and is held until the update is
+ * complete, to ensure that updates of the idle timer value by multiple
+ * users are serialized.
+ */
+ mutex_lock(&kbdev->csf.reg_lock);
+ /* The firmware only reads the new idle timer value when the timer is
+ * disabled.
+ */
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ /* Ensure that the request has taken effect */
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
+
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_ms = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
+ wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kbase_csf_scheduler_pm_idle(kbdev);
+end:
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
hysteresis_val);
@@ -897,6 +965,16 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
+ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+#ifdef KBASE_PM_RUNTIME
+ if (kbase_pm_gpu_sleep_allowed(kbdev))
+ kbdev->csf.gpu_idle_hysteresis_ms /=
+ FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+#endif
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
+ kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
+ kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
+
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
@@ -928,16 +1006,6 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
return ret;
}
- kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
-#ifdef KBASE_PM_RUNTIME
- if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_ms /=
- FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
-#endif
- WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ms);
- kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, kbdev->csf.gpu_idle_hysteresis_ms);
-
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev,
@@ -1035,29 +1103,19 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- u32 glb_req;
+ const u32 glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
-
/* The scheduler is assumed to only call the enable when its internal
* state indicates that the idle timer has previously been disabled. So
* on entry the expected field values are:
* 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
* 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or, on 1 -> 0
*/
-
- glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
- kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
- kbdev->csf.gpu_idle_dur_count);
-
- kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
- GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
-
- dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
- kbdev->csf.gpu_idle_dur_count);
+ enable_gpu_idle_timer(kbdev);
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
index 1815a26..4b3931f 100644
--- a/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
+++ b/mali_kbase/csf/mali_kbase_csf_heap_context_alloc.c
@@ -174,17 +174,15 @@ u64 kbase_csf_heap_context_allocator_alloc(
* allocate it.
*/
if (!ctx_alloc->region) {
- ctx_alloc->region =
- kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
- &ctx_alloc->gpu_va, mmu_sync_info);
+ ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
+ &ctx_alloc->gpu_va, mmu_sync_info);
}
/* If the pool still isn't allocated then an error occurred. */
- if (unlikely(!ctx_alloc->region)) {
+ if (unlikely(!ctx_alloc->region))
dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
- } else {
+ else
heap_gpu_va = sub_alloc(ctx_alloc);
- }
mutex_unlock(&ctx_alloc->lock);
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 05a4fa0..5380994 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -45,6 +45,10 @@ static int kbase_kcpu_map_import_prepare(
{
struct kbase_context *const kctx = kcpu_queue->kctx;
struct kbase_va_region *reg;
+ struct kbase_mem_phy_alloc *alloc;
+ struct page **pages;
+ struct tagged_addr *pa;
+ long i;
int ret = 0;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
@@ -76,6 +80,13 @@ static int kbase_kcpu_map_import_prepare(
ret = kbase_jd_user_buf_pin_pages(kctx, reg);
if (ret)
goto out;
+
+ alloc = reg->gpu_alloc;
+ pa = kbase_get_gpu_phy_pages(reg);
+ pages = alloc->imported.user_buf.pages;
+
+ for (i = 0; i < alloc->nents; i++)
+ pa[i] = as_tagged(page_to_phys(pages[i]));
}
current_command->type = BASE_KCPU_COMMAND_TYPE_MAP_IMPORT;
@@ -172,8 +183,8 @@ static void kbase_jit_add_to_pending_alloc_list(
list_for_each_entry(blocked_queue,
&kctx->csf.kcpu_queues.jit_blocked_queues,
jit_blocked) {
- struct kbase_kcpu_command const*const jit_alloc_cmd =
- &blocked_queue->commands[blocked_queue->start_offset];
+ struct kbase_kcpu_command const *const jit_alloc_cmd =
+ &blocked_queue->commands[blocked_queue->start_offset];
WARN_ON(jit_alloc_cmd->type != BASE_KCPU_COMMAND_TYPE_JIT_ALLOC);
if (cmd->enqueue_ts < jit_alloc_cmd->enqueue_ts) {
@@ -244,7 +255,7 @@ static int kbase_kcpu_jit_allocate_process(
break;
if (jit_cmd->type == BASE_KCPU_COMMAND_TYPE_JIT_FREE) {
- u8 const*const free_ids = jit_cmd->info.jit_free.ids;
+ u8 const *const free_ids = jit_cmd->info.jit_free.ids;
if (free_ids && *free_ids && kctx->jit_alloc[*free_ids]) {
/*
@@ -456,8 +467,8 @@ static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
- queue->kctx->kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(queue->kctx->kbdev,
+ queue);
for (i = 0; i < count; i++) {
u64 pages_used = 0;
@@ -636,7 +647,7 @@ static int kbase_csf_queue_group_suspend_prepare(
struct tagged_addr *page_array;
u64 start, end, i;
- if (!(reg->flags & BASE_MEM_SAME_VA) ||
+ if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
reg->nr_pages < nr_pages ||
kbase_reg_current_backed_size(reg) !=
reg->nr_pages) {
@@ -734,8 +745,8 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
cqs_wait->objs[i].addr, &mapping);
if (!queue->command_started) {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START(kbdev,
+ queue);
queue->command_started = true;
KBASE_KTRACE_ADD_CSF_KCPU(kbdev, CQS_WAIT_START,
queue, cqs_wait->nr_objs, 0);
@@ -764,8 +775,7 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
error);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
- kbdev, queue,
- evt[BASEP_EVENT_ERR_INDEX]);
+ kbdev, queue, evt[BASEP_EVENT_ERR_INDEX]);
queue->command_started = false;
}
@@ -855,8 +865,7 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
evt = (u32 *)kbase_phy_alloc_mapping_get(
queue->kctx, cqs_set->objs[i].addr, &mapping);
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
- evt ? 0 : 1);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue, evt ? 0 : 1);
if (!evt) {
dev_warn(kbdev->dev,
@@ -1490,8 +1499,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
{
u8 i;
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
for (i = 0; i < jit_alloc->count; i++) {
const u8 id = jit_alloc->info[i].id;
const struct kbase_va_region *reg = queue->kctx->jit_alloc[id];
@@ -1521,16 +1529,14 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue)
{
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(kbdev, queue);
}
static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue)
{
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue);
}
static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
@@ -1550,8 +1556,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
switch (cmd->type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
if (!queue->command_started) {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START(kbdev,
+ queue);
queue->command_started = true;
}
@@ -1584,8 +1590,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
}
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(kbdev, queue);
status = 0;
@@ -1603,8 +1608,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
queue->has_error = true;
#endif
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
- kbdev, queue, status);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(kbdev, queue,
+ status);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
status = kbase_kcpu_cqs_wait_process(kbdev, queue,
@@ -1654,15 +1659,14 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
/* Clear the queue's error state */
queue->has_error = false;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(kbdev, queue);
break;
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
struct kbase_ctx_ext_res_meta *meta = NULL;
if (!drain_queue) {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(kbdev,
+ queue);
kbase_gpu_vm_lock(queue->kctx);
meta = kbase_sticky_resource_acquire(
@@ -1684,8 +1688,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
bool ret;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(kbdev, queue);
kbase_gpu_vm_lock(queue->kctx);
ret = kbase_sticky_resource_release(
@@ -1698,15 +1701,15 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
"failed to release the reference. resource not found");
}
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
- kbdev, queue, ret ? 0 : 1);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(kbdev, queue,
+ ret ? 0 : 1);
break;
}
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: {
bool ret;
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(kbdev,
+ queue);
kbase_gpu_vm_lock(queue->kctx);
ret = kbase_sticky_resource_release_force(
@@ -1729,8 +1732,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
/* We still need to call this function to clean the JIT alloc info up */
kbase_kcpu_jit_allocate_finish(queue, cmd);
} else {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(kbdev,
+ queue);
status = kbase_kcpu_jit_allocate_process(queue,
cmd);
@@ -1754,8 +1757,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(kbdev, queue);
status = kbase_kcpu_jit_free_process(queue, cmd);
if (status)
@@ -1838,12 +1840,12 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
switch (cmd->type) {
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(
- kbdev, queue, cmd->info.fence.fence);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT(kbdev, queue,
+ cmd->info.fence.fence);
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(
- kbdev, queue, cmd->info.fence.fence);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL(kbdev, queue,
+ cmd->info.fence.fence);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
{
@@ -1865,8 +1867,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
unsigned int i;
for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(
- kbdev, queue, sets[i].addr);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET(kbdev, queue,
+ sets[i].addr);
}
break;
}
@@ -1881,16 +1883,15 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
- queue);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev, queue);
break;
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(
- kbdev, queue, cmd->info.import.gpu_va);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(kbdev, queue,
+ cmd->info.import.gpu_va);
break;
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(
- kbdev, queue, cmd->info.import.gpu_va);
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT(kbdev, queue,
+ cmd->info.import.gpu_va);
break;
case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE(
@@ -1900,35 +1901,29 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
{
u8 i;
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
for (i = 0; i < cmd->info.jit_alloc.count; i++) {
const struct base_jit_alloc_info *info =
&cmd->info.jit_alloc.info[i];
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
- kbdev, queue, info->gpu_alloc_addr,
- info->va_pages, info->commit_pages,
- info->extension, info->id, info->bin_id,
- info->max_allocations, info->flags,
- info->usage_id);
+ kbdev, queue, info->gpu_alloc_addr, info->va_pages,
+ info->commit_pages, info->extension, info->id, info->bin_id,
+ info->max_allocations, info->flags, info->usage_id);
}
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(kbdev, queue);
break;
}
case BASE_KCPU_COMMAND_TYPE_JIT_FREE:
{
u8 i;
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
for (i = 0; i < cmd->info.jit_free.count; i++) {
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE(
kbdev, queue, cmd->info.jit_free.ids[i]);
}
- KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(
- kbdev, queue);
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE(kbdev, queue);
break;
}
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
@@ -1936,6 +1931,9 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
cmd->info.suspend_buf_copy.group_handle);
break;
+ default:
+ dev_dbg(kbdev->dev, "Unknown command type %u", cmd->type);
+ break;
}
}
@@ -2210,8 +2208,8 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
/* Fire the tracepoint with the mutex held to enforce correct ordering
* with the summary stream.
*/
- KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(
- kctx->kbdev, queue, kctx->id, queue->num_pending_cmds);
+ KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE(kctx->kbdev, queue, queue->id, kctx->id,
+ queue->num_pending_cmds);
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_NEW, queue,
queue->fence_context, 0);
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index 3edb4de..2216cb7 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -206,14 +206,16 @@ struct kbase_kcpu_command_group_suspend_info {
* indicates that it has been enqueued earlier.
* @info: Structure which holds information about the command
* dependent on the command type.
- * @info.fence: Fence
- * @info.cqs_wait: CQS wait
- * @info.cqs_set: CQS set
- * @info.import: import
- * @info.jit_alloc: jit allocation
- * @info.jit_free: jit deallocation
- * @info.suspend_buf_copy: suspend buffer copy
- * @info.sample_time: sample time
+ * @info.fence: Fence
+ * @info.cqs_wait: CQS wait
+ * @info.cqs_set: CQS set
+ * @info.cqs_wait_operation: CQS wait operation
+ * @info.cqs_set_operation: CQS set operation
+ * @info.import: import
+ * @info.jit_alloc: JIT allocation
+ * @info.jit_free: JIT deallocation
+ * @info.suspend_buf_copy: suspend buffer copy
+ * @info.sample_time: sample time
*/
struct kbase_kcpu_command {
enum base_kcpu_command_type type;
@@ -303,8 +305,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
/**
* kbase_csf_kcpu_queue_delete - Delete KCPU command queue.
*
- * Return: 0 if successful, -EINVAL if the queue ID is invalid.
- *
* @kctx: Pointer to the kbase context from which the KCPU command
* queue is to be deleted.
* @del: Pointer to the structure which specifies the KCPU command
diff --git a/mali_kbase/csf/mali_kbase_csf_protected_memory.c b/mali_kbase/csf/mali_kbase_csf_protected_memory.c
index 5997483..bf1835b 100644
--- a/mali_kbase/csf/mali_kbase_csf_protected_memory.c
+++ b/mali_kbase/csf/mali_kbase_csf_protected_memory.c
@@ -71,29 +71,60 @@ struct protected_memory_allocation **
kbase_csf_protected_memory_alloc(
struct kbase_device *const kbdev,
struct tagged_addr *phys,
- size_t num_pages)
+ size_t num_pages,
+ bool is_small_page)
{
size_t i;
struct protected_memory_allocator_device *pma_dev =
kbdev->csf.pma_dev;
- struct protected_memory_allocation **pma =
- kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
+ struct protected_memory_allocation **pma = NULL;
+ unsigned int order = KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER;
+ unsigned int num_pages_order;
+
+ if (is_small_page)
+ order = KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER;
+
+ num_pages_order = (1u << order);
+
+ /* Round the requested num_pages up to the allocation granularity
+ * implied by the order selected above.
+ *
+ * pma_alloc_page() then handles the size of each allocation based
+ * on that order.
+ */
+ num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
+
+ pma = kmalloc_array(num_pages, sizeof(*pma), GFP_KERNEL);
if (WARN_ON(!pma_dev) || WARN_ON(!phys) || !pma)
return NULL;
for (i = 0; i < num_pages; i++) {
- pma[i] = pma_dev->ops.pma_alloc_page(pma_dev,
- KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
+ phys_addr_t phys_addr;
+
+ pma[i] = pma_dev->ops.pma_alloc_page(pma_dev, order);
if (!pma[i])
break;
- phys[i] = as_tagged(pma_dev->ops.pma_get_phys_addr(pma_dev,
- pma[i]));
+ phys_addr = pma_dev->ops.pma_get_phys_addr(pma_dev, pma[i]);
+
+ if (order) {
+ size_t j;
+
+ *phys++ = as_tagged_tag(phys_addr, HUGE_HEAD | HUGE_PAGE);
+
+ for (j = 1; j < num_pages_order; j++) {
+ *phys++ = as_tagged_tag(phys_addr +
+ PAGE_SIZE * j,
+ HUGE_PAGE);
+ }
+ } else {
+ phys[i] = as_tagged(phys_addr);
+ }
}
if (i != num_pages) {
- kbase_csf_protected_memory_free(kbdev, pma, i);
+ kbase_csf_protected_memory_free(kbdev, pma, i * num_pages_order, is_small_page);
return NULL;
}
@@ -103,15 +134,28 @@ struct protected_memory_allocation **
void kbase_csf_protected_memory_free(
struct kbase_device *const kbdev,
struct protected_memory_allocation **pma,
- size_t num_pages)
+ size_t num_pages,
+ bool is_small_page)
{
size_t i;
struct protected_memory_allocator_device *pma_dev =
kbdev->csf.pma_dev;
+ unsigned int num_pages_order = (1u << KBASE_MEM_POOL_2MB_PAGE_TABLE_ORDER);
+
+ if (is_small_page)
+ num_pages_order = (1u << KBASE_MEM_POOL_4KB_PAGE_TABLE_ORDER);
if (WARN_ON(!pma_dev) || WARN_ON(!pma))
return;
+ /* Round the requested num_pages up to the allocation granularity
+ * implied by the order, matching what was done at allocation time,
+ * so that pma_free_page() is called once per order-sized allocation.
+ */
+ num_pages = div64_u64(num_pages + num_pages_order - 1, num_pages_order);
+
for (i = 0; i < num_pages; i++)
pma_dev->ops.pma_free_page(pma_dev, pma[i]);
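
The rounding above converts a request expressed in small (4KB) pages into a whole number of order-sized protected allocations. As a worked illustration only (not part of the patch; the constant names below are local stand-ins, assuming the usual 2MB = order 9 and 4KB = order 0 mapping), the ceiling division behaves as follows:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the driver's pool orders (assumed values: a 2MB block
 * is 512 x 4KB pages, i.e. order 9; a small page is order 0).
 */
#define EXAMPLE_2MB_PAGE_TABLE_ORDER 9
#define EXAMPLE_4KB_PAGE_TABLE_ORDER 0

/* Same ceiling division as div64_u64(num_pages + num_pages_order - 1,
 * num_pages_order) in the patch: how many order-sized allocations are
 * needed to cover 'num_pages' small pages.
 */
static uint64_t example_num_allocations(uint64_t num_pages, unsigned int order)
{
	const uint64_t num_pages_order = 1ull << order;

	return (num_pages + num_pages_order - 1) / num_pages_order;
}

int main(void)
{
	/* 1000 small pages round up to two 2MB allocations (2 x 512 pages). */
	assert(example_num_allocations(1000, EXAMPLE_2MB_PAGE_TABLE_ORDER) == 2);
	/* With small pages requested, every page is its own allocation. */
	assert(example_num_allocations(1000, EXAMPLE_4KB_PAGE_TABLE_ORDER) == 1000);
	printf("protected memory rounding example OK\n");
	return 0;
}

This is also why the partial-failure path in the alloc function frees i * num_pages_order pages: each of the i successful allocations covers num_pages_order small pages.
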
diff --git a/mali_kbase/csf/mali_kbase_csf_protected_memory.h b/mali_kbase/csf/mali_kbase_csf_protected_memory.h
index 4c0609e..8c1aa91 100644
--- a/mali_kbase/csf/mali_kbase_csf_protected_memory.h
+++ b/mali_kbase/csf/mali_kbase_csf_protected_memory.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -46,6 +46,7 @@ void kbase_csf_protected_memory_term(struct kbase_device *const kbdev);
* @phys: Array of physical addresses to be filled in by the protected
* memory allocator.
* @num_pages: Number of pages requested to be allocated.
+ * @is_small_page: Flag used to select the order of protected memory page.
*
* Return: Pointer to an array of protected memory allocations on success,
* or NULL on failure.
@@ -54,7 +55,8 @@ struct protected_memory_allocation **
kbase_csf_protected_memory_alloc(
struct kbase_device *const kbdev,
struct tagged_addr *phys,
- size_t num_pages);
+ size_t num_pages,
+ bool is_small_page);
/**
* kbase_csf_protected_memory_free - Free the allocated
@@ -63,9 +65,11 @@ struct protected_memory_allocation **
* @kbdev: Device pointer.
* @pma: Array of pointer to protected memory allocations.
* @num_pages: Number of pages to be freed.
+ * @is_small_page: Flag used to select the order of protected memory page.
*/
void kbase_csf_protected_memory_free(
struct kbase_device *const kbdev,
struct protected_memory_allocation **pma,
- size_t num_pages);
+ size_t num_pages,
+ bool is_small_page);
#endif
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
new file mode 100644
index 0000000..99de444
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -0,0 +1,1524 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * This header was originally autogenerated, but it is now expected to be
+ * maintained and extended by hand.
+ */
+
+#ifndef _KBASE_CSF_REGISTERS_H_
+#define _KBASE_CSF_REGISTERS_H_
+
+/*
+ * Begin register sets
+ */
+
+/* DOORBELLS base address */
+#define DOORBELLS_BASE 0x0080000
+#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
+
+/* CS_KERNEL_INPUT_BLOCK base address */
+#define CS_KERNEL_INPUT_BLOCK_BASE 0x0000
+#define CS_KERNEL_INPUT_BLOCK_REG(r) (CS_KERNEL_INPUT_BLOCK_BASE + (r))
+
+/* CS_KERNEL_OUTPUT_BLOCK base address */
+#define CS_KERNEL_OUTPUT_BLOCK_BASE 0x0000
+#define CS_KERNEL_OUTPUT_BLOCK_REG(r) (CS_KERNEL_OUTPUT_BLOCK_BASE + (r))
+
+/* CS_USER_INPUT_BLOCK base address */
+#define CS_USER_INPUT_BLOCK_BASE 0x0000
+#define CS_USER_INPUT_BLOCK_REG(r) (CS_USER_INPUT_BLOCK_BASE + (r))
+
+/* CS_USER_OUTPUT_BLOCK base address */
+#define CS_USER_OUTPUT_BLOCK_BASE 0x0000
+#define CS_USER_OUTPUT_BLOCK_REG(r) (CS_USER_OUTPUT_BLOCK_BASE + (r))
+
+/* CSG_INPUT_BLOCK base address */
+#define CSG_INPUT_BLOCK_BASE 0x0000
+#define CSG_INPUT_BLOCK_REG(r) (CSG_INPUT_BLOCK_BASE + (r))
+
+/* CSG_OUTPUT_BLOCK base address */
+#define CSG_OUTPUT_BLOCK_BASE 0x0000
+#define CSG_OUTPUT_BLOCK_REG(r) (CSG_OUTPUT_BLOCK_BASE + (r))
+
+/* GLB_CONTROL_BLOCK base address */
+#define GLB_CONTROL_BLOCK_BASE 0x04000000
+#define GLB_CONTROL_BLOCK_REG(r) (GLB_CONTROL_BLOCK_BASE + (r))
+
+/* GLB_INPUT_BLOCK base address */
+#define GLB_INPUT_BLOCK_BASE 0x0000
+#define GLB_INPUT_BLOCK_REG(r) (GLB_INPUT_BLOCK_BASE + (r))
+
+/* GLB_OUTPUT_BLOCK base address */
+#define GLB_OUTPUT_BLOCK_BASE 0x0000
+#define GLB_OUTPUT_BLOCK_REG(r) (GLB_OUTPUT_BLOCK_BASE + (r))
+
+/* USER base address */
+#define USER_BASE 0x0010000
+#define USER_REG(r) (USER_BASE + (r))
+
+/* End register sets */
+
+/*
+ * Begin register offsets
+ */
+
+/* DOORBELLS register offsets */
+#define DOORBELL_0 0x0000 /* () Doorbell 0 register */
+#define DOORBELL(n) (DOORBELL_0 + (n)*65536)
+#define DOORBELL_REG(n, r) (DOORBELL(n) + DOORBELL_BLOCK_REG(r))
+#define DOORBELL_COUNT 1024
+
+/* DOORBELL_BLOCK register offsets */
+#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */
+
+/* CS_KERNEL_INPUT_BLOCK register offsets */
+#define CS_REQ 0x0000 /* () CS request flags */
+#define CS_CONFIG 0x0004 /* () CS configuration */
+#define CS_ACK_IRQ_MASK 0x000C /* () Command stream interrupt mask */
+#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */
+#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */
+#define CS_SIZE 0x0018 /* () Size of the ring buffer */
+#define CS_TILER_HEAP_START_LO 0x0020 /* () Pointer to heap start, low word */
+#define CS_TILER_HEAP_START_HI 0x0024 /* () Pointer to heap start, high word */
+#define CS_TILER_HEAP_END_LO 0x0028 /* () Tiler heap descriptor address, low word */
+#define CS_TILER_HEAP_END_HI 0x002C /* () Tiler heap descriptor address, high word */
+#define CS_USER_INPUT_LO 0x0030 /* () CS user mode input page address, low word */
+#define CS_USER_INPUT_HI 0x0034 /* () CS user mode input page address, high word */
+#define CS_USER_OUTPUT_LO 0x0038 /* () CS user mode output page address, low word */
+#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode output page address, high word */
+#define CS_INSTR_CONFIG 0x0040 /* () Instrumentation buffer configuration */
+#define CS_INSTR_BUFFER_SIZE 0x0044 /* () Instrumentation buffer size */
+#define CS_INSTR_BUFFER_BASE_LO 0x0048 /* () Instrumentation buffer base pointer, low word */
+#define CS_INSTR_BUFFER_BASE_HI 0x004C /* () Instrumentation buffer base pointer, high word */
+#define CS_INSTR_BUFFER_OFFSET_POINTER_LO 0x0050 /* () Instrumentation buffer pointer to insert offset, low word */
+#define CS_INSTR_BUFFER_OFFSET_POINTER_HI 0x0054 /* () Instrumentation buffer pointer to insert offset, high word */
+
+/* CS_KERNEL_OUTPUT_BLOCK register offsets */
+#define CS_ACK 0x0000 /* () CS acknowledge flags */
+#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */
+#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */
+#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */
+#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */
+#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */
+#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */
+#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */
+#define CS_STATUS_SCOREBOARDS 0x005C /* () Scoreboard status */
+#define CS_STATUS_BLOCKED_REASON 0x0060 /* () Blocked reason */
+#define CS_FAULT 0x0080 /* () Recoverable fault information */
+#define CS_FATAL 0x0084 /* () Unrecoverable fault information */
+#define CS_FAULT_INFO_LO 0x0088 /* () Additional information about a recoverable fault, low word */
+#define CS_FAULT_INFO_HI 0x008C /* () Additional information about a recoverable fault, high word */
+#define CS_FATAL_INFO_LO 0x0090 /* () Additional information about a non-recoverable fault, low word */
+#define CS_FATAL_INFO_HI 0x0094 /* () Additional information about a non-recoverable fault, high word */
+#define CS_HEAP_VT_START 0x00C0 /* () Number of vertex/tiling operations started */
+#define CS_HEAP_VT_END 0x00C4 /* () Number of vertex/tiling operations completed */
+#define CS_HEAP_FRAG_END 0x00CC /* () Number of fragment operations completed */
+#define CS_HEAP_ADDRESS_LO 0x00D0 /* () Heap address, low word */
+#define CS_HEAP_ADDRESS_HI 0x00D4 /* () Heap address, high word */
+
+/* CS_USER_INPUT_BLOCK register offsets */
+#define CS_INSERT_LO 0x0000 /* () Current insert offset for ring buffer, low word */
+#define CS_INSERT_HI 0x0004 /* () Current insert offset for ring buffer, high word */
+#define CS_EXTRACT_INIT_LO 0x0008 /* () Initial extract offset for ring buffer, low word */
+#define CS_EXTRACT_INIT_HI 0x000C /* () Initial extract offset for ring buffer, high word */
+
+/* CS_USER_OUTPUT_BLOCK register offsets */
+#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */
+#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */
+#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */
+
+/* CSG_INPUT_BLOCK register offsets */
+#define CSG_REQ 0x0000 /* () CSG request */
+#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
+#define CSG_DB_REQ 0x0008 /* () Global doorbell request */
+#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */
+#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */
+#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */
+#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */
+#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */
+#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */
+#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */
+#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */
+#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */
+#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */
+#define CSG_PROTM_SUSPEND_BUF_HI 0x004C /* () Protected mode suspend buffer, high word */
+#define CSG_CONFIG 0x0050 /* () CSG configuration options */
+#define CSG_ITER_TRACE_CONFIG 0x0054 /* () CSG trace configuration */
+
+/* CSG_OUTPUT_BLOCK register offsets */
+#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
+#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */
+#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */
+#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */
+#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */
+#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */
+
+/* GLB_CONTROL_BLOCK register offsets */
+#define GLB_VERSION 0x0000 /* () Global interface version */
+#define GLB_FEATURES 0x0004 /* () Global interface features */
+#define GLB_INPUT_VA 0x0008 /* () Address of GLB_INPUT_BLOCK */
+#define GLB_OUTPUT_VA 0x000C /* () Address of GLB_OUTPUT_BLOCK */
+#define GLB_GROUP_NUM 0x0010 /* () Number of CSG interfaces */
+#define GLB_GROUP_STRIDE 0x0014 /* () Stride between CSG interfaces */
+#define GLB_PRFCNT_SIZE 0x0018 /* () Size of CSF performance counters */
+#define GLB_INSTR_FEATURES \
+ 0x001C /* () TRACE_POINT instrumentation. (csf >= 1.1.0) */
+#define GROUP_CONTROL_0 0x1000 /* () CSG control and capabilities */
+#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n)*256)
+#define GROUP_CONTROL_REG(n, r) (GROUP_CONTROL(n) + GROUP_CONTROL_BLOCK_REG(r))
+#define GROUP_CONTROL_COUNT 16
+
+/* STREAM_CONTROL_BLOCK register offsets */
+#define STREAM_FEATURES 0x0000 /* () CSI features */
+#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */
+#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */
+
+/* GROUP_CONTROL_BLOCK register offsets */
+#define GROUP_FEATURES 0x0000 /* () CSG interface features */
+#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */
+#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */
+#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */
+#define GROUP_PROTM_SUSPEND_SIZE 0x0010 /* () Size of CSG protected-mode suspend buffer */
+#define GROUP_STREAM_NUM 0x0014 /* () Number of CS interfaces */
+#define GROUP_STREAM_STRIDE 0x0018 /* () Stride between CS interfaces */
+#define STREAM_CONTROL_0 0x0040 /* () CS control and capabilities */
+#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n)*12)
+#define STREAM_CONTROL_REG(n, r) (STREAM_CONTROL(n) + STREAM_CONTROL_BLOCK_REG(r))
+#define STREAM_CONTROL_COUNT 16
+
+/* GLB_INPUT_BLOCK register offsets */
+#define GLB_REQ 0x0000 /* () Global request */
+#define GLB_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
+#define GLB_DB_REQ 0x0008 /* () Global doorbell request */
+#define GLB_PROGRESS_TIMER 0x0010 /* () Global progress timeout */
+#define GLB_PWROFF_TIMER 0x0014 /* () Global shader core power off timer */
+#define GLB_ALLOC_EN_LO 0x0018 /* () Global shader core allocation enable mask, low word */
+#define GLB_ALLOC_EN_HI 0x001C /* () Global shader core allocation enable mask, high word */
+
+#define GLB_PRFCNT_JASID 0x0024 /* () Performance counter address space */
+#define GLB_PRFCNT_BASE_LO 0x0028 /* () Performance counter buffer address, low word */
+#define GLB_PRFCNT_BASE_HI 0x002C /* () Performance counter buffer address, high word */
+#define GLB_PRFCNT_EXTRACT 0x0030 /* () Performance counter buffer extract index */
+#define GLB_PRFCNT_CONFIG 0x0040 /* () Performance counter configuration */
+#define GLB_PRFCNT_CSG_SELECT 0x0044 /* () CSG performance counting enable */
+#define GLB_PRFCNT_FW_EN 0x0048 /* () Performance counter enable for firmware */
+#define GLB_PRFCNT_CSG_EN 0x004C /* () Performance counter enable for CSG */
+#define GLB_PRFCNT_CSF_EN 0x0050 /* () Performance counter enable for CSF */
+#define GLB_PRFCNT_SHADER_EN 0x0054 /* () Performance counter enable for shader cores */
+#define GLB_PRFCNT_TILER_EN 0x0058 /* () Performance counter enable for tiler */
+#define GLB_PRFCNT_MMU_L2_EN 0x005C /* () Performance counter enable for MMU/L2 cache */
+
+#define GLB_DEBUG_FWUTF_DESTROY 0x0FE0 /* () Test fixture destroy function address */
+#define GLB_DEBUG_FWUTF_TEST 0x0FE4 /* () Test index */
+#define GLB_DEBUG_FWUTF_FIXTURE 0x0FE8 /* () Test fixture index */
+#define GLB_DEBUG_FWUTF_CREATE 0x0FEC /* () Test fixture create function address */
+#define GLB_DEBUG_ACK_IRQ_MASK 0x0FF8 /* () Global debug acknowledge interrupt mask */
+#define GLB_DEBUG_REQ 0x0FFC /* () Global debug request */
+
+/* GLB_OUTPUT_BLOCK register offsets */
+#define GLB_ACK 0x0000 /* () Global acknowledge */
+#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
+#define GLB_HALT_STATUS 0x0010 /* () Global halt status */
+#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
+#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
+#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */
+#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
+
+/* USER register offsets */
+#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
+
+/* End register offsets */
+
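
The register sets above are laid out as a base address plus a per-instance stride: 64KB per doorbell page, 256 bytes per CSG control block and 12 bytes per CS control block. A standalone sanity check of that arithmetic (illustration only; the relevant #defines are copied locally purely so the sketch compiles outside the kernel tree):

#include <stdio.h>

/* Copied from the header above solely to make this illustration
 * self-contained; the kernel build uses csf/mali_kbase_csf_registers.h.
 */
#define DOORBELLS_BASE 0x0080000
#define DOORBELL_0 0x0000
#define DOORBELL(n) (DOORBELL_0 + (n) * 65536)
#define DOORBELLS_REG(r) (DOORBELLS_BASE + (r))
#define GROUP_CONTROL_0 0x1000
#define GROUP_CONTROL(n) (GROUP_CONTROL_0 + (n) * 256)
#define STREAM_CONTROL_0 0x0040
#define STREAM_CONTROL(n) (STREAM_CONTROL_0 + (n) * 12)

int main(void)
{
	/* Doorbell pages are 64KB apart: doorbell 5 sits at 0x80000 + 5*0x10000. */
	_Static_assert(DOORBELLS_REG(DOORBELL(5)) == 0xD0000, "doorbell stride");
	/* CSG control blocks are 256 bytes apart inside GLB_CONTROL_BLOCK. */
	_Static_assert(GROUP_CONTROL(2) == 0x1200, "group control stride");
	/* CS control blocks are 12 bytes apart within a group control block. */
	_Static_assert(STREAM_CONTROL(3) == 0x64, "stream control stride");
	printf("register layout example OK\n");
	return 0;
}
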
+/* CS_KERNEL_INPUT_BLOCK register set definitions */
+/* GLB_VERSION register */
+#define GLB_VERSION_PATCH_SHIFT (0)
+#define GLB_VERSION_PATCH_MASK ((0xFFFF) << GLB_VERSION_PATCH_SHIFT)
+#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val)&GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT)
+#define GLB_VERSION_PATCH_SET(reg_val, value) \
+ (((reg_val) & ~GLB_VERSION_PATCH_MASK) | (((value) << GLB_VERSION_PATCH_SHIFT) & GLB_VERSION_PATCH_MASK))
+#define GLB_VERSION_MINOR_SHIFT (16)
+#define GLB_VERSION_MINOR_MASK ((0xFF) << GLB_VERSION_MINOR_SHIFT)
+#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val)&GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT)
+#define GLB_VERSION_MINOR_SET(reg_val, value) \
+ (((reg_val) & ~GLB_VERSION_MINOR_MASK) | (((value) << GLB_VERSION_MINOR_SHIFT) & GLB_VERSION_MINOR_MASK))
+#define GLB_VERSION_MAJOR_SHIFT (24)
+#define GLB_VERSION_MAJOR_MASK ((0xFF) << GLB_VERSION_MAJOR_SHIFT)
+#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val)&GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT)
+#define GLB_VERSION_MAJOR_SET(reg_val, value) \
+ (((reg_val) & ~GLB_VERSION_MAJOR_MASK) | (((value) << GLB_VERSION_MAJOR_SHIFT) & GLB_VERSION_MAJOR_MASK))
+
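
Fields in this header follow the same _SHIFT/_MASK/_GET/_SET accessor pattern; GLB_VERSION above is the first instance. A minimal decode sketch (illustration only, not part of the patch), with the three _GET accessors copied locally and an unsigned suffix added to the mask literals so the standalone example avoids shifting into the sign bit of a plain int:

#include <stdint.h>
#include <stdio.h>

/* Local copies of the GLB_VERSION accessors defined above, for illustration
 * only (note the added 'u' suffixes on the mask literals).
 */
#define GLB_VERSION_PATCH_SHIFT (0)
#define GLB_VERSION_PATCH_MASK ((0xFFFFu) << GLB_VERSION_PATCH_SHIFT)
#define GLB_VERSION_PATCH_GET(reg_val) (((reg_val) & GLB_VERSION_PATCH_MASK) >> GLB_VERSION_PATCH_SHIFT)
#define GLB_VERSION_MINOR_SHIFT (16)
#define GLB_VERSION_MINOR_MASK ((0xFFu) << GLB_VERSION_MINOR_SHIFT)
#define GLB_VERSION_MINOR_GET(reg_val) (((reg_val) & GLB_VERSION_MINOR_MASK) >> GLB_VERSION_MINOR_SHIFT)
#define GLB_VERSION_MAJOR_SHIFT (24)
#define GLB_VERSION_MAJOR_MASK ((0xFFu) << GLB_VERSION_MAJOR_SHIFT)
#define GLB_VERSION_MAJOR_GET(reg_val) (((reg_val) & GLB_VERSION_MAJOR_MASK) >> GLB_VERSION_MAJOR_SHIFT)

int main(void)
{
	/* 0x01010000 packs major=1, minor=1, patch=0, i.e. CSF interface 1.1.0
	 * (the level referenced by the GLB_INSTR_FEATURES comment above).
	 */
	const uint32_t glb_version = 0x01010000u;

	printf("CSF interface v%u.%u.%u\n",
	       (unsigned int)GLB_VERSION_MAJOR_GET(glb_version),
	       (unsigned int)GLB_VERSION_MINOR_GET(glb_version),
	       (unsigned int)GLB_VERSION_PATCH_GET(glb_version));
	return 0;
}
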
+/* CS_REQ register */
+#define CS_REQ_STATE_SHIFT 0
+#define CS_REQ_STATE_MASK (0x7 << CS_REQ_STATE_SHIFT)
+#define CS_REQ_STATE_GET(reg_val) (((reg_val)&CS_REQ_STATE_MASK) >> CS_REQ_STATE_SHIFT)
+#define CS_REQ_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_STATE_MASK) | (((value) << CS_REQ_STATE_SHIFT) & CS_REQ_STATE_MASK))
+/* CS_REQ_STATE values */
+#define CS_REQ_STATE_STOP 0x0
+#define CS_REQ_STATE_START 0x1
+/* End of CS_REQ_STATE values */
+#define CS_REQ_EXTRACT_EVENT_SHIFT 4
+#define CS_REQ_EXTRACT_EVENT_MASK (0x1 << CS_REQ_EXTRACT_EVENT_SHIFT)
+#define CS_REQ_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_REQ_EXTRACT_EVENT_MASK) >> CS_REQ_EXTRACT_EVENT_SHIFT)
+#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK))
+
+#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8
+#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT)
+#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT)
+#define CS_REQ_IDLE_SYNC_WAIT_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_SYNC_WAIT_MASK) | \
+ (((value) << CS_REQ_IDLE_SYNC_WAIT_SHIFT) & CS_REQ_IDLE_SYNC_WAIT_MASK))
+#define CS_REQ_IDLE_PROTM_PEND_SHIFT 9
+#define CS_REQ_IDLE_PROTM_PEND_MASK (0x1 << CS_REQ_IDLE_PROTM_PEND_SHIFT)
+#define CS_REQ_IDLE_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_IDLE_PROTM_PEND_MASK) >> CS_REQ_IDLE_PROTM_PEND_SHIFT)
+#define CS_REQ_IDLE_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_PROTM_PEND_MASK) | \
+ (((value) << CS_REQ_IDLE_PROTM_PEND_SHIFT) & CS_REQ_IDLE_PROTM_PEND_MASK))
+#define CS_REQ_IDLE_EMPTY_SHIFT 10
+#define CS_REQ_IDLE_EMPTY_MASK (0x1 << CS_REQ_IDLE_EMPTY_SHIFT)
+#define CS_REQ_IDLE_EMPTY_GET(reg_val) (((reg_val)&CS_REQ_IDLE_EMPTY_MASK) >> CS_REQ_IDLE_EMPTY_SHIFT)
+#define CS_REQ_IDLE_EMPTY_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_EMPTY_MASK) | (((value) << CS_REQ_IDLE_EMPTY_SHIFT) & CS_REQ_IDLE_EMPTY_MASK))
+#define CS_REQ_IDLE_RESOURCE_REQ_SHIFT 11
+#define CS_REQ_IDLE_RESOURCE_REQ_MASK (0x1 << CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+#define CS_REQ_IDLE_RESOURCE_REQ_GET(reg_val) \
+ (((reg_val)&CS_REQ_IDLE_RESOURCE_REQ_MASK) >> CS_REQ_IDLE_RESOURCE_REQ_SHIFT)
+#define CS_REQ_IDLE_RESOURCE_REQ_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_IDLE_RESOURCE_REQ_MASK) | \
+ (((value) << CS_REQ_IDLE_RESOURCE_REQ_SHIFT) & CS_REQ_IDLE_RESOURCE_REQ_MASK))
+#define CS_REQ_TILER_OOM_SHIFT 26
+#define CS_REQ_TILER_OOM_MASK (0x1 << CS_REQ_TILER_OOM_SHIFT)
+#define CS_REQ_TILER_OOM_GET(reg_val) (((reg_val)&CS_REQ_TILER_OOM_MASK) >> CS_REQ_TILER_OOM_SHIFT)
+#define CS_REQ_TILER_OOM_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_TILER_OOM_MASK) | (((value) << CS_REQ_TILER_OOM_SHIFT) & CS_REQ_TILER_OOM_MASK))
+#define CS_REQ_PROTM_PEND_SHIFT 27
+#define CS_REQ_PROTM_PEND_MASK (0x1 << CS_REQ_PROTM_PEND_SHIFT)
+#define CS_REQ_PROTM_PEND_GET(reg_val) (((reg_val)&CS_REQ_PROTM_PEND_MASK) >> CS_REQ_PROTM_PEND_SHIFT)
+#define CS_REQ_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_PROTM_PEND_MASK) | (((value) << CS_REQ_PROTM_PEND_SHIFT) & CS_REQ_PROTM_PEND_MASK))
+#define CS_REQ_FATAL_SHIFT 30
+#define CS_REQ_FATAL_MASK (0x1 << CS_REQ_FATAL_SHIFT)
+#define CS_REQ_FATAL_GET(reg_val) (((reg_val)&CS_REQ_FATAL_MASK) >> CS_REQ_FATAL_SHIFT)
+#define CS_REQ_FATAL_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_FATAL_MASK) | (((value) << CS_REQ_FATAL_SHIFT) & CS_REQ_FATAL_MASK))
+#define CS_REQ_FAULT_SHIFT 31
+#define CS_REQ_FAULT_MASK (0x1 << CS_REQ_FAULT_SHIFT)
+#define CS_REQ_FAULT_GET(reg_val) (((reg_val)&CS_REQ_FAULT_MASK) >> CS_REQ_FAULT_SHIFT)
+#define CS_REQ_FAULT_SET(reg_val, value) \
+ (((reg_val) & ~CS_REQ_FAULT_MASK) | (((value) << CS_REQ_FAULT_SHIFT) & CS_REQ_FAULT_MASK))
+
+/* CS_CONFIG register */
+#define CS_CONFIG_PRIORITY_SHIFT 0
+#define CS_CONFIG_PRIORITY_MASK (0xF << CS_CONFIG_PRIORITY_SHIFT)
+#define CS_CONFIG_PRIORITY_GET(reg_val) (((reg_val)&CS_CONFIG_PRIORITY_MASK) >> CS_CONFIG_PRIORITY_SHIFT)
+#define CS_CONFIG_PRIORITY_SET(reg_val, value) \
+ (((reg_val) & ~CS_CONFIG_PRIORITY_MASK) | (((value) << CS_CONFIG_PRIORITY_SHIFT) & CS_CONFIG_PRIORITY_MASK))
+#define CS_CONFIG_USER_DOORBELL_SHIFT 8
+#define CS_CONFIG_USER_DOORBELL_MASK (0xFF << CS_CONFIG_USER_DOORBELL_SHIFT)
+#define CS_CONFIG_USER_DOORBELL_GET(reg_val) (((reg_val)&CS_CONFIG_USER_DOORBELL_MASK) >> CS_CONFIG_USER_DOORBELL_SHIFT)
+#define CS_CONFIG_USER_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CS_CONFIG_USER_DOORBELL_MASK) | \
+ (((value) << CS_CONFIG_USER_DOORBELL_SHIFT) & CS_CONFIG_USER_DOORBELL_MASK))
+
+/* CS_ACK_IRQ_MASK register */
+#define CS_ACK_IRQ_MASK_STATE_SHIFT 0
+#define CS_ACK_IRQ_MASK_STATE_MASK (0x7 << CS_ACK_IRQ_MASK_STATE_SHIFT)
+#define CS_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_STATE_MASK) >> CS_ACK_IRQ_MASK_STATE_SHIFT)
+#define CS_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_STATE_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_STATE_SHIFT) & CS_ACK_IRQ_MASK_STATE_MASK))
+/* CS_ACK_IRQ_MASK_STATE values */
+#define CS_ACK_IRQ_MASK_STATE_DISABLED 0x0
+#define CS_ACK_IRQ_MASK_STATE_ENABLED 0x7
+/* End of CS_ACK_IRQ_MASK_STATE values */
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT 4
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_GET(reg_val) \
+ (((reg_val)&CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) >> CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_IRQ_MASK_EXTRACT_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_EXTRACT_EVENT_SHIFT) & CS_ACK_IRQ_MASK_EXTRACT_EVENT_MASK))
+#define CS_ACK_IRQ_MASK_TILER_OOM_SHIFT 26
+#define CS_ACK_IRQ_MASK_TILER_OOM_MASK (0x1 << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
+#define CS_ACK_IRQ_MASK_TILER_OOM_GET(reg_val) \
+ (((reg_val)&CS_ACK_IRQ_MASK_TILER_OOM_MASK) >> CS_ACK_IRQ_MASK_TILER_OOM_SHIFT)
+#define CS_ACK_IRQ_MASK_TILER_OOM_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_TILER_OOM_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_TILER_OOM_SHIFT) & CS_ACK_IRQ_MASK_TILER_OOM_MASK))
+#define CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT 27
+#define CS_ACK_IRQ_MASK_PROTM_PEND_MASK (0x1 << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
+#define CS_ACK_IRQ_MASK_PROTM_PEND_GET(reg_val) \
+ (((reg_val)&CS_ACK_IRQ_MASK_PROTM_PEND_MASK) >> CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT)
+#define CS_ACK_IRQ_MASK_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_PROTM_PEND_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_PROTM_PEND_SHIFT) & CS_ACK_IRQ_MASK_PROTM_PEND_MASK))
+#define CS_ACK_IRQ_MASK_FATAL_SHIFT 30
+#define CS_ACK_IRQ_MASK_FATAL_MASK (0x1 << CS_ACK_IRQ_MASK_FATAL_SHIFT)
+#define CS_ACK_IRQ_MASK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FATAL_MASK) >> CS_ACK_IRQ_MASK_FATAL_SHIFT)
+#define CS_ACK_IRQ_MASK_FATAL_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_FATAL_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_FATAL_SHIFT) & CS_ACK_IRQ_MASK_FATAL_MASK))
+#define CS_ACK_IRQ_MASK_FAULT_SHIFT 31
+#define CS_ACK_IRQ_MASK_FAULT_MASK (0x1 << CS_ACK_IRQ_MASK_FAULT_SHIFT)
+#define CS_ACK_IRQ_MASK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_IRQ_MASK_FAULT_MASK) >> CS_ACK_IRQ_MASK_FAULT_SHIFT)
+#define CS_ACK_IRQ_MASK_FAULT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_IRQ_MASK_FAULT_MASK) | \
+ (((value) << CS_ACK_IRQ_MASK_FAULT_SHIFT) & CS_ACK_IRQ_MASK_FAULT_MASK))
+
+/* CS_BASE register */
+#define CS_BASE_POINTER_SHIFT 0
+#define CS_BASE_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_BASE_POINTER_SHIFT)
+#define CS_BASE_POINTER_GET(reg_val) (((reg_val)&CS_BASE_POINTER_MASK) >> CS_BASE_POINTER_SHIFT)
+#define CS_BASE_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_BASE_POINTER_MASK) | (((value) << CS_BASE_POINTER_SHIFT) & CS_BASE_POINTER_MASK))
+
+/* CS_SIZE register */
+#define CS_SIZE_SIZE_SHIFT 0
+#define CS_SIZE_SIZE_MASK (0xFFFFFFFF << CS_SIZE_SIZE_SHIFT)
+#define CS_SIZE_SIZE_GET(reg_val) (((reg_val)&CS_SIZE_SIZE_MASK) >> CS_SIZE_SIZE_SHIFT)
+#define CS_SIZE_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~CS_SIZE_SIZE_MASK) | (((value) << CS_SIZE_SIZE_SHIFT) & CS_SIZE_SIZE_MASK))
+
+/* CS_TILER_HEAP_START register */
+#define CS_TILER_HEAP_START_POINTER_SHIFT 0
+#define CS_TILER_HEAP_START_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_START_POINTER_SHIFT)
+#define CS_TILER_HEAP_START_POINTER_GET(reg_val) \
+ (((reg_val)&CS_TILER_HEAP_START_POINTER_MASK) >> CS_TILER_HEAP_START_POINTER_SHIFT)
+#define CS_TILER_HEAP_START_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_TILER_HEAP_START_POINTER_MASK) | \
+ (((value) << CS_TILER_HEAP_START_POINTER_SHIFT) & CS_TILER_HEAP_START_POINTER_MASK))
+/* HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
+/* End of HeapChunkPointer nested in CS_TILER_HEAP_START_POINTER */
+
+/* CS_TILER_HEAP_END register */
+#define CS_TILER_HEAP_END_POINTER_SHIFT 0
+#define CS_TILER_HEAP_END_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_TILER_HEAP_END_POINTER_SHIFT)
+#define CS_TILER_HEAP_END_POINTER_GET(reg_val) \
+ (((reg_val)&CS_TILER_HEAP_END_POINTER_MASK) >> CS_TILER_HEAP_END_POINTER_SHIFT)
+#define CS_TILER_HEAP_END_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_TILER_HEAP_END_POINTER_MASK) | \
+ (((value) << CS_TILER_HEAP_END_POINTER_SHIFT) & CS_TILER_HEAP_END_POINTER_MASK))
+/* HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
+/* End of HeapChunkPointer nested in CS_TILER_HEAP_END_POINTER */
+
+/* CS_USER_INPUT register */
+#define CS_USER_INPUT_POINTER_SHIFT 0
+#define CS_USER_INPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_INPUT_POINTER_SHIFT)
+#define CS_USER_INPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_INPUT_POINTER_MASK) >> CS_USER_INPUT_POINTER_SHIFT)
+#define CS_USER_INPUT_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_USER_INPUT_POINTER_MASK) | \
+ (((value) << CS_USER_INPUT_POINTER_SHIFT) & CS_USER_INPUT_POINTER_MASK))
+
+/* CS_USER_OUTPUT register */
+#define CS_USER_OUTPUT_POINTER_SHIFT 0
+#define CS_USER_OUTPUT_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_USER_OUTPUT_POINTER_SHIFT)
+#define CS_USER_OUTPUT_POINTER_GET(reg_val) (((reg_val)&CS_USER_OUTPUT_POINTER_MASK) >> CS_USER_OUTPUT_POINTER_SHIFT)
+#define CS_USER_OUTPUT_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_USER_OUTPUT_POINTER_MASK) | \
+ (((value) << CS_USER_OUTPUT_POINTER_SHIFT) & CS_USER_OUTPUT_POINTER_MASK))
+
+/* CS_INSTR_CONFIG register */
+#define CS_INSTR_CONFIG_JASID_SHIFT (0)
+#define CS_INSTR_CONFIG_JASID_MASK ((u32)0xF << CS_INSTR_CONFIG_JASID_SHIFT)
+#define CS_INSTR_CONFIG_JASID_GET(reg_val) (((reg_val)&CS_INSTR_CONFIG_JASID_MASK) >> CS_INSTR_CONFIG_JASID_SHIFT)
+#define CS_INSTR_CONFIG_JASID_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSTR_CONFIG_JASID_MASK) | \
+ (((value) << CS_INSTR_CONFIG_JASID_SHIFT) & CS_INSTR_CONFIG_JASID_MASK))
+#define CS_INSTR_CONFIG_EVENT_SIZE_SHIFT (4)
+#define CS_INSTR_CONFIG_EVENT_SIZE_MASK ((u32)0xF << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_SIZE_GET(reg_val) \
+ (((reg_val)&CS_INSTR_CONFIG_EVENT_SIZE_MASK) >> CS_INSTR_CONFIG_EVENT_SIZE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_SIZE_MASK) | \
+ (((value) << CS_INSTR_CONFIG_EVENT_SIZE_SHIFT) & CS_INSTR_CONFIG_EVENT_SIZE_MASK))
+#define CS_INSTR_CONFIG_EVENT_STATE_SHIFT (16)
+#define CS_INSTR_CONFIG_EVENT_STATE_MASK ((u32)0xFF << CS_INSTR_CONFIG_EVENT_STATE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_STATE_GET(reg_val) \
+ (((reg_val)&CS_INSTR_CONFIG_EVENT_STATE_MASK) >> CS_INSTR_CONFIG_EVENT_STATE_SHIFT)
+#define CS_INSTR_CONFIG_EVENT_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSTR_CONFIG_EVENT_STATE_MASK) | \
+ (((value) << CS_INSTR_CONFIG_EVENT_STATE_SHIFT) & CS_INSTR_CONFIG_EVENT_STATE_MASK))
+
+/* CS_INSTR_BUFFER_SIZE register */
+#define CS_INSTR_BUFFER_SIZE_SIZE_SHIFT (0)
+#define CS_INSTR_BUFFER_SIZE_SIZE_MASK ((u32)0xFFFFFFFF << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT)
+#define CS_INSTR_BUFFER_SIZE_SIZE_GET(reg_val) \
+ (((reg_val)&CS_INSTR_BUFFER_SIZE_SIZE_MASK) >> CS_INSTR_BUFFER_SIZE_SIZE_SHIFT)
+#define CS_INSTR_BUFFER_SIZE_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSTR_BUFFER_SIZE_SIZE_MASK) | \
+ (((value) << CS_INSTR_BUFFER_SIZE_SIZE_SHIFT) & CS_INSTR_BUFFER_SIZE_SIZE_MASK))
+
+/* CS_INSTR_BUFFER_BASE register */
+#define CS_INSTR_BUFFER_BASE_POINTER_SHIFT (0)
+#define CS_INSTR_BUFFER_BASE_POINTER_MASK ((u64)0xFFFFFFFFFFFFFFFF << CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_BASE_POINTER_GET(reg_val) \
+ (((reg_val)&CS_INSTR_BUFFER_BASE_POINTER_MASK) >> CS_INSTR_BUFFER_BASE_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_BASE_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSTR_BUFFER_BASE_POINTER_MASK) | \
+ (((value) << CS_INSTR_BUFFER_BASE_POINTER_SHIFT) & CS_INSTR_BUFFER_BASE_POINTER_MASK))
+
+/* CS_INSTR_BUFFER_OFFSET_POINTER register */
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT (0)
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK \
+ (((u64)0xFFFFFFFFFFFFFFFF) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_GET(reg_val) \
+ (((reg_val)&CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) >> CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT)
+#define CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK) | \
+ (((value) << CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_SHIFT) & CS_INSTR_BUFFER_OFFSET_POINTER_POINTER_MASK))
+
+/* End of CS_KERNEL_INPUT_BLOCK register set definitions */
+
+/* CS_KERNEL_OUTPUT_BLOCK register set definitions */
+
+/* CS_ACK register */
+#define CS_ACK_STATE_SHIFT 0
+#define CS_ACK_STATE_MASK (0x7 << CS_ACK_STATE_SHIFT)
+#define CS_ACK_STATE_GET(reg_val) (((reg_val)&CS_ACK_STATE_MASK) >> CS_ACK_STATE_SHIFT)
+#define CS_ACK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_STATE_MASK) | (((value) << CS_ACK_STATE_SHIFT) & CS_ACK_STATE_MASK))
+/* CS_ACK_STATE values */
+#define CS_ACK_STATE_STOP 0x0
+#define CS_ACK_STATE_START 0x1
+/* End of CS_ACK_STATE values */
+#define CS_ACK_EXTRACT_EVENT_SHIFT 4
+#define CS_ACK_EXTRACT_EVENT_MASK (0x1 << CS_ACK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_EXTRACT_EVENT_GET(reg_val) (((reg_val)&CS_ACK_EXTRACT_EVENT_MASK) >> CS_ACK_EXTRACT_EVENT_SHIFT)
+#define CS_ACK_EXTRACT_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_EXTRACT_EVENT_MASK) | (((value) << CS_ACK_EXTRACT_EVENT_SHIFT) & CS_ACK_EXTRACT_EVENT_MASK))
+#define CS_ACK_TILER_OOM_SHIFT 26
+#define CS_ACK_TILER_OOM_MASK (0x1 << CS_ACK_TILER_OOM_SHIFT)
+#define CS_ACK_TILER_OOM_GET(reg_val) (((reg_val)&CS_ACK_TILER_OOM_MASK) >> CS_ACK_TILER_OOM_SHIFT)
+#define CS_ACK_TILER_OOM_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_TILER_OOM_MASK) | (((value) << CS_ACK_TILER_OOM_SHIFT) & CS_ACK_TILER_OOM_MASK))
+#define CS_ACK_PROTM_PEND_SHIFT 27
+#define CS_ACK_PROTM_PEND_MASK (0x1 << CS_ACK_PROTM_PEND_SHIFT)
+#define CS_ACK_PROTM_PEND_GET(reg_val) (((reg_val)&CS_ACK_PROTM_PEND_MASK) >> CS_ACK_PROTM_PEND_SHIFT)
+#define CS_ACK_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_PROTM_PEND_MASK) | (((value) << CS_ACK_PROTM_PEND_SHIFT) & CS_ACK_PROTM_PEND_MASK))
+#define CS_ACK_FATAL_SHIFT 30
+#define CS_ACK_FATAL_MASK (0x1 << CS_ACK_FATAL_SHIFT)
+#define CS_ACK_FATAL_GET(reg_val) (((reg_val)&CS_ACK_FATAL_MASK) >> CS_ACK_FATAL_SHIFT)
+#define CS_ACK_FATAL_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_FATAL_MASK) | (((value) << CS_ACK_FATAL_SHIFT) & CS_ACK_FATAL_MASK))
+#define CS_ACK_FAULT_SHIFT 31
+#define CS_ACK_FAULT_MASK (0x1 << CS_ACK_FAULT_SHIFT)
+#define CS_ACK_FAULT_GET(reg_val) (((reg_val)&CS_ACK_FAULT_MASK) >> CS_ACK_FAULT_SHIFT)
+#define CS_ACK_FAULT_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACK_FAULT_MASK) | (((value) << CS_ACK_FAULT_SHIFT) & CS_ACK_FAULT_MASK))
+
+/* CS_STATUS_CMD_PTR register */
+#define CS_STATUS_CMD_PTR_POINTER_SHIFT 0
+#define CS_STATUS_CMD_PTR_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_CMD_PTR_POINTER_SHIFT)
+#define CS_STATUS_CMD_PTR_POINTER_GET(reg_val) \
+ (((reg_val)&CS_STATUS_CMD_PTR_POINTER_MASK) >> CS_STATUS_CMD_PTR_POINTER_SHIFT)
+#define CS_STATUS_CMD_PTR_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_CMD_PTR_POINTER_MASK) | \
+ (((value) << CS_STATUS_CMD_PTR_POINTER_SHIFT) & CS_STATUS_CMD_PTR_POINTER_MASK))
+
+/* CS_STATUS_WAIT register */
+#define CS_STATUS_WAIT_SB_MASK_SHIFT 0
+#define CS_STATUS_WAIT_SB_MASK_MASK (0xFFFF << CS_STATUS_WAIT_SB_MASK_SHIFT)
+#define CS_STATUS_WAIT_SB_MASK_GET(reg_val) (((reg_val)&CS_STATUS_WAIT_SB_MASK_MASK) >> CS_STATUS_WAIT_SB_MASK_SHIFT)
+#define CS_STATUS_WAIT_SB_MASK_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SB_MASK_MASK) | \
+ (((value) << CS_STATUS_WAIT_SB_MASK_SHIFT) & CS_STATUS_WAIT_SB_MASK_MASK))
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT 24
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK (0xF << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_CONDITION_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_CONDITION_MASK))
+/* CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_LE 0x0
+#define CS_STATUS_WAIT_SYNC_WAIT_CONDITION_GT 0x1
+/* End of CS_STATUS_WAIT_SYNC_WAIT_CONDITION values */
+#define CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT 28
+#define CS_STATUS_WAIT_PROGRESS_WAIT_MASK (0x1 << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
+#define CS_STATUS_WAIT_PROGRESS_WAIT_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_PROGRESS_WAIT_MASK) >> CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT)
+#define CS_STATUS_WAIT_PROGRESS_WAIT_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_PROGRESS_WAIT_MASK) | \
+ (((value) << CS_STATUS_WAIT_PROGRESS_WAIT_SHIFT) & CS_STATUS_WAIT_PROGRESS_WAIT_MASK))
+#define CS_STATUS_WAIT_PROTM_PEND_SHIFT 29
+#define CS_STATUS_WAIT_PROTM_PEND_MASK (0x1 << CS_STATUS_WAIT_PROTM_PEND_SHIFT)
+#define CS_STATUS_WAIT_PROTM_PEND_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_PROTM_PEND_MASK) >> CS_STATUS_WAIT_PROTM_PEND_SHIFT)
+#define CS_STATUS_WAIT_PROTM_PEND_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_PROTM_PEND_MASK) | \
+ (((value) << CS_STATUS_WAIT_PROTM_PEND_SHIFT) & CS_STATUS_WAIT_PROTM_PEND_MASK))
+#define CS_STATUS_WAIT_SYNC_WAIT_SHIFT 31
+#define CS_STATUS_WAIT_SYNC_WAIT_MASK (0x1 << CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_WAIT_MASK) >> CS_STATUS_WAIT_SYNC_WAIT_SHIFT)
+#define CS_STATUS_WAIT_SYNC_WAIT_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_WAIT_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_WAIT_SHIFT) & CS_STATUS_WAIT_SYNC_WAIT_MASK))
+
+/* CS_STATUS_REQ_RESOURCE register */
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT 0
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_COMPUTE_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT 1
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_FRAGMENT_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT 2
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_TILER_RESOURCES_MASK))
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT 3
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK (0x1 << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_GET(reg_val) \
+ (((reg_val)&CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) >> CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT)
+#define CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \
+ (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK))
+
+/* CS_STATUS_WAIT_SYNC_POINTER register */
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) >> CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT)
+#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT) & CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK))
+
+/* CS_STATUS_WAIT_SYNC_VALUE register */
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT 0
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK (0xFFFFFFFF << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_GET(reg_val) \
+ (((reg_val)&CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) >> CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT)
+#define CS_STATUS_WAIT_SYNC_VALUE_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK) | \
+ (((value) << CS_STATUS_WAIT_SYNC_VALUE_VALUE_SHIFT) & CS_STATUS_WAIT_SYNC_VALUE_VALUE_MASK))
+
+/* CS_STATUS_SCOREBOARDS register */
+#define CS_STATUS_SCOREBOARDS_NONZERO_SHIFT (0)
+#define CS_STATUS_SCOREBOARDS_NONZERO_MASK \
+ ((0xFFFF) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_GET(reg_val) \
+ (((reg_val)&CS_STATUS_SCOREBOARDS_NONZERO_MASK) >> \
+ CS_STATUS_SCOREBOARDS_NONZERO_SHIFT)
+#define CS_STATUS_SCOREBOARDS_NONZERO_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_SCOREBOARDS_NONZERO_MASK) | \
+ (((value) << CS_STATUS_SCOREBOARDS_NONZERO_SHIFT) & \
+ CS_STATUS_SCOREBOARDS_NONZERO_MASK))
+
+/* CS_STATUS_BLOCKED_REASON register */
+#define CS_STATUS_BLOCKED_REASON_REASON_SHIFT (0)
+#define CS_STATUS_BLOCKED_REASON_REASON_MASK \
+ ((0xF) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_GET(reg_val) \
+ (((reg_val)&CS_STATUS_BLOCKED_REASON_REASON_MASK) >> \
+ CS_STATUS_BLOCKED_REASON_REASON_SHIFT)
+#define CS_STATUS_BLOCKED_REASON_REASON_SET(reg_val, value) \
+ (((reg_val) & ~CS_STATUS_BLOCKED_REASON_REASON_MASK) | \
+ (((value) << CS_STATUS_BLOCKED_REASON_REASON_SHIFT) & \
+ CS_STATUS_BLOCKED_REASON_REASON_MASK))
+/* CS_STATUS_BLOCKED_REASON_REASON values */
+#define CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED 0x0
+#define CS_STATUS_BLOCKED_REASON_REASON_WAIT 0x1
+#define CS_STATUS_BLOCKED_REASON_REASON_PROGRESS_WAIT 0x2
+#define CS_STATUS_BLOCKED_REASON_REASON_SYNC_WAIT 0x3
+#define CS_STATUS_BLOCKED_REASON_REASON_DEFERRED 0x4
+#define CS_STATUS_BLOCKED_REASON_REASON_RESOURCE 0x5
+#define CS_STATUS_BLOCKED_REASON_REASON_FLUSH 0x6
+/* End of CS_STATUS_BLOCKED_REASON_REASON values */
+
+/* CS_FAULT register */
+#define CS_FAULT_EXCEPTION_TYPE_SHIFT 0
+#define CS_FAULT_EXCEPTION_TYPE_MASK (0xFF << CS_FAULT_EXCEPTION_TYPE_SHIFT)
+#define CS_FAULT_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_TYPE_MASK) >> CS_FAULT_EXCEPTION_TYPE_SHIFT)
+#define CS_FAULT_EXCEPTION_TYPE_SET(reg_val, value) \
+ (((reg_val) & ~CS_FAULT_EXCEPTION_TYPE_MASK) | \
+ (((value) << CS_FAULT_EXCEPTION_TYPE_SHIFT) & CS_FAULT_EXCEPTION_TYPE_MASK))
+/* CS_FAULT_EXCEPTION_TYPE values */
+#define CS_FAULT_EXCEPTION_TYPE_KABOOM 0x05
+#define CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED 0x0F
+#define CS_FAULT_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
+#define CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT 0x4B
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_PC 0x50
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_INVALID_ENC 0x51
+#define CS_FAULT_EXCEPTION_TYPE_INSTR_BARRIER_FAULT 0x55
+#define CS_FAULT_EXCEPTION_TYPE_DATA_INVALID_FAULT 0x58
+#define CS_FAULT_EXCEPTION_TYPE_TILE_RANGE_FAULT 0x59
+#define CS_FAULT_EXCEPTION_TYPE_ADDR_RANGE_FAULT 0x5A
+#define CS_FAULT_EXCEPTION_TYPE_IMPRECISE_FAULT 0x5B
+#define CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT 0x69
+/* End of CS_FAULT_EXCEPTION_TYPE values */
+#define CS_FAULT_EXCEPTION_DATA_SHIFT 8
+#define CS_FAULT_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FAULT_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FAULT_EXCEPTION_DATA_MASK) >> CS_FAULT_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FAULT_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FAULT_EXCEPTION_DATA_SHIFT) & CS_FAULT_EXCEPTION_DATA_MASK))
+
+/* CS_FATAL register */
+#define CS_FATAL_EXCEPTION_TYPE_SHIFT 0
+#define CS_FATAL_EXCEPTION_TYPE_MASK (0xFF << CS_FATAL_EXCEPTION_TYPE_SHIFT)
+#define CS_FATAL_EXCEPTION_TYPE_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_TYPE_MASK) >> CS_FATAL_EXCEPTION_TYPE_SHIFT)
+#define CS_FATAL_EXCEPTION_TYPE_SET(reg_val, value) \
+ (((reg_val) & ~CS_FATAL_EXCEPTION_TYPE_MASK) | \
+ (((value) << CS_FATAL_EXCEPTION_TYPE_SHIFT) & CS_FATAL_EXCEPTION_TYPE_MASK))
+/* CS_FATAL_EXCEPTION_TYPE values */
+#define CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT 0x40
+#define CS_FATAL_EXCEPTION_TYPE_CS_ENDPOINT_FAULT 0x44
+#define CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT 0x48
+#define CS_FATAL_EXCEPTION_TYPE_CS_INVALID_INSTRUCTION 0x49
+#define CS_FATAL_EXCEPTION_TYPE_CS_CALL_STACK_OVERFLOW 0x4A
+#define CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR 0x68
+/* End of CS_FATAL_EXCEPTION_TYPE values */
+#define CS_FATAL_EXCEPTION_DATA_SHIFT 8
+#define CS_FATAL_EXCEPTION_DATA_MASK (0xFFFFFF << CS_FATAL_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_EXCEPTION_DATA_GET(reg_val) (((reg_val)&CS_FATAL_EXCEPTION_DATA_MASK) >> CS_FATAL_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FATAL_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FATAL_EXCEPTION_DATA_SHIFT) & CS_FATAL_EXCEPTION_DATA_MASK))
+
+/* CS_FAULT_INFO register */
+#define CS_FAULT_INFO_EXCEPTION_DATA_SHIFT 0
+#define CS_FAULT_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_INFO_EXCEPTION_DATA_GET(reg_val) \
+ (((reg_val)&CS_FAULT_INFO_EXCEPTION_DATA_MASK) >> CS_FAULT_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FAULT_INFO_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FAULT_INFO_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FAULT_INFO_EXCEPTION_DATA_SHIFT) & CS_FAULT_INFO_EXCEPTION_DATA_MASK))
+
+/* CS_FATAL_INFO register */
+#define CS_FATAL_INFO_EXCEPTION_DATA_SHIFT 0
+#define CS_FATAL_INFO_EXCEPTION_DATA_MASK (0xFFFFFFFFFFFFFFFF << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_INFO_EXCEPTION_DATA_GET(reg_val) \
+ (((reg_val)&CS_FATAL_INFO_EXCEPTION_DATA_MASK) >> CS_FATAL_INFO_EXCEPTION_DATA_SHIFT)
+#define CS_FATAL_INFO_EXCEPTION_DATA_SET(reg_val, value) \
+ (((reg_val) & ~CS_FATAL_INFO_EXCEPTION_DATA_MASK) | \
+ (((value) << CS_FATAL_INFO_EXCEPTION_DATA_SHIFT) & CS_FATAL_INFO_EXCEPTION_DATA_MASK))
+
+/* CS_HEAP_VT_START register */
+#define CS_HEAP_VT_START_VALUE_SHIFT 0
+#define CS_HEAP_VT_START_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_START_VALUE_SHIFT)
+#define CS_HEAP_VT_START_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_START_VALUE_MASK) >> CS_HEAP_VT_START_VALUE_SHIFT)
+#define CS_HEAP_VT_START_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_VT_START_VALUE_MASK) | \
+ (((value) << CS_HEAP_VT_START_VALUE_SHIFT) & CS_HEAP_VT_START_VALUE_MASK))
+
+/* CS_HEAP_VT_END register */
+#define CS_HEAP_VT_END_VALUE_SHIFT 0
+#define CS_HEAP_VT_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_VT_END_VALUE_SHIFT)
+#define CS_HEAP_VT_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_VT_END_VALUE_MASK) >> CS_HEAP_VT_END_VALUE_SHIFT)
+#define CS_HEAP_VT_END_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_VT_END_VALUE_MASK) | (((value) << CS_HEAP_VT_END_VALUE_SHIFT) & CS_HEAP_VT_END_VALUE_MASK))
+
+/* CS_HEAP_FRAG_END register */
+#define CS_HEAP_FRAG_END_VALUE_SHIFT 0
+#define CS_HEAP_FRAG_END_VALUE_MASK (0xFFFFFFFF << CS_HEAP_FRAG_END_VALUE_SHIFT)
+#define CS_HEAP_FRAG_END_VALUE_GET(reg_val) (((reg_val)&CS_HEAP_FRAG_END_VALUE_MASK) >> CS_HEAP_FRAG_END_VALUE_SHIFT)
+#define CS_HEAP_FRAG_END_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_FRAG_END_VALUE_MASK) | \
+ (((value) << CS_HEAP_FRAG_END_VALUE_SHIFT) & CS_HEAP_FRAG_END_VALUE_MASK))
+
+/* CS_HEAP_ADDRESS register */
+#define CS_HEAP_ADDRESS_POINTER_SHIFT 0
+#define CS_HEAP_ADDRESS_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CS_HEAP_ADDRESS_POINTER_SHIFT)
+#define CS_HEAP_ADDRESS_POINTER_GET(reg_val) (((reg_val)&CS_HEAP_ADDRESS_POINTER_MASK) >> CS_HEAP_ADDRESS_POINTER_SHIFT)
+#define CS_HEAP_ADDRESS_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CS_HEAP_ADDRESS_POINTER_MASK) | \
+ (((value) << CS_HEAP_ADDRESS_POINTER_SHIFT) & CS_HEAP_ADDRESS_POINTER_MASK))
+/* End of CS_KERNEL_OUTPUT_BLOCK register set definitions */
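+
+/* A minimal sketch of splitting the CS_FAULT and CS_FAULT_INFO words from
+ * the CS kernel output block into their exception type/data fields
+ * (assuming u32/u64 from <linux/types.h>; the function and parameter names
+ * are illustrative only, not part of this interface):
+ */
+static inline void example_decode_cs_fault(u32 cs_fault, u64 cs_fault_info,
+					   u32 *type, u32 *data, u64 *info)
+{
+	*type = CS_FAULT_EXCEPTION_TYPE_GET(cs_fault);
+	*data = CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
+	*info = CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
+}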
+
+/* CS_USER_INPUT_BLOCK register set definitions */
+
+/* CS_INSERT register */
+#define CS_INSERT_VALUE_SHIFT 0
+#define CS_INSERT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_INSERT_VALUE_SHIFT)
+#define CS_INSERT_VALUE_GET(reg_val) (((reg_val)&CS_INSERT_VALUE_MASK) >> CS_INSERT_VALUE_SHIFT)
+#define CS_INSERT_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_INSERT_VALUE_MASK) | (((value) << CS_INSERT_VALUE_SHIFT) & CS_INSERT_VALUE_MASK))
+
+/* CS_EXTRACT_INIT register */
+#define CS_EXTRACT_INIT_VALUE_SHIFT 0
+#define CS_EXTRACT_INIT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_INIT_VALUE_SHIFT)
+#define CS_EXTRACT_INIT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_INIT_VALUE_MASK) >> CS_EXTRACT_INIT_VALUE_SHIFT)
+#define CS_EXTRACT_INIT_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_EXTRACT_INIT_VALUE_MASK) | \
+ (((value) << CS_EXTRACT_INIT_VALUE_SHIFT) & CS_EXTRACT_INIT_VALUE_MASK))
+/* End of CS_USER_INPUT_BLOCK register set definitions */
+
+/* CS_USER_OUTPUT_BLOCK register set definitions */
+
+/* CS_EXTRACT register */
+#define CS_EXTRACT_VALUE_SHIFT 0
+#define CS_EXTRACT_VALUE_MASK (0xFFFFFFFFFFFFFFFF << CS_EXTRACT_VALUE_SHIFT)
+#define CS_EXTRACT_VALUE_GET(reg_val) (((reg_val)&CS_EXTRACT_VALUE_MASK) >> CS_EXTRACT_VALUE_SHIFT)
+#define CS_EXTRACT_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~CS_EXTRACT_VALUE_MASK) | (((value) << CS_EXTRACT_VALUE_SHIFT) & CS_EXTRACT_VALUE_MASK))
+
+/* CS_ACTIVE register */
+#define CS_ACTIVE_HW_ACTIVE_SHIFT 0
+#define CS_ACTIVE_HW_ACTIVE_MASK (0x1 << CS_ACTIVE_HW_ACTIVE_SHIFT)
+#define CS_ACTIVE_HW_ACTIVE_GET(reg_val) (((reg_val)&CS_ACTIVE_HW_ACTIVE_MASK) >> CS_ACTIVE_HW_ACTIVE_SHIFT)
+#define CS_ACTIVE_HW_ACTIVE_SET(reg_val, value) \
+ (((reg_val) & ~CS_ACTIVE_HW_ACTIVE_MASK) | (((value) << CS_ACTIVE_HW_ACTIVE_SHIFT) & CS_ACTIVE_HW_ACTIVE_MASK))
+/* End of CS_USER_OUTPUT_BLOCK register set definitions */
+
+/* CSG_INPUT_BLOCK register set definitions */
+
+/* CSG_REQ register */
+#define CSG_REQ_STATE_SHIFT 0
+#define CSG_REQ_STATE_MASK (0x7 << CSG_REQ_STATE_SHIFT)
+#define CSG_REQ_STATE_GET(reg_val) (((reg_val)&CSG_REQ_STATE_MASK) >> CSG_REQ_STATE_SHIFT)
+#define CSG_REQ_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_STATE_MASK) | (((value) << CSG_REQ_STATE_SHIFT) & CSG_REQ_STATE_MASK))
+/* CSG_REQ_STATE values */
+#define CSG_REQ_STATE_TERMINATE 0x0
+#define CSG_REQ_STATE_START 0x1
+#define CSG_REQ_STATE_SUSPEND 0x2
+#define CSG_REQ_STATE_RESUME 0x3
+/* End of CSG_REQ_STATE values */
+#define CSG_REQ_EP_CFG_SHIFT 4
+#define CSG_REQ_EP_CFG_MASK (0x1 << CSG_REQ_EP_CFG_SHIFT)
+#define CSG_REQ_EP_CFG_GET(reg_val) (((reg_val)&CSG_REQ_EP_CFG_MASK) >> CSG_REQ_EP_CFG_SHIFT)
+#define CSG_REQ_EP_CFG_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_EP_CFG_MASK) | (((value) << CSG_REQ_EP_CFG_SHIFT) & CSG_REQ_EP_CFG_MASK))
+#define CSG_REQ_STATUS_UPDATE_SHIFT 5
+#define CSG_REQ_STATUS_UPDATE_MASK (0x1 << CSG_REQ_STATUS_UPDATE_SHIFT)
+#define CSG_REQ_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_STATUS_UPDATE_MASK) >> CSG_REQ_STATUS_UPDATE_SHIFT)
+#define CSG_REQ_STATUS_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_STATUS_UPDATE_MASK) | \
+ (((value) << CSG_REQ_STATUS_UPDATE_SHIFT) & CSG_REQ_STATUS_UPDATE_MASK))
+#define CSG_REQ_SYNC_UPDATE_SHIFT 28
+#define CSG_REQ_SYNC_UPDATE_MASK (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT)
+#define CSG_REQ_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_REQ_SYNC_UPDATE_MASK) >> CSG_REQ_SYNC_UPDATE_SHIFT)
+#define CSG_REQ_SYNC_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_SYNC_UPDATE_MASK) | (((value) << CSG_REQ_SYNC_UPDATE_SHIFT) & CSG_REQ_SYNC_UPDATE_MASK))
+#define CSG_REQ_IDLE_SHIFT 29
+#define CSG_REQ_IDLE_MASK (0x1 << CSG_REQ_IDLE_SHIFT)
+#define CSG_REQ_IDLE_GET(reg_val) (((reg_val)&CSG_REQ_IDLE_MASK) >> CSG_REQ_IDLE_SHIFT)
+#define CSG_REQ_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_IDLE_MASK) | (((value) << CSG_REQ_IDLE_SHIFT) & CSG_REQ_IDLE_MASK))
+#define CSG_REQ_DOORBELL_SHIFT 30
+#define CSG_REQ_DOORBELL_MASK (0x1 << CSG_REQ_DOORBELL_SHIFT)
+#define CSG_REQ_DOORBELL_GET(reg_val) (((reg_val)&CSG_REQ_DOORBELL_MASK) >> CSG_REQ_DOORBELL_SHIFT)
+#define CSG_REQ_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_DOORBELL_MASK) | (((value) << CSG_REQ_DOORBELL_SHIFT) & CSG_REQ_DOORBELL_MASK))
+#define CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT 31
+#define CSG_REQ_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_REQ_PROGRESS_TIMER_EVENT_GET(reg_val) \
+ (((reg_val)&CSG_REQ_PROGRESS_TIMER_EVENT_MASK) >> CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_REQ_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_REQ_PROGRESS_TIMER_EVENT_MASK) | \
+ (((value) << CSG_REQ_PROGRESS_TIMER_EVENT_SHIFT) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK))
+
+/* CSG_ACK_IRQ_MASK register */
+#define CSG_ACK_IRQ_MASK_STATE_SHIFT 0
+#define CSG_ACK_IRQ_MASK_STATE_MASK (0x7 << CSG_ACK_IRQ_MASK_STATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_STATE_MASK) >> CSG_ACK_IRQ_MASK_STATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_STATE_SHIFT) & CSG_ACK_IRQ_MASK_STATE_MASK))
+/* CSG_ACK_IRQ_MASK_STATE values */
+#define CSG_ACK_IRQ_MASK_STATE_DISABLED 0x0
+#define CSG_ACK_IRQ_MASK_STATE_ENABLED 0x7
+/* End of CSG_ACK_IRQ_MASK_STATE values */
+#define CSG_ACK_IRQ_MASK_EP_CFG_SHIFT 4
+#define CSG_ACK_IRQ_MASK_EP_CFG_MASK (0x1 << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
+#define CSG_ACK_IRQ_MASK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_EP_CFG_MASK) >> CSG_ACK_IRQ_MASK_EP_CFG_SHIFT)
+#define CSG_ACK_IRQ_MASK_EP_CFG_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_EP_CFG_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_EP_CFG_SHIFT) & CSG_ACK_IRQ_MASK_EP_CFG_MASK))
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT 5
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_STATUS_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_STATUS_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_STATUS_UPDATE_MASK))
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT 28
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) >> CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_IRQ_MASK_SYNC_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_SYNC_UPDATE_SHIFT) & CSG_ACK_IRQ_MASK_SYNC_UPDATE_MASK))
+#define CSG_ACK_IRQ_MASK_IDLE_SHIFT 29
+#define CSG_ACK_IRQ_MASK_IDLE_MASK (0x1 << CSG_ACK_IRQ_MASK_IDLE_SHIFT)
+#define CSG_ACK_IRQ_MASK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IRQ_MASK_IDLE_MASK) >> CSG_ACK_IRQ_MASK_IDLE_SHIFT)
+#define CSG_ACK_IRQ_MASK_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_IDLE_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_IDLE_SHIFT) & CSG_ACK_IRQ_MASK_IDLE_MASK))
+#define CSG_ACK_IRQ_MASK_DOORBELL_SHIFT 30
+#define CSG_ACK_IRQ_MASK_DOORBELL_MASK (0x1 << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
+#define CSG_ACK_IRQ_MASK_DOORBELL_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_DOORBELL_MASK) >> CSG_ACK_IRQ_MASK_DOORBELL_SHIFT)
+#define CSG_ACK_IRQ_MASK_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_DOORBELL_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_DOORBELL_SHIFT) & CSG_ACK_IRQ_MASK_DOORBELL_MASK))
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT 31
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_GET(reg_val) \
+ (((reg_val)&CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK) | \
+ (((value) << CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_IRQ_MASK_PROGRESS_TIMER_EVENT_MASK))
+
+/* CSG_EP_REQ register */
+#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0
+#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \
+ (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK))
+#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8
+#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \
+ (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK))
+#define CSG_EP_REQ_TILER_EP_SHIFT 16
+#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT)
+#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT)
+#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK))
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
+ (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
+ (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
+#define CSG_EP_REQ_PRIORITY_SHIFT 28
+#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT)
+#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT)
+#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK))
+
+/* CSG_SUSPEND_BUF register */
+#define CSG_SUSPEND_BUF_POINTER_SHIFT 0
+#define CSG_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_SUSPEND_BUF_POINTER_GET(reg_val) (((reg_val)&CSG_SUSPEND_BUF_POINTER_MASK) >> CSG_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_SUSPEND_BUF_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CSG_SUSPEND_BUF_POINTER_MASK) | \
+ (((value) << CSG_SUSPEND_BUF_POINTER_SHIFT) & CSG_SUSPEND_BUF_POINTER_MASK))
+
+/* CSG_PROTM_SUSPEND_BUF register */
+#define CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT 0
+#define CSG_PROTM_SUSPEND_BUF_POINTER_MASK (0xFFFFFFFFFFFFFFFF << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_PROTM_SUSPEND_BUF_POINTER_GET(reg_val) \
+ (((reg_val)&CSG_PROTM_SUSPEND_BUF_POINTER_MASK) >> CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT)
+#define CSG_PROTM_SUSPEND_BUF_POINTER_SET(reg_val, value) \
+ (((reg_val) & ~CSG_PROTM_SUSPEND_BUF_POINTER_MASK) | \
+ (((value) << CSG_PROTM_SUSPEND_BUF_POINTER_SHIFT) & CSG_PROTM_SUSPEND_BUF_POINTER_MASK))
+
+/* End of CSG_INPUT_BLOCK register set definitions */
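+
+/* A minimal sketch of combining the CSG_EP_REQ endpoint-request fields
+ * defined above into a single register value (assuming u32 from
+ * <linux/types.h>; the function and parameter names are illustrative only,
+ * not part of this interface):
+ */
+static inline u32 example_build_csg_ep_req(u32 compute_ep, u32 fragment_ep,
+					   u32 tiler_ep, u32 priority)
+{
+	u32 ep_req = 0;
+
+	ep_req = CSG_EP_REQ_COMPUTE_EP_SET(ep_req, compute_ep);
+	ep_req = CSG_EP_REQ_FRAGMENT_EP_SET(ep_req, fragment_ep);
+	ep_req = CSG_EP_REQ_TILER_EP_SET(ep_req, tiler_ep);
+	ep_req = CSG_EP_REQ_PRIORITY_SET(ep_req, priority);
+	return ep_req;
+}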
+
+/* CSG_OUTPUT_BLOCK register set definitions */
+
+/* CSG_ACK register */
+#define CSG_ACK_STATE_SHIFT 0
+#define CSG_ACK_STATE_MASK (0x7 << CSG_ACK_STATE_SHIFT)
+#define CSG_ACK_STATE_GET(reg_val) (((reg_val)&CSG_ACK_STATE_MASK) >> CSG_ACK_STATE_SHIFT)
+#define CSG_ACK_STATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_STATE_MASK) | (((value) << CSG_ACK_STATE_SHIFT) & CSG_ACK_STATE_MASK))
+/* CSG_ACK_STATE values */
+#define CSG_ACK_STATE_TERMINATE 0x0
+#define CSG_ACK_STATE_START 0x1
+#define CSG_ACK_STATE_SUSPEND 0x2
+#define CSG_ACK_STATE_RESUME 0x3
+/* End of CSG_ACK_STATE values */
+#define CSG_ACK_EP_CFG_SHIFT 4
+#define CSG_ACK_EP_CFG_MASK (0x1 << CSG_ACK_EP_CFG_SHIFT)
+#define CSG_ACK_EP_CFG_GET(reg_val) (((reg_val)&CSG_ACK_EP_CFG_MASK) >> CSG_ACK_EP_CFG_SHIFT)
+#define CSG_ACK_EP_CFG_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_EP_CFG_MASK) | (((value) << CSG_ACK_EP_CFG_SHIFT) & CSG_ACK_EP_CFG_MASK))
+#define CSG_ACK_STATUS_UPDATE_SHIFT 5
+#define CSG_ACK_STATUS_UPDATE_MASK (0x1 << CSG_ACK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_STATUS_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_STATUS_UPDATE_MASK) >> CSG_ACK_STATUS_UPDATE_SHIFT)
+#define CSG_ACK_STATUS_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_STATUS_UPDATE_MASK) | \
+ (((value) << CSG_ACK_STATUS_UPDATE_SHIFT) & CSG_ACK_STATUS_UPDATE_MASK))
+#define CSG_ACK_SYNC_UPDATE_SHIFT 28
+#define CSG_ACK_SYNC_UPDATE_MASK (0x1 << CSG_ACK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_SYNC_UPDATE_GET(reg_val) (((reg_val)&CSG_ACK_SYNC_UPDATE_MASK) >> CSG_ACK_SYNC_UPDATE_SHIFT)
+#define CSG_ACK_SYNC_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_SYNC_UPDATE_MASK) | (((value) << CSG_ACK_SYNC_UPDATE_SHIFT) & CSG_ACK_SYNC_UPDATE_MASK))
+#define CSG_ACK_IDLE_SHIFT 29
+#define CSG_ACK_IDLE_MASK (0x1 << CSG_ACK_IDLE_SHIFT)
+#define CSG_ACK_IDLE_GET(reg_val) (((reg_val)&CSG_ACK_IDLE_MASK) >> CSG_ACK_IDLE_SHIFT)
+#define CSG_ACK_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_IDLE_MASK) | (((value) << CSG_ACK_IDLE_SHIFT) & CSG_ACK_IDLE_MASK))
+#define CSG_ACK_DOORBELL_SHIFT 30
+#define CSG_ACK_DOORBELL_MASK (0x1 << CSG_ACK_DOORBELL_SHIFT)
+#define CSG_ACK_DOORBELL_GET(reg_val) (((reg_val)&CSG_ACK_DOORBELL_MASK) >> CSG_ACK_DOORBELL_SHIFT)
+#define CSG_ACK_DOORBELL_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_DOORBELL_MASK) | (((value) << CSG_ACK_DOORBELL_SHIFT) & CSG_ACK_DOORBELL_MASK))
+#define CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT 31
+#define CSG_ACK_PROGRESS_TIMER_EVENT_MASK (0x1 << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_PROGRESS_TIMER_EVENT_GET(reg_val) \
+ (((reg_val)&CSG_ACK_PROGRESS_TIMER_EVENT_MASK) >> CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT)
+#define CSG_ACK_PROGRESS_TIMER_EVENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_ACK_PROGRESS_TIMER_EVENT_MASK) | \
+ (((value) << CSG_ACK_PROGRESS_TIMER_EVENT_SHIFT) & CSG_ACK_PROGRESS_TIMER_EVENT_MASK))
+
+/* CSG_STATUS_EP_CURRENT register */
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT 0
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) >> CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_COMPUTE_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_CURRENT_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_CURRENT_COMPUTE_EP_MASK))
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT 8
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_CURRENT_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_CURRENT_FRAGMENT_EP_MASK))
+#define CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT 16
+#define CSG_STATUS_EP_CURRENT_TILER_EP_MASK (0xF << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_TILER_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_CURRENT_TILER_EP_MASK) >> CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_CURRENT_TILER_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK))
+
+/* CSG_STATUS_EP_REQ register */
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) >> CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_COMPUTE_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) & CSG_STATUS_EP_REQ_COMPUTE_EP_MASK))
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT 8
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) >> CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_STATUS_EP_REQ_FRAGMENT_EP_MASK))
+#define CSG_STATUS_EP_REQ_TILER_EP_SHIFT 16
+#define CSG_STATUS_EP_REQ_TILER_EP_MASK (0xF << CSG_STATUS_EP_REQ_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_TILER_EP_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_TILER_EP_MASK) >> CSG_STATUS_EP_REQ_TILER_EP_SHIFT)
+#define CSG_STATUS_EP_REQ_TILER_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_TILER_EP_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_TILER_EP_SHIFT) & CSG_STATUS_EP_REQ_TILER_EP_MASK))
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
+ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
+
+/* End of CSG_OUTPUT_BLOCK register set definitions */
+
+/* STREAM_CONTROL_BLOCK register set definitions */
+
+/* STREAM_FEATURES register */
+#define STREAM_FEATURES_WORK_REGISTERS_SHIFT 0
+#define STREAM_FEATURES_WORK_REGISTERS_MASK (0xFF << STREAM_FEATURES_WORK_REGISTERS_SHIFT)
+#define STREAM_FEATURES_WORK_REGISTERS_GET(reg_val) \
+ (((reg_val)&STREAM_FEATURES_WORK_REGISTERS_MASK) >> STREAM_FEATURES_WORK_REGISTERS_SHIFT)
+#define STREAM_FEATURES_WORK_REGISTERS_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_WORK_REGISTERS_MASK) | \
+ (((value) << STREAM_FEATURES_WORK_REGISTERS_SHIFT) & STREAM_FEATURES_WORK_REGISTERS_MASK))
+#define STREAM_FEATURES_SCOREBOARDS_SHIFT 8
+#define STREAM_FEATURES_SCOREBOARDS_MASK (0xFF << STREAM_FEATURES_SCOREBOARDS_SHIFT)
+#define STREAM_FEATURES_SCOREBOARDS_GET(reg_val) \
+ (((reg_val)&STREAM_FEATURES_SCOREBOARDS_MASK) >> STREAM_FEATURES_SCOREBOARDS_SHIFT)
+#define STREAM_FEATURES_SCOREBOARDS_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_SCOREBOARDS_MASK) | \
+ (((value) << STREAM_FEATURES_SCOREBOARDS_SHIFT) & STREAM_FEATURES_SCOREBOARDS_MASK))
+#define STREAM_FEATURES_COMPUTE_SHIFT 16
+#define STREAM_FEATURES_COMPUTE_MASK (0x1 << STREAM_FEATURES_COMPUTE_SHIFT)
+#define STREAM_FEATURES_COMPUTE_GET(reg_val) (((reg_val)&STREAM_FEATURES_COMPUTE_MASK) >> STREAM_FEATURES_COMPUTE_SHIFT)
+#define STREAM_FEATURES_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_COMPUTE_MASK) | \
+ (((value) << STREAM_FEATURES_COMPUTE_SHIFT) & STREAM_FEATURES_COMPUTE_MASK))
+#define STREAM_FEATURES_FRAGMENT_SHIFT 17
+#define STREAM_FEATURES_FRAGMENT_MASK (0x1 << STREAM_FEATURES_FRAGMENT_SHIFT)
+#define STREAM_FEATURES_FRAGMENT_GET(reg_val) \
+ (((reg_val)&STREAM_FEATURES_FRAGMENT_MASK) >> STREAM_FEATURES_FRAGMENT_SHIFT)
+#define STREAM_FEATURES_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_FRAGMENT_MASK) | \
+ (((value) << STREAM_FEATURES_FRAGMENT_SHIFT) & STREAM_FEATURES_FRAGMENT_MASK))
+#define STREAM_FEATURES_TILER_SHIFT 18
+#define STREAM_FEATURES_TILER_MASK (0x1 << STREAM_FEATURES_TILER_SHIFT)
+#define STREAM_FEATURES_TILER_GET(reg_val) (((reg_val)&STREAM_FEATURES_TILER_MASK) >> STREAM_FEATURES_TILER_SHIFT)
+#define STREAM_FEATURES_TILER_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_FEATURES_TILER_MASK) | \
+ (((value) << STREAM_FEATURES_TILER_SHIFT) & STREAM_FEATURES_TILER_MASK))
+
+/* STREAM_INPUT_VA register */
+#define STREAM_INPUT_VA_VALUE_SHIFT 0
+#define STREAM_INPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_INPUT_VA_VALUE_SHIFT)
+#define STREAM_INPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_INPUT_VA_VALUE_MASK) >> STREAM_INPUT_VA_VALUE_SHIFT)
+#define STREAM_INPUT_VA_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_INPUT_VA_VALUE_MASK) | \
+ (((value) << STREAM_INPUT_VA_VALUE_SHIFT) & STREAM_INPUT_VA_VALUE_MASK))
+
+/* STREAM_OUTPUT_VA register */
+#define STREAM_OUTPUT_VA_VALUE_SHIFT 0
+#define STREAM_OUTPUT_VA_VALUE_MASK (0xFFFFFFFF << STREAM_OUTPUT_VA_VALUE_SHIFT)
+#define STREAM_OUTPUT_VA_VALUE_GET(reg_val) (((reg_val)&STREAM_OUTPUT_VA_VALUE_MASK) >> STREAM_OUTPUT_VA_VALUE_SHIFT)
+#define STREAM_OUTPUT_VA_VALUE_SET(reg_val, value) \
+ (((reg_val) & ~STREAM_OUTPUT_VA_VALUE_MASK) | \
+ (((value) << STREAM_OUTPUT_VA_VALUE_SHIFT) & STREAM_OUTPUT_VA_VALUE_MASK))
+/* End of STREAM_CONTROL_BLOCK register set definitions */
+
+/* GLB_INPUT_BLOCK register set definitions */
+
+/* GLB_REQ register */
+#define GLB_REQ_HALT_SHIFT 0
+#define GLB_REQ_HALT_MASK (0x1 << GLB_REQ_HALT_SHIFT)
+#define GLB_REQ_HALT_GET(reg_val) (((reg_val)&GLB_REQ_HALT_MASK) >> GLB_REQ_HALT_SHIFT)
+#define GLB_REQ_HALT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_HALT_MASK) | (((value) << GLB_REQ_HALT_SHIFT) & GLB_REQ_HALT_MASK))
+#define GLB_REQ_CFG_PROGRESS_TIMER_SHIFT 1
+#define GLB_REQ_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_REQ_CFG_PROGRESS_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_REQ_CFG_PROGRESS_TIMER_MASK) >> GLB_REQ_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_REQ_CFG_PROGRESS_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_CFG_PROGRESS_TIMER_MASK) | \
+ (((value) << GLB_REQ_CFG_PROGRESS_TIMER_SHIFT) & GLB_REQ_CFG_PROGRESS_TIMER_MASK))
+#define GLB_REQ_CFG_ALLOC_EN_SHIFT 2
+#define GLB_REQ_CFG_ALLOC_EN_MASK (0x1 << GLB_REQ_CFG_ALLOC_EN_SHIFT)
+#define GLB_REQ_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_REQ_CFG_ALLOC_EN_MASK) >> GLB_REQ_CFG_ALLOC_EN_SHIFT)
+#define GLB_REQ_CFG_ALLOC_EN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_CFG_ALLOC_EN_MASK) | (((value) << GLB_REQ_CFG_ALLOC_EN_SHIFT) & GLB_REQ_CFG_ALLOC_EN_MASK))
+#define GLB_REQ_CFG_PWROFF_TIMER_SHIFT 3
+#define GLB_REQ_CFG_PWROFF_TIMER_MASK (0x1 << GLB_REQ_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_REQ_CFG_PWROFF_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_REQ_CFG_PWROFF_TIMER_MASK) >> GLB_REQ_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_REQ_CFG_PWROFF_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_CFG_PWROFF_TIMER_MASK) | \
+ (((value) << GLB_REQ_CFG_PWROFF_TIMER_SHIFT) & GLB_REQ_CFG_PWROFF_TIMER_MASK))
+#define GLB_REQ_PROTM_ENTER_SHIFT 4
+#define GLB_REQ_PROTM_ENTER_MASK (0x1 << GLB_REQ_PROTM_ENTER_SHIFT)
+#define GLB_REQ_PROTM_ENTER_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_ENTER_MASK) >> GLB_REQ_PROTM_ENTER_SHIFT)
+#define GLB_REQ_PROTM_ENTER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PROTM_ENTER_MASK) | (((value) << GLB_REQ_PROTM_ENTER_SHIFT) & GLB_REQ_PROTM_ENTER_MASK))
+#define GLB_REQ_PRFCNT_ENABLE_SHIFT 5
+#define GLB_REQ_PRFCNT_ENABLE_MASK (0x1 << GLB_REQ_PRFCNT_ENABLE_SHIFT)
+#define GLB_REQ_PRFCNT_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_ENABLE_MASK) >> GLB_REQ_PRFCNT_ENABLE_SHIFT)
+#define GLB_REQ_PRFCNT_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_ENABLE_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_ENABLE_SHIFT) & GLB_REQ_PRFCNT_ENABLE_MASK))
+#define GLB_REQ_PRFCNT_SAMPLE_SHIFT 6
+#define GLB_REQ_PRFCNT_SAMPLE_MASK (0x1 << GLB_REQ_PRFCNT_SAMPLE_SHIFT)
+#define GLB_REQ_PRFCNT_SAMPLE_GET(reg_val) (((reg_val)&GLB_REQ_PRFCNT_SAMPLE_MASK) >> GLB_REQ_PRFCNT_SAMPLE_SHIFT)
+#define GLB_REQ_PRFCNT_SAMPLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_SAMPLE_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_SAMPLE_SHIFT) & GLB_REQ_PRFCNT_SAMPLE_MASK))
+#define GLB_REQ_COUNTER_ENABLE_SHIFT 7
+#define GLB_REQ_COUNTER_ENABLE_MASK (0x1 << GLB_REQ_COUNTER_ENABLE_SHIFT)
+#define GLB_REQ_COUNTER_ENABLE_GET(reg_val) (((reg_val)&GLB_REQ_COUNTER_ENABLE_MASK) >> GLB_REQ_COUNTER_ENABLE_SHIFT)
+#define GLB_REQ_COUNTER_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_COUNTER_ENABLE_MASK) | \
+ (((value) << GLB_REQ_COUNTER_ENABLE_SHIFT) & GLB_REQ_COUNTER_ENABLE_MASK))
+#define GLB_REQ_PING_SHIFT 8
+#define GLB_REQ_PING_MASK (0x1 << GLB_REQ_PING_SHIFT)
+#define GLB_REQ_PING_GET(reg_val) (((reg_val)&GLB_REQ_PING_MASK) >> GLB_REQ_PING_SHIFT)
+#define GLB_REQ_PING_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PING_MASK) | (((value) << GLB_REQ_PING_SHIFT) & GLB_REQ_PING_MASK))
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT 9
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK \
+ (0x1 << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \
+ (((reg_val)&GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) >> \
+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_REQ_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK) | \
+ (((value) << GLB_REQ_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
+ GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK))
+#define GLB_REQ_SLEEP_SHIFT 12
+#define GLB_REQ_SLEEP_MASK (0x1 << GLB_REQ_SLEEP_SHIFT)
+#define GLB_REQ_SLEEP_GET(reg_val) \
+ (((reg_val) & GLB_REQ_SLEEP_MASK) >> GLB_REQ_SLEEP_SHIFT)
+#define GLB_REQ_SLEEP_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_SLEEP_MASK) | \
+ (((value) << GLB_REQ_SLEEP_SHIFT) & GLB_REQ_SLEEP_MASK))
+#define GLB_REQ_INACTIVE_COMPUTE_SHIFT 20
+#define GLB_REQ_INACTIVE_COMPUTE_MASK (0x1 << GLB_REQ_INACTIVE_COMPUTE_SHIFT)
+#define GLB_REQ_INACTIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&GLB_REQ_INACTIVE_COMPUTE_MASK) >> GLB_REQ_INACTIVE_COMPUTE_SHIFT)
+#define GLB_REQ_INACTIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_INACTIVE_COMPUTE_MASK) | \
+ (((value) << GLB_REQ_INACTIVE_COMPUTE_SHIFT) & GLB_REQ_INACTIVE_COMPUTE_MASK))
+#define GLB_REQ_INACTIVE_FRAGMENT_SHIFT 21
+#define GLB_REQ_INACTIVE_FRAGMENT_MASK (0x1 << GLB_REQ_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_REQ_INACTIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&GLB_REQ_INACTIVE_FRAGMENT_MASK) >> GLB_REQ_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_REQ_INACTIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_INACTIVE_FRAGMENT_MASK) | \
+ (((value) << GLB_REQ_INACTIVE_FRAGMENT_SHIFT) & GLB_REQ_INACTIVE_FRAGMENT_MASK))
+#define GLB_REQ_INACTIVE_TILER_SHIFT 22
+#define GLB_REQ_INACTIVE_TILER_MASK (0x1 << GLB_REQ_INACTIVE_TILER_SHIFT)
+#define GLB_REQ_INACTIVE_TILER_GET(reg_val) (((reg_val)&GLB_REQ_INACTIVE_TILER_MASK) >> GLB_REQ_INACTIVE_TILER_SHIFT)
+#define GLB_REQ_INACTIVE_TILER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_INACTIVE_TILER_MASK) | \
+ (((value) << GLB_REQ_INACTIVE_TILER_SHIFT) & GLB_REQ_INACTIVE_TILER_MASK))
+#define GLB_REQ_PROTM_EXIT_SHIFT 23
+#define GLB_REQ_PROTM_EXIT_MASK (0x1 << GLB_REQ_PROTM_EXIT_SHIFT)
+#define GLB_REQ_PROTM_EXIT_GET(reg_val) (((reg_val)&GLB_REQ_PROTM_EXIT_MASK) >> GLB_REQ_PROTM_EXIT_SHIFT)
+#define GLB_REQ_PROTM_EXIT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PROTM_EXIT_MASK) | (((value) << GLB_REQ_PROTM_EXIT_SHIFT) & GLB_REQ_PROTM_EXIT_MASK))
+#define GLB_REQ_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_REQ_PRFCNT_THRESHOLD_MASK (0x1 << GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_GET(reg_val) \
+ (((reg_val)&GLB_REQ_PRFCNT_THRESHOLD_MASK) >> \
+ GLB_REQ_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_REQ_PRFCNT_THRESHOLD_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_THRESHOLD_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_THRESHOLD_SHIFT) & \
+ GLB_REQ_PRFCNT_THRESHOLD_MASK))
+#define GLB_REQ_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_REQ_PRFCNT_OVERFLOW_MASK (0x1 << GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_GET(reg_val) \
+ (((reg_val)&GLB_REQ_PRFCNT_OVERFLOW_MASK) >> \
+ GLB_REQ_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \
+ (((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & \
+ GLB_REQ_PRFCNT_OVERFLOW_MASK))
+#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30
+#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT)
+#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_CSF_REQ_MASK) >> GLB_REQ_DEBUG_CSF_REQ_SHIFT)
+#define GLB_REQ_DEBUG_CSF_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_DEBUG_CSF_REQ_MASK) | \
+ (((value) << GLB_REQ_DEBUG_CSF_REQ_SHIFT) & GLB_REQ_DEBUG_CSF_REQ_MASK))
+#define GLB_REQ_DEBUG_HOST_REQ_SHIFT 31
+#define GLB_REQ_DEBUG_HOST_REQ_MASK (0x1 << GLB_REQ_DEBUG_HOST_REQ_SHIFT)
+#define GLB_REQ_DEBUG_HOST_REQ_GET(reg_val) (((reg_val)&GLB_REQ_DEBUG_HOST_REQ_MASK) >> GLB_REQ_DEBUG_HOST_REQ_SHIFT)
+#define GLB_REQ_DEBUG_HOST_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_DEBUG_HOST_REQ_MASK) | \
+ (((value) << GLB_REQ_DEBUG_HOST_REQ_SHIFT) & GLB_REQ_DEBUG_HOST_REQ_MASK))
+
+/* GLB_ACK_IRQ_MASK register */
+#define GLB_ACK_IRQ_MASK_HALT_SHIFT 0
+#define GLB_ACK_IRQ_MASK_HALT_MASK (0x1 << GLB_ACK_IRQ_MASK_HALT_SHIFT)
+#define GLB_ACK_IRQ_MASK_HALT_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_HALT_MASK) >> GLB_ACK_IRQ_MASK_HALT_SHIFT)
+#define GLB_ACK_IRQ_MASK_HALT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_HALT_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_HALT_SHIFT) & GLB_ACK_IRQ_MASK_HALT_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT 1
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT 2
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) >> GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK))
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT 3
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK (0x1 << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) >> GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT)
+#define GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_SHIFT) & GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK))
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT 4
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) >> GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_ENTER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PROTM_ENTER_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT 5
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT 6
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) >> GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_SHIFT) & GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK))
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT 7
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK (0x1 << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) >> GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT)
+#define GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_COUNTER_ENABLE_SHIFT) & GLB_ACK_IRQ_MASK_COUNTER_ENABLE_MASK))
+#define GLB_ACK_IRQ_MASK_PING_SHIFT 8
+#define GLB_ACK_IRQ_MASK_PING_MASK (0x1 << GLB_ACK_IRQ_MASK_PING_SHIFT)
+#define GLB_ACK_IRQ_MASK_PING_GET(reg_val) (((reg_val)&GLB_ACK_IRQ_MASK_PING_MASK) >> GLB_ACK_IRQ_MASK_PING_SHIFT)
+#define GLB_ACK_IRQ_MASK_PING_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PING_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PING_SHIFT) & GLB_ACK_IRQ_MASK_PING_MASK))
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT 9
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK \
+ (0x1 << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) >> \
+ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT)
+#define GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_SHIFT) & \
+ GLB_ACK_IRQ_MASK_FIRMWARE_CONFIG_UPDATE_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT 20
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_COMPUTE_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT 21
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_FRAGMENT_MASK))
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT 22
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK (0x1 << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) >> GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT)
+#define GLB_ACK_IRQ_MASK_INACTIVE_TILER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_INACTIVE_TILER_SHIFT) & GLB_ACK_IRQ_MASK_INACTIVE_TILER_MASK))
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT 23
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK (0x1 << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) >> GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT)
+#define GLB_ACK_IRQ_MASK_PROTM_EXIT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PROTM_EXIT_SHIFT) & GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT 24
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK \
+ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) >> \
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_SHIFT) & \
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK))
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT 25
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK \
+ (0x1 << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) >> \
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT)
+#define GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_SHIFT) & \
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK))
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT 30
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_CSF_REQ_MASK))
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT 31
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK (0x1 << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_GET(reg_val) \
+ (((reg_val)&GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) >> GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT)
+#define GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK) | \
+ (((value) << GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_SHIFT) & GLB_ACK_IRQ_MASK_DEBUG_HOST_REQ_MASK))
+
+/* GLB_PROGRESS_TIMER register */
+#define GLB_PROGRESS_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PROGRESS_TIMER_TIMEOUT_MASK (0xFFFFFFFF << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
+#define GLB_PROGRESS_TIMER_TIMEOUT_GET(reg_val) \
+ (((reg_val)&GLB_PROGRESS_TIMER_TIMEOUT_MASK) >> GLB_PROGRESS_TIMER_TIMEOUT_SHIFT)
+#define GLB_PROGRESS_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK))
+
+/* GLB_PWROFF_TIMER register */
+#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \
+ (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK))
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \
+ (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \
+ (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK))
+/* GLB_PWROFF_TIMER_TIMER_SOURCE values */
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */
+
+/* GLB_ALLOC_EN register */
+#define GLB_ALLOC_EN_MASK_SHIFT 0
+#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT)
+#define GLB_ALLOC_EN_MASK_GET(reg_val) (((reg_val)&GLB_ALLOC_EN_MASK_MASK) >> GLB_ALLOC_EN_MASK_SHIFT)
+#define GLB_ALLOC_EN_MASK_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ALLOC_EN_MASK_MASK) | (((value) << GLB_ALLOC_EN_MASK_SHIFT) & GLB_ALLOC_EN_MASK_MASK))
+
+/* GLB_OUTPUT_BLOCK register set definitions */
+
+/* GLB_ACK register */
+#define GLB_ACK_CFG_PROGRESS_TIMER_SHIFT 1
+#define GLB_ACK_CFG_PROGRESS_TIMER_MASK (0x1 << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_CFG_PROGRESS_TIMER_GET(reg_val) \
+ (((reg_val)&GLB_ACK_CFG_PROGRESS_TIMER_MASK) >> GLB_ACK_CFG_PROGRESS_TIMER_SHIFT)
+#define GLB_ACK_CFG_PROGRESS_TIMER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_CFG_PROGRESS_TIMER_MASK) | \
+ (((value) << GLB_ACK_CFG_PROGRESS_TIMER_SHIFT) & GLB_ACK_CFG_PROGRESS_TIMER_MASK))
+#define GLB_ACK_CFG_ALLOC_EN_SHIFT 2
+#define GLB_ACK_CFG_ALLOC_EN_MASK (0x1 << GLB_ACK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_CFG_ALLOC_EN_GET(reg_val) (((reg_val)&GLB_ACK_CFG_ALLOC_EN_MASK) >> GLB_ACK_CFG_ALLOC_EN_SHIFT)
+#define GLB_ACK_CFG_ALLOC_EN_SET(reg_val, value) \
+ (((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK))
+/* End of GLB_OUTPUT_BLOCK register set definitions */
+
+/* The following register and fields are for headers before 10.x.7/11.x.4 */
+#define GLB_REQ_IDLE_ENABLE_SHIFT (10)
+#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_EVENT_SHIFT (26)
+#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_SHIFT (10)
+#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
+
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)
+
+#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT GPU_U(10)
+#define GLB_ACK_IRQ_MASK_IDLE_ENABLE_MASK (GPU_U(0x1) << GLB_ACK_IRQ_MASK_IDLE_ENABLE_SHIFT)
+
+#define GLB_IDLE_TIMER (0x0080)
+/* GLB_IDLE_TIMER register */
+#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
+#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK))
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \
+ (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \
+ (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK))
+/* GLB_IDLE_TIMER_TIMER_SOURCE values */
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */
+
+/* GLB_INSTR_FEATURES register */
+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0)
+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT)
+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(reg_val) \
+ (((reg_val)&GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) >> GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT)
+#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK) | \
+ (((value) << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) & GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK))
+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT (4)
+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK ((u32)0xF << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT)
+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(reg_val) \
+ (((reg_val)&GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) >> GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT)
+#define GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SET(reg_val, value) \
+ (((reg_val) & ~GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK) | \
+ (((value) << GLB_INSTR_FEATURES_EVENT_SIZE_MAX_SHIFT) & GLB_INSTR_FEATURES_EVENT_SIZE_MAX_MASK))
+
+#define CSG_STATUS_STATE (0x0018) /* CSG state status register */
+/* CSG_STATUS_STATE register */
+#define CSG_STATUS_STATE_IDLE_SHIFT (0)
+#define CSG_STATUS_STATE_IDLE_MASK ((0x1) << CSG_STATUS_STATE_IDLE_SHIFT)
+#define CSG_STATUS_STATE_IDLE_GET(reg_val) \
+ (((reg_val)&CSG_STATUS_STATE_IDLE_MASK) >> CSG_STATUS_STATE_IDLE_SHIFT)
+#define CSG_STATUS_STATE_IDLE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_STATUS_STATE_IDLE_MASK) | \
+ (((value) << CSG_STATUS_STATE_IDLE_SHIFT) & CSG_STATUS_STATE_IDLE_MASK))
+
+/* GLB_FEATURES_ITER_TRACE_SUPPORTED register */
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT GPU_U(4)
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK \
+ (GPU_U(0x1) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT)
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_GET(reg_val) \
+ (((reg_val)&GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) >> \
+ GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT)
+#define GLB_FEATURES_ITER_TRACE_SUPPORTED_SET(reg_val, value) \
+ (((reg_val) & ~GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK) | \
+ (((value) << GLB_FEATURES_ITER_TRACE_SUPPORTED_SHIFT) & \
+ GLB_FEATURES_ITER_TRACE_SUPPORTED_MASK))
+
+/* GLB_REQ_ITER_TRACE_ENABLE register */
+#define GLB_REQ_ITER_TRACE_ENABLE_SHIFT GPU_U(11)
+#define GLB_REQ_ITER_TRACE_ENABLE_MASK \
+ (GPU_U(0x1) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT)
+#define GLB_REQ_ITER_TRACE_ENABLE_GET(reg_val) \
+ (((reg_val)&GLB_REQ_ITER_TRACE_ENABLE_MASK) >> \
+ GLB_REQ_ITER_TRACE_ENABLE_SHIFT)
+#define GLB_REQ_ITER_TRACE_ENABLE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_REQ_ITER_TRACE_ENABLE_MASK) | \
+ (((value) << GLB_REQ_ITER_TRACE_ENABLE_SHIFT) & \
+ GLB_REQ_ITER_TRACE_ENABLE_MASK))
+
+#endif /* _KBASE_CSF_REGISTERS_H_ */
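Every field in this header uses the same SHIFT/MASK/GET/SET accessor pattern: SET clears the field with ~MASK and ORs in the shifted value, while GET masks the field and shifts it back down. A minimal usage sketch for GLB_PWROFF_TIMER, assuming a kernel context with this header included; the helper name and the 0x100000 cycle count are illustrative only and not part of this patch:

static u32 example_pack_pwroff_timer(void)
{
        u32 reg = 0;

        /* TIMEOUT occupies bits [30:0], TIMER_SOURCE is bit 31. */
        reg = GLB_PWROFF_TIMER_TIMEOUT_SET(reg, 0x100000);
        reg = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
                reg, GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);

        /* GET reverses the packing, so the timeout reads back as 0x100000. */
        WARN_ON(GLB_PWROFF_TIMER_TIMEOUT_GET(reg) != 0x100000);

        return reg;
}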
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index d5d8318..10de93f 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -29,14 +29,14 @@
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_reset_gpu.h>
+#include <linux/string.h>
-/* Waiting timeout for GPU reset to complete */
-#define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */
-#define DUMP_DWORDS_PER_LINE (4)
-/* 16 characters needed for a 8 byte value in hex & 1 character for space */
-#define DUMP_HEX_CHARS_PER_DWORD ((2 * 8) + 1)
-#define DUMP_HEX_CHARS_PER_LINE \
- (DUMP_DWORDS_PER_LINE * DUMP_HEX_CHARS_PER_DWORD)
+enum kbasep_soft_reset_status {
+ RESET_SUCCESS = 0,
+ SOFT_RESET_FAILED,
+ L2_ON_FAILED,
+ MCU_REINIT_FAILED
+};
static inline bool
kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
@@ -259,8 +259,8 @@ static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
{
- u8 *buf, *line_str;
- unsigned int read_size;
+ u8 *buf, *p, *pnewline, *pend, *pendbuf;
+ unsigned int read_size, remaining_size;
struct firmware_trace_buffer *tb =
kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);
@@ -269,41 +269,53 @@ static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
return;
}
- buf = kmalloc(PAGE_SIZE + DUMP_HEX_CHARS_PER_LINE + 1, GFP_KERNEL);
+ buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
if (buf == NULL) {
dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
return;
}
- line_str = &buf[PAGE_SIZE];
+
+ buf[PAGE_SIZE] = 0;
+
+ p = buf;
+ pendbuf = &buf[PAGE_SIZE];
dev_err(kbdev->dev, "Firmware trace buffer dump:");
- while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, buf,
- PAGE_SIZE))) {
- u64 *ptr = (u64 *)buf;
- u32 num_dwords;
-
- for (num_dwords = read_size / sizeof(u64);
- num_dwords >= DUMP_DWORDS_PER_LINE;
- num_dwords -= DUMP_DWORDS_PER_LINE) {
- dev_err(kbdev->dev, "%016llx %016llx %016llx %016llx",
- ptr[0], ptr[1], ptr[2], ptr[3]);
- ptr += DUMP_DWORDS_PER_LINE;
- }
+ while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
+ pendbuf - p))) {
+ pend = p + read_size;
+ p = buf;
- if (num_dwords) {
- int pos = 0;
+ while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
+ /* Null-terminate the string */
+ *pnewline = 0;
- while (num_dwords--) {
- pos += snprintf(line_str + pos,
- DUMP_HEX_CHARS_PER_DWORD + 1,
- "%016llx ", ptr[0]);
- ptr++;
- }
+ dev_err(kbdev->dev, "FW> %s", p);
+
+ p = pnewline + 1;
+ }
- dev_err(kbdev->dev, "%s", line_str);
+ remaining_size = pend - p;
+
+ if (!remaining_size) {
+ p = buf;
+ } else if (remaining_size < PAGE_SIZE) {
+ /* Copy unfinished string to the start of the buffer */
+ memmove(buf, p, remaining_size);
+ p = &buf[remaining_size];
+ } else {
+ /* Print abnormal page-long string without newlines */
+ dev_err(kbdev->dev, "FW> %s", buf);
+ p = buf;
}
}
+ if (p != buf) {
+ /* Null-terminate and print last unfinished string */
+ *p = 0;
+ dev_err(kbdev->dev, "FW> %s", buf);
+ }
+
kfree(buf);
}
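The rewritten dump loop above treats the firmware trace data as text: each chunk is split on '\n', every complete line is printed with an "FW> " prefix, and an unterminated tail is carried back to the start of the buffer for the next read. A condensed userspace sketch of the same carry-over pattern, with fake_read() and the 64-byte buffer as illustrative stand-ins (the fallback for a buffer-long line without newlines is omitted):

#include <stdio.h>
#include <string.h>

#define BUF_SZ 64

static const char *src = "first line\nsecond li";
static size_t src_pos;

/* Stand-in for kbase_csf_firmware_trace_buffer_read_data(). */
static size_t fake_read(char *dst, size_t max)
{
        size_t n = strlen(src) - src_pos;

        if (n > max)
                n = max;
        memcpy(dst, src + src_pos, n);
        src_pos += n;
        return n;
}

int main(void)
{
        char buf[BUF_SZ + 1];
        char *p = buf, *pend, *nl;
        size_t read_size, remaining;

        buf[BUF_SZ] = 0;
        while ((read_size = fake_read(p, buf + BUF_SZ - p))) {
                pend = p + read_size;
                p = buf;

                /* Print every newline-terminated string in the buffer. */
                while (p < pend && (nl = memchr(p, '\n', pend - p))) {
                        *nl = 0;
                        printf("FW> %s\n", p);
                        p = nl + 1;
                }

                remaining = pend - p;
                if (!remaining) {
                        p = buf;
                } else {
                        /* Carry the partial line to the buffer start. */
                        memmove(buf, p, remaining);
                        p = buf + remaining;
                }
        }
        if (p != buf) {
                /* Flush the trailing unterminated string. */
                *p = 0;
                printf("FW> %s\n", buf);
        }
        return 0;
}

This prints "FW> first line" followed by "FW> second li", mirroring how the driver emits a trailing partial line once the source runs dry.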
@@ -332,36 +344,12 @@ static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev)
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
-static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
- bool firmware_inited, bool silent)
+static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_device *kbdev,
+ bool firmware_inited, bool silent)
{
unsigned long flags;
int err;
-
- WARN_ON(kbdev->irq_reset_flush);
- /* The reset must now be happening otherwise other threads will not
- * have been synchronized with to stop their access to the HW
- */
-#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
- lockdep_assert_held_write(&kbdev->csf.reset.sem);
-#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
- lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
-#else
- lockdep_assert_held(&kbdev->csf.reset.sem);
-#endif
- WARN_ON(!kbase_reset_gpu_is_active(kbdev));
-
- /* Reset the scheduler state before disabling the interrupts as suspend
- * of active CSG slots would also be done as a part of reset.
- */
- if (likely(firmware_inited))
- kbase_csf_scheduler_reset(kbdev);
- cancel_work_sync(&kbdev->csf.firmware_reload_work);
-
- dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
- /* This call will block until counters are disabled.
- */
- kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+ enum kbasep_soft_reset_status ret = RESET_SUCCESS;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
spin_lock(&kbdev->mmu_mask_change);
@@ -380,8 +368,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n");
- /* Must be done without any locks IRQ handlers will take.
- */
+ /* Must be done without any locks IRQ handlers will take. */
kbase_synchronize_irqs(kbdev);
dev_dbg(kbdev->dev, "Flush out any in-flight work items\n");
@@ -421,10 +408,8 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
mutex_unlock(&kbdev->pm.lock);
- if (WARN_ON(err)) {
- kbase_csf_hwcnt_on_reset_error(kbdev);
- return err;
- }
+ if (WARN_ON(err))
+ return SOFT_RESET_FAILED;
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -441,20 +426,78 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
err = kbase_pm_wait_for_desired_state(kbdev);
mutex_unlock(&kbdev->pm.lock);
- if (WARN_ON(err)) {
- kbase_csf_hwcnt_on_reset_error(kbdev);
- return err;
+ if (err) {
+ if (!kbase_pm_l2_is_in_desired_state(kbdev))
+ ret = L2_ON_FAILED;
+ else if (!kbase_pm_mcu_is_in_desired_state(kbdev))
+ ret = MCU_REINIT_FAILED;
+ }
+
+ return ret;
+}
+
+static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent)
+{
+ unsigned long flags;
+ enum kbasep_soft_reset_status ret;
+
+ WARN_ON(kbdev->irq_reset_flush);
+ /* The reset must now be happening otherwise other threads will not
+ * have been synchronized with to stop their access to the HW
+ */
+#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
+ lockdep_assert_held_write(&kbdev->csf.reset.sem);
+#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
+ lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
+#else
+ lockdep_assert_held(&kbdev->csf.reset.sem);
+#endif
+ WARN_ON(!kbase_reset_gpu_is_active(kbdev));
+
+ /* Reset the scheduler state before disabling the interrupts as suspend
+ * of active CSG slots would also be done as a part of reset.
+ */
+ if (likely(firmware_inited))
+ kbase_csf_scheduler_reset(kbdev);
+ cancel_work_sync(&kbdev->csf.firmware_reload_work);
+
+ dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
+ /* This call will block until counters are disabled. */
+ kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
+
+ ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent);
+ if (ret == SOFT_RESET_FAILED) {
+ dev_err(kbdev->dev, "Soft-reset failed");
+ goto err;
+ } else if (ret == L2_ON_FAILED) {
+ dev_err(kbdev->dev, "L2 power up failed after the soft-reset");
+ goto err;
+ } else if (ret == MCU_REINIT_FAILED) {
+ dev_err(kbdev->dev, "MCU re-init failed trying full firmware reload");
+ /* Since MCU reinit failed despite successful soft reset, we can try
+ * the firmware full reload.
+ */
+ kbdev->csf.firmware_full_reload_needed = true;
+ ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, true);
+ if (ret != RESET_SUCCESS) {
+ dev_err(kbdev->dev,
+ "MCU Re-init failed even after trying full firmware reload, ret = [%d]",
+ ret);
+ goto err;
+ }
}
/* Re-enable GPU hardware counters */
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
-
if (!silent)
dev_err(kbdev->dev, "Reset complete");
-
return 0;
+err:
+
+ kbase_csf_hwcnt_on_reset_error(kbdev);
+ return -1;
}
static void kbase_csf_reset_gpu_worker(struct work_struct *data)
@@ -593,7 +636,7 @@ bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
int kbase_reset_gpu_wait(struct kbase_device *kbdev)
{
const long wait_timeout =
- kbase_csf_timeout_in_jiffies(GPU_RESET_TIMEOUT_MS);
+ kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT));
long remaining;
/* Inform lockdep we might be trying to wait on a reset (as
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index cd87027..ff75178 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
#include <tl/mali_kbase_tracepoints.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <linux/export.h>
-#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_hwaccess_time.h>
@@ -246,7 +246,7 @@ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
*
* This function will start the scheduling tick hrtimer and is supposed to
* be called only from the tick work item function. The tick hrtimer should
- * should not be active already.
+ * not be active already.
*/
static void start_tick_timer(struct kbase_device *kbdev)
{
@@ -372,7 +372,7 @@ static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
mutex_lock(&kbdev->csf.reg_lock);
/* If bind operation for the queue hasn't completed yet, then the
- * the CSI can't be programmed for the queue
+ * CSI can't be programmed for the queue
* (even in stopped state) and so the doorbell also can't be assigned
* to it.
*/
@@ -406,6 +406,85 @@ static void scheduler_doorbell_init(struct kbase_device *kbdev)
WARN_ON(doorbell_nr != CSF_KERNEL_DOORBELL_NR);
}
+/**
+ * update_on_slot_queues_offsets - Update active queues' EXTRACT offsets
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function updates the EXTRACT offset for all queues whose groups have
+ * been assigned a physical slot. These values could be used to detect a
+ * queue's true idleness status. This is intended to be an additional check
+ * on top of the GPU idle notification to account for race conditions.
+ * This function is supposed to be called only when the GPU idle notification
+ * interrupt is received.
+ */
+static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ /* All CSGs have the same number of CSs */
+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
+ size_t i;
+
+ lockdep_assert_held(&scheduler->interrupt_lock);
+
+ /* csg_slots_idle_mask is not used here for the looping, as it could get
+ * updated concurrently when Scheduler re-evaluates the idle status of
+ * the CSGs for which idle notification was received previously.
+ */
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, kbdev->csf.global_iface.group_num) {
+ struct kbase_queue_group *const group = scheduler->csg_slots[i].resident_group;
+ size_t j;
+
+ if (WARN_ON(!group))
+ continue;
+
+ for (j = 0; j < max_streams; ++j) {
+ struct kbase_queue *const queue = group->bound_queues[j];
+
+ if (queue) {
+ u64 const *const output_addr =
+ (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+
+ queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
+ }
+ }
+ }
+}
+
+static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
+{
+ atomic_set(&scheduler->gpu_no_longer_idle, false);
+ queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
+}
+
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ int non_idle_offslot_grps;
+ bool can_suspend_on_idle;
+
+ lockdep_assert_held(&scheduler->interrupt_lock);
+
+ non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
+ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
+ ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
+
+ if (!non_idle_offslot_grps) {
+ if (can_suspend_on_idle) {
+ /* The GPU idle worker relies on update_on_slot_queues_offsets()
+ * having finished. It is queued first to shorten the time until it
+ * runs, but it will block on scheduler->interrupt_lock until the
+ * update below has completed.
+ */
+ enqueue_gpu_idle_work(scheduler);
+ update_on_slot_queues_offsets(kbdev);
+ }
+ } else {
+ /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
+ kbase_csf_scheduler_advance_tick_nolock(kbdev);
+ }
+}
+
u32 kbase_csf_scheduler_get_nr_active_csgs_locked(struct kbase_device *kbdev)
{
u32 nr_active_csgs;
@@ -551,54 +630,6 @@ static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
return kbdev->csf.scheduler.timer_enabled;
}
-static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
-
- lockdep_assert_held(&scheduler->lock);
-
- if (scheduler->gpu_idle_fw_timer_enabled)
- return;
-
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
-
- /* Update the timer_enabled flag requires holding interrupt_lock */
- scheduler->gpu_idle_fw_timer_enabled = true;
- kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
-
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-}
-
-static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-
- lockdep_assert_held(&scheduler->lock);
- lockdep_assert_held(&scheduler->interrupt_lock);
-
- /* Update of the timer_enabled flag requires holding interrupt_lock */
- if (scheduler->gpu_idle_fw_timer_enabled) {
- scheduler->gpu_idle_fw_timer_enabled = false;
- kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
- }
-}
-
-static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
-
- lockdep_assert_held(&scheduler->lock);
-
- if (!scheduler->gpu_idle_fw_timer_enabled)
- return;
-
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- disable_gpu_idle_fw_timer_locked(kbdev);
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-}
-
/**
* scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
* Scheduler
@@ -631,12 +662,15 @@ static int scheduler_pm_active_handle_suspend(struct kbase_device *kbdev,
if (!prev_count) {
ret = kbase_pm_context_active_handle_suspend(kbdev,
suspend_handler);
- if (ret) {
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ /* Invoke the PM state machines again as the change in MCU
+ * desired status, due to the update of scheduler.pm_active_count,
+ * may be missed by the thread that called pm_wait_for_desired_state()
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (ret)
kbdev->csf.scheduler.pm_active_count--;
- kbase_pm_update_state(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- }
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
return ret;
@@ -716,8 +750,16 @@ static void scheduler_pm_idle(struct kbase_device *kbdev)
kbdev->csf.scheduler.pm_active_count--;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- if (prev_count == 1)
+ if (prev_count == 1) {
kbase_pm_context_idle(kbdev);
+ /* Invoke the PM state machines again as the change in MCU
+ * desired status, due to the update of scheduler.pm_active_count,
+ * may be missed by the thread that called pm_wait_for_desired_state()
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
}
#ifdef KBASE_PM_RUNTIME
@@ -746,8 +788,16 @@ static void scheduler_pm_idle_before_sleep(struct kbase_device *kbdev)
kbdev->pm.backend.exit_gpu_sleep_mode = false;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- if (prev_count == 1)
+ if (prev_count == 1) {
kbase_pm_context_idle(kbdev);
+ /* Invoke the PM state machines again as the change in MCU
+ * desired status, due to the update of scheduler.pm_active_count,
+ * may be missed by the thread that called pm_wait_for_desired_state()
+ */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
}
#endif
@@ -1735,6 +1785,13 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
bool is_waiting = false;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ u64 cmd_ptr = kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_LO);
+
+ cmd_ptr |= (u64)kbase_csf_firmware_cs_output(stream, CS_STATUS_CMD_PTR_HI) << 32;
+ queue->saved_cmd_ptr = cmd_ptr;
+#endif
+
KBASE_KTRACE_ADD_CSF_GRP_Q(stream->kbdev, QUEUE_SYNC_STATUS_WAIT,
queue->group, queue, status);
@@ -1948,7 +2005,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
cancel_tick_timer(kctx->kbdev);
WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
if (scheduler->state != SCHED_SUSPENDED)
- queue_work(system_wq, &scheduler->gpu_idle_work);
+ enqueue_gpu_idle_work(scheduler);
}
KBASE_KTRACE_ADD_CSF_GRP(kctx->kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
scheduler->num_active_address_spaces |
@@ -2078,7 +2135,7 @@ static void update_offslot_non_idle_cnt_on_grp_suspend(
}
}
-static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
+static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
{
bool cs_empty;
bool cs_idle;
@@ -2090,8 +2147,8 @@ static bool confirm_cmd_buf_empty(struct kbase_queue *queue)
u32 glb_version = iface->version;
- u64 *input_addr = (u64 *)queue->user_io_addr;
- u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
+ u64 const *input_addr = (u64 const *)queue->user_io_addr;
+ u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
@@ -2605,7 +2662,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
if (kbase_csf_scheduler_wait_mcu_active(kbdev))
dev_warn(
kbdev->dev,
- "[%llu] Wait for MCU active failed when when terminating group %d of context %d_%d on slot %d",
+ "[%llu] Wait for MCU active failed when terminating group %d of context %d_%d on slot %d",
kbase_backend_get_cycle_cnt(kbdev),
group->handle, group->kctx->tgid,
group->kctx->id, group->csg_nr);
@@ -2704,6 +2761,7 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
}
} else if (!queue_group_scheduled_locked(group)) {
int new_val;
+
insert_group_to_runnable(&kbdev->csf.scheduler, group,
KBASE_CSF_GROUP_RUNNABLE);
/* A new group into the scheduler */
@@ -3033,9 +3091,9 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
struct kbase_queue_group *group =
scheduler->csg_slots[i].resident_group;
- if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i))) {
+ if (WARN_ON(!csg_slot_stopped_locked(kbdev, (s8)i)))
continue;
- }
+
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
@@ -3533,13 +3591,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
* GPUCORE-21394.
*/
- /* Disable the idle timer */
- disable_gpu_idle_fw_timer_locked(kbdev);
-
/* Switch to protected mode */
scheduler->active_protm_grp = input_grp;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM,
input_grp, 0u);
+ /* Reset the tick's pending protm seq number */
+ scheduler->tick_protm_pending_seq =
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
kbase_csf_enter_protected_mode(kbdev);
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
@@ -3637,6 +3695,7 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
struct kbase_queue_group *group;
lockdep_assert_held(&scheduler->lock);
+ lockdep_assert_held(&scheduler->interrupt_lock);
if (WARN_ON(priority < 0) ||
WARN_ON(priority >= KBASE_QUEUE_GROUP_PRIORITY_COUNT))
return;
@@ -3656,6 +3715,14 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
/* Set the scanout sequence number, starting from 0 */
group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
+ if (scheduler->tick_protm_pending_seq ==
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
+ if (!bitmap_empty(group->protm_pending_bitmap,
+ kbdev->csf.global_iface.groups[0].stream_num))
+ scheduler->tick_protm_pending_seq =
+ group->scan_seq_num;
+ }
+
if (queue_group_idle_locked(group)) {
if (on_slot_group_idle_locked(group))
list_add_tail(&group->link_to_schedule,
@@ -3738,6 +3805,7 @@ static void scheduler_rotate_groups(struct kbase_device *kbdev)
WARN_ON(top_grp->kctx != top_ctx);
if (!WARN_ON(list_empty(list))) {
struct kbase_queue_group *new_head_grp;
+
list_move_tail(&top_grp->link, list);
new_head_grp = (!list_empty(list)) ?
list_first_entry(list, struct kbase_queue_group, link) :
@@ -3774,6 +3842,7 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
if (!WARN_ON(!found)) {
struct kbase_context *new_head_kctx;
+
list_move_tail(&pos->csf.link, list);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_ROTATE_RUNNABLE, pos,
0u);
@@ -4042,6 +4111,59 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
return 0;
}
+/**
+ * all_on_slot_groups_remained_idle - Live check for all groups' idleness
+ *
+ * @kbdev: Pointer to the device.
+ *
+ * Returns false if any of the queues inside any of the groups that have been
+ * assigned a physical CSG slot have work to execute, or have executed work
+ * since the GPU idle notification was received. This function is used to
+ * handle a race condition between firmware reporting GPU idle and userspace
+ * submitting more work by directly ringing a doorbell.
+ *
+ * Return: false if any queue inside any resident group has work to be processed
+ * or has processed work since GPU idle event, true otherwise.
+ */
+static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ /* All CSGs have the same number of CSs */
+ size_t const max_streams = kbdev->csf.global_iface.groups[0].stream_num;
+ size_t i;
+
+ lockdep_assert_held(&scheduler->lock);
+ lockdep_assert_held(&scheduler->interrupt_lock);
+
+ for_each_set_bit(i, scheduler->csg_slots_idle_mask,
+ kbdev->csf.global_iface.group_num) {
+ struct kbase_queue_group *const group =
+ scheduler->csg_slots[i].resident_group;
+ size_t j;
+
+ for (j = 0; j < max_streams; ++j) {
+ struct kbase_queue const *const queue =
+ group->bound_queues[j];
+ u64 const *output_addr;
+ u64 cur_extract_ofs;
+
+ if (!queue)
+ continue;
+
+ output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
+ if (cur_extract_ofs != queue->extract_ofs) {
+ /* More work has been executed since the idle
+ * notification.
+ */
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
{
bool suspend;
@@ -4055,18 +4177,28 @@ static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
return false;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ spin_lock(&scheduler->interrupt_lock);
if (scheduler->total_runnable_grps) {
- spin_lock(&scheduler->interrupt_lock);
/* Check both on-slots and off-slots groups idle status */
suspend = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
!atomic_read(&scheduler->non_idle_offslot_grps) &&
kbase_pm_idle_groups_sched_suspendable(kbdev);
-
- spin_unlock(&scheduler->interrupt_lock);
} else
suspend = kbase_pm_no_runnables_sched_suspendable(kbdev);
+ /* Confirm that all groups are actually idle before proceeding with
+ * suspension, as groups may become active again, without informing the
+ * scheduler, if userspace rings a doorbell directly.
+ */
+ if (suspend && (unlikely(atomic_read(&scheduler->gpu_no_longer_idle)) ||
+ unlikely(!all_on_slot_groups_remained_idle(kbdev)))) {
+ dev_info(kbdev->dev,
+ "GPU suspension skipped due to active CSGs");
+ suspend = false;
+ }
+
+ spin_unlock(&scheduler->interrupt_lock);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return suspend;
@@ -4150,8 +4282,6 @@ static void gpu_idle_worker(struct work_struct *work)
}
mutex_lock(&scheduler->lock);
- /* Cycle completed, disable the firmware idle timer */
- disable_gpu_idle_fw_timer(kbdev);
scheduler_is_idle_suspendable = scheduler_idle_suspendable(kbdev);
if (scheduler_is_idle_suspendable) {
KBASE_KTRACE_ADD(kbdev, GPU_IDLE_HANDLING_START, NULL,
@@ -4177,6 +4307,7 @@ static void gpu_idle_worker(struct work_struct *work)
static int scheduler_prepare(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
int i;
lockdep_assert_held(&scheduler->lock);
@@ -4202,6 +4333,9 @@ static int scheduler_prepare(struct kbase_device *kbdev)
scheduler->num_csg_slots_for_tick = 0;
bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ scheduler->tick_protm_pending_seq =
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
/* Scan out to run groups */
for (i = 0; i < KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++i) {
struct kbase_context *kctx;
@@ -4209,6 +4343,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
scheduler_ctx_scan_groups(kbdev, kctx, i);
}
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
/* Update this tick's non-idle groups */
scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
@@ -4237,42 +4372,17 @@ static int scheduler_prepare(struct kbase_device *kbdev)
return 0;
}
-static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
-
- lockdep_assert_held(&scheduler->lock);
-
- /* After the scheduler apply operation, the internal variable
- * scheduler->non_idle_offslot_grps reflects the end-point view
- * of the count at the end of the active phase.
- *
- * Any changes that follow (after the scheduler has dropped the
- * scheduler->lock), reflects async operations to the scheduler,
- * such as a group gets killed (evicted) or a new group inserted,
- * cqs wait-sync triggered state transtion etc.
- *
- * The condition for enable the idle timer is that there is no
- * non-idle groups off-slots. If there is non-idle group off-slot,
- * the timer should be disabled.
- */
- if (atomic_read(&scheduler->non_idle_offslot_grps))
- disable_gpu_idle_fw_timer(kbdev);
- else
- enable_gpu_idle_fw_timer(kbdev);
-}
-
/**
* keep_lru_on_slots() - Check the condition for LRU is met.
*
+ * @kbdev: Pointer to the device.
+ *
* This function tries to maintain the Last-Recent-Use case on slots, when
* the scheduler has no non-idle off-slot CSGs for a replacement
* consideration. This effectively extends the previous scheduling results
* for the new one. That is, the last recent used CSGs are retained on slots
* for the new tick/tock action.
*
- * @kbdev: Pointer to the device.
- *
* Return: true for avoiding on-slot CSGs changes (i.e. keep existing LRU),
* otherwise false.
*/
@@ -4294,10 +4404,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
*/
keep_lru = kbase_csf_scheduler_all_csgs_idle(kbdev);
- if (keep_lru && !scheduler->gpu_idle_fw_timer_enabled) {
- scheduler->gpu_idle_fw_timer_enabled = true;
- kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
- }
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
dev_dbg(kbdev->dev, "Keep_LRU: %d, CSGs on-slots: %d\n",
@@ -4311,6 +4417,8 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
* prepare_fast_local_tock() - making preparation arrangement for exercizing
* a fast local tock inside scheduling-actions.
*
+ * @kbdev: Pointer to the GPU device.
+ *
* The function assumes that a scheduling action of firing a fast local tock
* call (i.e. an equivalent tock action without dropping the lock) is desired
* if there are idle onslot CSGs. The function updates those affected CSGs'
@@ -4320,8 +4428,6 @@ static bool keep_lru_on_slots(struct kbase_device *kbdev)
* plus some potential newly idle CSGs in the scheduling action committing
* steps.
*
- * @kbdev: Pointer to the GPU device.
- *
* Return: number of on-slots CSGs that can be considered for replacing.
*/
static int prepare_fast_local_tock(struct kbase_device *kbdev)
@@ -4408,6 +4514,17 @@ static void schedule_actions(struct kbase_device *kbdev, bool is_tick)
redo_local_tock:
scheduler_prepare(kbdev);
+ /* Need to specifically enqueue the GPU idle work if there are no groups
+ * to schedule despite the runnable groups. This scenario will happen
+ * if System suspend is done when all groups are idle and no work
+ * is submitted for the groups after the System resume.
+ */
+ if (unlikely(!scheduler->ngrp_to_schedule &&
+ scheduler->total_runnable_grps)) {
+ dev_dbg(kbdev->dev, "No groups to schedule in the tick");
+ enqueue_gpu_idle_work(scheduler);
+ return;
+ }
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
protm_grp = scheduler->active_protm_grp;
@@ -4423,6 +4540,7 @@ redo_local_tock:
*/
if (protm_grp && scheduler->top_grp == protm_grp) {
int new_val;
+
dev_dbg(kbdev->dev, "Scheduler keep protm exec: group-%d",
protm_grp->handle);
new_val = atomic_dec_return(&scheduler->non_idle_offslot_grps);
@@ -4452,11 +4570,6 @@ redo_local_tock:
scheduler_apply(kbdev);
- /* Post-apply, all the committed groups in this tick are on
- * slots, time to arrange the idle timer on/off decision.
- */
- scheduler_handle_idle_timer_onoff(kbdev);
-
/* Scheduler is dropping the exec of the previous protm_grp,
* Until the protm quit completes, the GPU is effectively
* locked in the secure mode.
@@ -4491,7 +4604,6 @@ redo_local_tock:
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- return;
}
/**
@@ -4576,7 +4688,7 @@ static void schedule_on_tock(struct work_struct *work)
scheduler->state = SCHED_INACTIVE;
if (!scheduler->total_runnable_grps)
- queue_work(system_wq, &scheduler->gpu_idle_work);
+ enqueue_gpu_idle_work(scheduler);
mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
@@ -4627,8 +4739,9 @@ static void schedule_on_tick(struct work_struct *work)
dev_dbg(kbdev->dev,
"scheduling for next tick, num_runnable_groups:%u\n",
scheduler->total_runnable_grps);
- } else if (!scheduler->total_runnable_grps)
- queue_work(system_wq, &scheduler->gpu_idle_work);
+ } else if (!scheduler->total_runnable_grps) {
+ enqueue_gpu_idle_work(scheduler);
+ }
scheduler->state = SCHED_INACTIVE;
mutex_unlock(&scheduler->lock);
@@ -5044,7 +5157,6 @@ static void firmware_aliveness_monitor(struct work_struct *work)
exit:
mutex_unlock(&kbdev->csf.scheduler.lock);
kbase_reset_gpu_allow(kbdev);
- return;
}
int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
@@ -5289,6 +5401,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
mutex_lock(&scheduler->lock);
+ if (group->run_state == KBASE_CSF_GROUP_IDLE)
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
/* Check if the group is now eligible for execution in protected mode. */
if (scheduler_get_protm_enter_async_group(kbdev, group))
scheduler_group_check_protm_enter(kbdev, group);
@@ -5457,6 +5571,11 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
continue;
if (check_sync_update_for_on_slot_group(group)) {
+ /* As sync update has been performed for an on-slot
+ * group, when MCU is in sleep state, ring the doorbell
+ * so that FW can re-evaluate the SYNC_WAIT on wakeup.
+ */
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
scheduler_wakeup(kbdev, true);
return;
}
@@ -5529,6 +5648,7 @@ enum kbase_csf_event_callback_action check_group_sync_update_cb(void *param)
struct kbase_context *const kctx = param;
KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT, kctx, 0u);
+
queue_work(kctx->csf.sched.sync_update_wq,
&kctx->csf.sched.sync_update_work);
@@ -5610,6 +5730,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
dev_err(kbdev->dev, "Failed to allocate scheduler workqueue\n");
return -ENOMEM;
}
+ scheduler->idle_wq = alloc_ordered_workqueue(
+ "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
+ if (!scheduler->idle_wq) {
+ dev_err(kbdev->dev,
+ "Failed to allocate GPU idle scheduler workqueue\n");
+ destroy_workqueue(kbdev->csf.scheduler.wq);
+ return -ENOMEM;
+ }
INIT_WORK(&scheduler->tick_work, schedule_on_tick);
INIT_DEFERRABLE_WORK(&scheduler->tock_work, schedule_on_tock);
@@ -5636,11 +5764,11 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
scheduler->last_schedule = 0;
scheduler->tock_pending_request = false;
scheduler->active_protm_grp = NULL;
- scheduler->gpu_idle_fw_timer_enabled = false;
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
scheduler_doorbell_init(kbdev);
INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
+ atomic_set(&scheduler->gpu_no_longer_idle, false);
atomic_set(&scheduler->non_idle_offslot_grps, 0);
hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -5684,6 +5812,8 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
{
+ if (kbdev->csf.scheduler.idle_wq)
+ destroy_workqueue(kbdev->csf.scheduler.idle_wq);
if (kbdev->csf.scheduler.wq)
destroy_workqueue(kbdev->csf.scheduler.wq);
}
@@ -5715,7 +5845,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
enqueue_tick_work(kbdev);
dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
} else if (scheduler->state != SCHED_SUSPENDED) {
- queue_work(system_wq, &scheduler->gpu_idle_work);
+ enqueue_gpu_idle_work(scheduler);
}
}
@@ -5805,8 +5935,6 @@ int kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
mutex_lock(&scheduler->lock);
- disable_gpu_idle_fw_timer(kbdev);
-
#ifdef KBASE_PM_RUNTIME
/* If scheduler is in sleeping state, then MCU needs to be activated
* to suspend CSGs.
@@ -5959,7 +6087,7 @@ void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
&kbdev->csf.global_iface.groups[csg_nr];
bool csg_idle;
- if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
+ if (!kbdev->csf.scheduler.csg_slots[csg_nr].resident_group)
continue;
csg_idle =
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 068a45b..a00a9ca 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -570,6 +570,15 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev);
#endif
/**
+ * kbase_csf_scheduler_process_gpu_idle_event() - Process GPU idle IRQ
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function is called when a GPU idle IRQ has been raised.
+ */
+void kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev);
+
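The implementation in mali_kbase_csf_scheduler.c asserts that scheduler->interrupt_lock is held, so callers must take that spinlock around the call. A hedged sketch of a caller; the wrapper name below is hypothetical, and the real call site is the CSF GPU-idle interrupt path:

static void example_handle_gpu_idle_irq(struct kbase_device *kbdev)
{
        struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
        unsigned long flags;

        spin_lock_irqsave(&scheduler->interrupt_lock, flags);
        kbase_csf_scheduler_process_gpu_idle_event(kbdev);
        spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}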
+/**
* kbase_csf_scheduler_get_nr_active_csgs() - Get the number of active CSGs
*
* @kbdev: Pointer to the device
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 62fb241..85babf9 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -82,7 +82,7 @@ static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
* Unless the @chunk is the first in the kernel's list of chunks belonging to
* a given tiler heap, this function stores the size and address of the @chunk
* in the header of the preceding chunk. This requires the GPU memory region
- * containing the header to be be mapped temporarily, which can fail.
+ * containing the header to be mapped temporarily, which can fail.
*
* Return: 0 if successful or a negative error code on failure.
*/
@@ -204,8 +204,8 @@ static int create_chunk(struct kbase_csf_tiler_heap *const heap,
/* Allocate GPU memory for the new chunk. */
INIT_LIST_HEAD(&chunk->link);
- chunk->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
- &chunk->gpu_va, mmu_sync_info);
+ chunk->region =
+ kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
if (unlikely(!chunk->region)) {
dev_err(kctx->kbdev->dev,
@@ -464,21 +464,18 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
err = -ENOMEM;
} else {
err = create_initial_chunks(heap, initial_chunks);
- if (unlikely(err)) {
- kbase_csf_heap_context_allocator_free(ctx_alloc,
- heap->gpu_va);
- }
+ if (unlikely(err))
+ kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
}
if (unlikely(err)) {
kfree(heap);
} else {
- struct kbase_csf_tiler_heap_chunk const *first_chunk =
- list_first_entry(&heap->chunks_list,
- struct kbase_csf_tiler_heap_chunk, link);
+ struct kbase_csf_tiler_heap_chunk const *chunk = list_first_entry(
+ &heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
*heap_gpu_va = heap->gpu_va;
- *first_chunk_va = first_chunk->gpu_va;
+ *first_chunk_va = chunk->gpu_va;
mutex_lock(&kctx->csf.tiler_heaps.lock);
kctx->csf.tiler_heaps.nr_of_heaps++;
@@ -488,17 +485,25 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx,
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
kctx->kbdev, kctx->id, heap->heap_id,
PFN_UP(heap->chunk_size * heap->max_chunks),
- PFN_UP(heap->chunk_size * heap->chunk_count),
- heap->max_chunks, heap->chunk_size, heap->chunk_count,
- heap->target_in_flight, 0);
+ PFN_UP(heap->chunk_size * heap->chunk_count), heap->max_chunks,
+ heap->chunk_size, heap->chunk_count, heap->target_in_flight, 0);
- dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n",
- heap->gpu_va);
+#if defined(CONFIG_MALI_VECTOR_DUMP)
+ list_for_each_entry(chunk, &heap->chunks_list, link) {
+ KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(
+ kctx->kbdev, kctx->id, heap->heap_id, chunk->gpu_va);
+ }
+#endif
+
+ dev_dbg(kctx->kbdev->dev, "Created tiler heap 0x%llX\n", heap->gpu_va);
mutex_unlock(&kctx->csf.tiler_heaps.lock);
kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
- kctx->running_total_tiler_heap_memory += heap->chunk_size * heap->chunk_count;
- if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
- kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
+ kctx->running_total_tiler_heap_memory +=
+ heap->chunk_size * heap->chunk_count;
+ if (kctx->running_total_tiler_heap_memory >
+ kctx->peak_total_tiler_heap_memory)
+ kctx->peak_total_tiler_heap_memory =
+ kctx->running_total_tiler_heap_memory;
}
return err;
}
@@ -609,6 +614,16 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
if (likely(heap)) {
err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
new_chunk_ptr);
+ if (likely(!err)) {
+ /* update total and peak tiler heap memory record */
+ kctx->running_total_tiler_heap_nr_chunks++;
+ kctx->running_total_tiler_heap_memory += heap->chunk_size;
+
+ if (kctx->running_total_tiler_heap_memory >
+ kctx->peak_total_tiler_heap_memory)
+ kctx->peak_total_tiler_heap_memory =
+ kctx->running_total_tiler_heap_memory;
+ }
KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(
kctx->kbdev, kctx->id, heap->heap_id,
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h
index 27a9074..4a1b413 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,14 +28,14 @@ struct kbase_context;
#define MALI_CSF_TILER_HEAP_DEBUGFS_VERSION 0
/**
- * kbase_csf_tiler_heap_debugfs_init() - Create a debugfs entry for per context tiler heap
+ * kbase_csf_tiler_heap_debugfs_init - Create a debugfs entry for per context tiler heap
*
* @kctx: The kbase_context for which to create the debugfs entry
*/
void kbase_csf_tiler_heap_debugfs_init(struct kbase_context *kctx);
/**
- * kbase_csf_tiler_heap_total_debugfs_init() - Create a debugfs entry for per context tiler heap
+ * kbase_csf_tiler_heap_total_debugfs_init - Create a debugfs entry for per context tiler heap
*
* @kctx: The kbase_context for which to create the debugfs entry
*/
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
index fb439cf..2c006d9 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_def.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -59,18 +59,18 @@
/**
* struct kbase_csf_tiler_heap_chunk - A tiler heap chunk managed by the kernel
*
- * Chunks are allocated upon initialization of a tiler heap or in response to
- * out-of-memory events from the firmware. Chunks are always fully backed by
- * physical memory to avoid the overhead of processing GPU page faults. The
- * allocated GPU memory regions are linked together independent of the list of
- * kernel objects of this type.
- *
* @link: Link to this chunk in a list of chunks belonging to a
* @kbase_csf_tiler_heap.
* @region: Pointer to the GPU memory region allocated for the chunk.
* @gpu_va: GPU virtual address of the start of the memory region.
* This points to the header of the chunk and not to the low address
* of free memory within it.
+ *
+ * Chunks are allocated upon initialization of a tiler heap or in response to
+ * out-of-memory events from the firmware. Chunks are always fully backed by
+ * physical memory to avoid the overhead of processing GPU page faults. The
+ * allocated GPU memory regions are linked together independent of the list of
+ * kernel objects of this type.
*/
struct kbase_csf_tiler_heap_chunk {
struct list_head link;
diff --git a/mali_kbase/csf/mali_kbase_csf_timeout.c b/mali_kbase/csf/mali_kbase_csf_timeout.c
index f52cbab..ea6c116 100644
--- a/mali_kbase/csf/mali_kbase_csf_timeout.c
+++ b/mali_kbase/csf/mali_kbase_csf_timeout.c
@@ -139,8 +139,7 @@ static ssize_t progress_timeout_show(struct device * const dev,
}
-static DEVICE_ATTR(progress_timeout, 0644, progress_timeout_show,
- progress_timeout_store);
+static DEVICE_ATTR_RW(progress_timeout);
int kbase_csf_timeout_init(struct kbase_device *const kbdev)
{
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index b01ac29..f40be8f 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -80,9 +80,8 @@ static int kbase_csf_tl_debugfs_poll_interval_write(void *data, u64 val)
struct kbase_device *kbdev = (struct kbase_device *)data;
struct kbase_csf_tl_reader *self = &kbdev->timeline->csf_tl_reader;
- if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN) {
+ if (val > KBASE_CSF_TL_READ_INTERVAL_MAX || val < KBASE_CSF_TL_READ_INTERVAL_MIN)
return -EINVAL;
- }
self->timer_interval = (u32)val;
@@ -96,7 +95,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_tl_poll_interval_fops,
void kbase_csf_tl_reader_debugfs_init(struct kbase_device *kbdev)
{
- debugfs_create_file("csf_tl_poll_interval_in_ms", S_IRUGO | S_IWUSR,
+ debugfs_create_file("csf_tl_poll_interval_in_ms", 0644,
kbdev->debugfs_instr_directory, kbdev,
&kbase_csf_tl_poll_interval_fops);
@@ -406,9 +405,8 @@ static int tl_reader_init_late(
return -1;
}
- if (kbase_ts_converter_init(&self->ts_converter, kbdev)) {
+ if (kbase_ts_converter_init(&self->ts_converter, kbdev))
return -1;
- }
self->kbdev = kbdev;
self->trace_buffer = tb;
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.h b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
index 4523ba2..d554d56 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.h
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -133,14 +133,12 @@ void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self,
void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self);
/**
- * kbase_csf_tl_reader_flush_buffer() -
- * Flush trace from buffer into CSFFW timeline stream.
+ * kbase_csf_tl_reader_flush_buffer() - Flush trace from buffer into CSFFW timeline stream.
*
* @self: CSFFW TL Reader instance.
*
* Return: Zero on success, negative error code (EBUSY) otherwise
*/
-
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self);
/**
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index 0c72f00..e90d30d 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -179,13 +179,13 @@ int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev)
extract_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
mcu_rw_offset;
- extract_cpu_va = (u32*)(
+ extract_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
mcu_rw_offset);
insert_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
mcu_write_offset;
- insert_cpu_va = (u32*)(
+ insert_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
mcu_write_offset);
data_buffer_gpu_va =
@@ -323,13 +323,13 @@ void kbase_csf_firmware_reload_trace_buffers_data(struct kbase_device *kbdev)
extract_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_rw.va_reg->start_pfn << PAGE_SHIFT) +
mcu_rw_offset;
- extract_cpu_va = (u32*)(
+ extract_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_rw.cpu_addr +
mcu_rw_offset);
insert_gpu_va =
(kbdev->csf.firmware_trace_buffers.mcu_write.va_reg->start_pfn << PAGE_SHIFT) +
mcu_write_offset;
- insert_cpu_va = (u32*)(
+ insert_cpu_va = (u32 *)(
kbdev->csf.firmware_trace_buffers.mcu_write.cpu_addr +
mcu_write_offset);
data_buffer_gpu_va =
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 8a4d2e2..5325658 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -203,6 +203,8 @@ static void kbase_csf_early_term(struct kbase_device *kbdev)
* kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
* interface.
* @kbdev: Device pointer
+ *
+ * Return: 0 if successful or a negative error code on failure.
*/
static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
{
@@ -245,8 +247,9 @@ static void kbase_device_hwcnt_backend_csf_if_term(struct kbase_device *kbdev)
/**
* kbase_device_hwcnt_backend_csf_init - Create hardware counter backend.
* @kbdev: Device pointer
+ *
+ * Return: 0 if successful or a negative error code on failure.
*/
-
static int kbase_device_hwcnt_backend_csf_init(struct kbase_device *kbdev)
{
return kbase_hwcnt_backend_csf_create(
@@ -390,7 +393,7 @@ int kbase_device_init(struct kbase_device *kbdev)
* Hardware counter components depending on firmware are initialized after CSF
* firmware is loaded.
*
- * @return 0 on success. An error code on failure.
+ * Return: 0 on success. An error code on failure.
*/
static int kbase_device_hwcnt_csf_deferred_init(struct kbase_device *kbdev)
{
@@ -457,7 +460,7 @@ virt_fail:
* To meet Android GKI vendor guideline, firmware load is deferred at
* the time when @ref kbase_open is called for the first time.
*
- * @return 0 on success. An error code on failure.
+ * Return: 0 on success. An error code on failure.
*/
static int kbase_csf_firmware_deferred_init(struct kbase_device *kbdev)
{
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index ae6dc1b..596d57c 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -133,8 +133,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
if (val & RESET_COMPLETED)
kbase_pm_reset_done(kbdev);
- KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+ /* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done.
+ * We need to acquire hwaccess_lock to avoid a race condition with
+ * kbase_gpu_cache_flush_and_busy_wait
+ */
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);
#ifdef KBASE_PM_RUNTIME
if (val & DOORBELL_MIRROR) {
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
index e8f8953..ff57cf6 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
@@ -66,8 +66,12 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
if (val & PRFCNT_SAMPLE_COMPLETED)
kbase_instr_hwcnt_sample_done(kbdev);
- KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val);
+ /* Defer clearing CLEAN_CACHES_COMPLETED to kbase_clean_caches_done.
+ * We need to acquire hwaccess_lock to avoid a race condition with
+ * kbase_gpu_cache_flush_and_busy_wait
+ */
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, val & ~CLEAN_CACHES_COMPLETED);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), val & ~CLEAN_CACHES_COMPLETED);
/* kbase_pm_check_transitions (called by kbase_pm_power_changed) must
* be called after the IRQ has been cleared. This is because it might
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 2e022eb..fa115ee 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -27,6 +27,9 @@
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_hwcnt_watchdog_if_timer.h>
+#include <mali_kbase_hwcnt_backend_jm.h>
+#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_linux.h>
@@ -148,73 +151,115 @@ static void kbase_backend_late_term(struct kbase_device *kbdev)
kbase_hwaccess_pm_term(kbdev);
}
+/**
+ * kbase_device_hwcnt_watchdog_if_init - Create hardware counter watchdog
+ * interface.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+static int kbase_device_hwcnt_watchdog_if_init(struct kbase_device *kbdev)
+{
+ return kbase_hwcnt_watchdog_if_timer_create(&kbdev->hwcnt_watchdog_timer);
+}
+
+/**
+ * kbase_device_hwcnt_watchdog_if_term - Terminate hardware counter watchdog
+ * interface.
+ * @kbdev: Device pointer
+ */
+static void kbase_device_hwcnt_watchdog_if_term(struct kbase_device *kbdev)
+{
+ kbase_hwcnt_watchdog_if_timer_destroy(&kbdev->hwcnt_watchdog_timer);
+}
+
+/**
+ * kbase_device_hwcnt_backend_jm_init - Create hardware counter backend.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
static int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
{
- return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
+ return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_jm_backend);
}
+/**
+ * kbase_device_hwcnt_backend_jm_term - Terminate hardware counter backend.
+ * @kbdev: Device pointer
+ */
static void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
{
- kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
+ kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_jm_backend);
+}
+
+/**
+ * kbase_device_hwcnt_backend_jm_watchdog_init - Create hardware counter watchdog backend.
+ * @kbdev: Device pointer
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+static int kbase_device_hwcnt_backend_jm_watchdog_init(struct kbase_device *kbdev)
+{
+ return kbase_hwcnt_backend_jm_watchdog_create(&kbdev->hwcnt_gpu_jm_backend,
+ &kbdev->hwcnt_watchdog_timer,
+ &kbdev->hwcnt_gpu_iface);
+}
+
+/**
+ * kbase_device_hwcnt_backend_jm_watchdog_term - Terminate hardware counter watchdog backend.
+ * @kbdev: Device pointer
+ */
+static void kbase_device_hwcnt_backend_jm_watchdog_term(struct kbase_device *kbdev)
+{
+ kbase_hwcnt_backend_jm_watchdog_destroy(&kbdev->hwcnt_gpu_iface);
}
static const struct kbase_device_init dev_init[] = {
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- { kbase_gpu_device_create, kbase_gpu_device_destroy,
- "Dummy model initialization failed" },
+ { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else
{ assign_irqs, NULL, "IRQ search failed" },
{ registers_map, registers_unmap, "Register map failed" },
#endif
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
- { kbase_device_pm_init, kbase_device_pm_term,
- "Power management initialization failed" },
- { kbase_device_early_init, kbase_device_early_term,
- "Early device initialization failed" },
- { kbase_device_populate_max_freq, NULL,
- "Populating max frequency failed" },
+ { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
+ { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
+ { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
"Priority control manager initialization failed" },
- { kbase_ctx_sched_init, kbase_ctx_sched_term,
- "Context scheduler initialization failed" },
- { kbase_mem_init, kbase_mem_term,
- "Memory subsystem initialization failed" },
+ { kbase_ctx_sched_init, kbase_ctx_sched_term, "Context scheduler initialization failed" },
+ { kbase_mem_init, kbase_mem_term, "Memory subsystem initialization failed" },
{ kbase_device_coherency_init, NULL, "Device coherency init failed" },
{ kbase_protected_mode_init, kbase_protected_mode_term,
"Protected mode subsystem initialization failed" },
- { kbase_device_list_init, kbase_device_list_term,
- "Device list setup failed" },
- { kbasep_js_devdata_init, kbasep_js_devdata_term,
- "Job JS devdata initialization failed" },
+ { kbase_device_list_init, kbase_device_list_term, "Device list setup failed" },
+ { kbasep_js_devdata_init, kbasep_js_devdata_term, "Job JS devdata initialization failed" },
{ kbase_device_timeline_init, kbase_device_timeline_term,
"Timeline stream initialization failed" },
{ kbase_clk_rate_trace_manager_init, kbase_clk_rate_trace_manager_term,
"Clock rate trace manager initialization failed" },
- { kbase_lowest_gpu_freq_init, NULL,
- "Lowest freq initialization failed" },
+ { kbase_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
{ kbase_instr_backend_init, kbase_instr_backend_term,
"Instrumentation backend initialization failed" },
- { kbase_device_hwcnt_backend_jm_init,
- kbase_device_hwcnt_backend_jm_term,
+ { kbase_device_hwcnt_watchdog_if_init, kbase_device_hwcnt_watchdog_if_term,
+ "GPU hwcnt backend watchdog interface creation failed" },
+ { kbase_device_hwcnt_backend_jm_init, kbase_device_hwcnt_backend_jm_term,
"GPU hwcnt backend creation failed" },
+ { kbase_device_hwcnt_backend_jm_watchdog_init, kbase_device_hwcnt_backend_jm_watchdog_term,
+ "GPU hwcnt watchdog backend creation failed" },
{ kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
"GPU hwcnt context initialization failed" },
- { kbase_device_hwcnt_virtualizer_init,
- kbase_device_hwcnt_virtualizer_term,
+ { kbase_device_hwcnt_virtualizer_init, kbase_device_hwcnt_virtualizer_term,
"GPU hwcnt virtualizer initialization failed" },
{ kbase_device_vinstr_init, kbase_device_vinstr_term,
"Virtual instrumentation initialization failed" },
{ kbase_device_kinstr_prfcnt_init, kbase_device_kinstr_prfcnt_term,
"Performance counter instrumentation initialization failed" },
- { kbase_backend_late_init, kbase_backend_late_term,
- "Late backend initialization failed" },
+ { kbase_backend_late_init, kbase_backend_late_term, "Late backend initialization failed" },
{ kbase_debug_job_fault_dev_init, kbase_debug_job_fault_dev_term,
"Job fault debug initialization failed" },
- { kbase_device_debugfs_init, kbase_device_debugfs_term,
- "DebugFS initialization failed" },
+ { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
/* Sysfs init needs to happen before registering the device with
* misc_register(), otherwise it causes a race condition between
* registering the device and a uevent event being generated for
@@ -233,8 +278,7 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_gpuprops_populate_user_buffer, kbase_gpuprops_free_user_buffer,
"GPU property population failed" },
{ NULL, kbase_dummy_job_wa_cleanup, NULL },
- { kbase_device_late_init, kbase_device_late_term,
- "Late device initialization failed" },
+ { kbase_device_late_init, kbase_device_late_term, "Late device initialization failed" },
};
static void kbase_device_term_partial(struct kbase_device *kbdev,
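The dev_init[] table above pairs each init step with its matching term step and a failure message, and kbase_device_term_partial() unwinds only the steps that already succeeded. A reduced, self-contained sketch of that pattern, using simplified stand-in names rather than the real kbase helpers:

#include <stdio.h>

struct dev_init {
        int (*init)(void);
        void (*term)(void);
        const char *err_mes;
};

static int ok_init(void) { return 0; }
static void noop_term(void) { }

static const struct dev_init dev_init[] = {
        { ok_init, noop_term, "First stage initialization failed" },
        { ok_init, noop_term, "Second stage initialization failed" },
};

/* Terminate the first 'i' entries, in reverse order of initialization */
static void term_partial(unsigned int i)
{
        while (i-- > 0) {
                if (dev_init[i].term)
                        dev_init[i].term();
        }
}

int main(void)
{
        unsigned int i;

        for (i = 0; i < sizeof(dev_init) / sizeof(dev_init[0]); i++) {
                if (dev_init[i].init && dev_init[i].init()) {
                        fprintf(stderr, "%s\n", dev_init[i].err_mes);
                        term_partial(i);        /* roll back what succeeded */
                        return 1;
                }
        }
        term_partial(i);                        /* full teardown on the happy path */
        return 0;
}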
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index dc53c43..c123010 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -166,8 +166,11 @@ void kbase_device_pcm_dev_term(struct kbase_device *const kbdev)
* @nb: notifier block - used to retrieve kbdev pointer
* @action: action (unused)
* @data: data pointer (unused)
+ *
* This function simply lists memory usage by the Mali driver, per GPU device,
* for diagnostic purposes.
+ *
+ * Return: NOTIFY_OK on success, NOTIFY_BAD otherwise.
*/
static int mali_oom_notifier_handler(struct notifier_block *nb,
unsigned long action, void *data)
@@ -189,7 +192,7 @@ static int mali_oom_notifier_handler(struct notifier_block *nb,
mutex_lock(&kbdev->kctx_list_lock);
- list_for_each_entry (kctx, &kbdev->kctx_list, kctx_list_link) {
+ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
struct pid *pid_struct;
struct task_struct *task;
unsigned long task_alloc_total =
@@ -481,6 +484,7 @@ int kbase_device_early_init(struct kbase_device *kbdev)
{
int err;
+
err = kbasep_platform_device_init(kbdev);
if (err)
return err;
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index 22ceca0..5ff970a 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,6 @@
/**
* kbase_device_get_list - get device list.
- *
* Get access to device list.
*
* Return: Pointer to the linked list head.
@@ -55,18 +54,18 @@ void kbase_increment_device_id(void);
* When a device file is opened for the first time,
* load firmware and initialize hardware counter components.
*
- * @return 0 on success. An error code on failure.
+ * Return: 0 on success. An error code on failure.
*/
int kbase_device_firmware_init_once(struct kbase_device *kbdev);
/**
* kbase_device_init - Device initialisation.
*
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
* This is called from device probe to initialise various other
* components needed.
*
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
- *
* Return: 0 on success and non-zero value on failure.
*/
int kbase_device_init(struct kbase_device *kbdev);
@@ -74,11 +73,10 @@ int kbase_device_init(struct kbase_device *kbdev);
/**
* kbase_device_term - Device termination.
*
- * This is called from device remove to terminate various components that
- * were initialised during kbase_device_init.
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
+ * This is called from device remove to terminate various components that
+ * were initialised during kbase_device_init.
*/
void kbase_device_term(struct kbase_device *kbdev);
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index beacc7c..249d5f8 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -63,6 +63,7 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
}
/* Clear the interrupt CLEAN_CACHES_COMPLETED bit. */
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR),
CLEAN_CACHES_COMPLETED);
@@ -72,7 +73,6 @@ static int busy_wait_cache_clean_irq(struct kbase_device *kbdev)
int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
u32 flush_op)
{
- u32 irq_mask;
int need_to_wake_up = 0;
int ret = 0;
@@ -81,17 +81,18 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
*/
lockdep_assert_held(&kbdev->hwaccess_lock);
- /* 1. Check if CLEAN_CACHES_COMPLETED irq mask bit is set.
+ /* 1. Check if kbdev->cache_clean_in_progress is set.
* If it is set, it means there are threads waiting for
- * CLEAN_CACHES_COMPLETED irq to be raised.
+ * CLEAN_CACHES_COMPLETED irq to be raised and that the
+ * corresponding irq mask bit is set.
* We'll clear the irq mask bit and busy-wait for the cache
* clean operation to complete before submitting the cache
* clean command required after the GPU page table update.
* Pended flush commands will be merged to requested command.
*/
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
- if (irq_mask & CLEAN_CACHES_COMPLETED) {
+ if (kbdev->cache_clean_in_progress) {
/* disable irq first */
+ u32 irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
irq_mask & ~CLEAN_CACHES_COMPLETED);
@@ -182,22 +183,28 @@ void kbase_clean_caches_done(struct kbase_device *kbdev)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (kbdev->cache_clean_queued) {
- u32 pended_flush_op = kbdev->cache_clean_queued;
+ if (kbdev->cache_clean_in_progress) {
+ /* Clear the interrupt CLEAN_CACHES_COMPLETED bit if set.
+ * It might have already been done by kbase_gpu_cache_flush_and_busy_wait.
+ */
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, CLEAN_CACHES_COMPLETED);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), CLEAN_CACHES_COMPLETED);
+
+ if (kbdev->cache_clean_queued) {
+ u32 pended_flush_op = kbdev->cache_clean_queued;
- kbdev->cache_clean_queued = 0;
+ kbdev->cache_clean_queued = 0;
- KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL,
- pended_flush_op);
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND),
- pended_flush_op);
- } else {
- /* Disable interrupt */
- irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
- kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
- irq_mask & ~CLEAN_CACHES_COMPLETED);
+ KBASE_KTRACE_ADD(kbdev, CORE_GPU_CLEAN_INV_CACHES, NULL, pended_flush_op);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), pended_flush_op);
+ } else {
+ /* Disable interrupt */
+ irq_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK),
+ irq_mask & ~CLEAN_CACHES_COMPLETED);
- kbase_gpu_cache_clean_wait_complete(kbdev);
+ kbase_gpu_cache_clean_wait_complete(kbdev);
+ }
}
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
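The hunks above form one handshake: the top-half IRQ handlers stop clearing CLEAN_CACHES_COMPLETED, and both kbase_gpu_cache_flush_and_busy_wait() and kbase_clean_caches_done() clear it only while holding hwaccess_lock, keyed off cache_clean_in_progress rather than the IRQ mask. A minimal single-threaded sketch of that ordering, with simulated registers and a no-op lock standing in for hwaccess_lock (not the real driver code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define CLEAN_CACHES_COMPLETED (1u << 17)

static uint32_t gpu_irq_rawstat;        /* simulated GPU_IRQ_RAWSTAT */
static bool cache_clean_in_progress;    /* normally protected by hwaccess_lock */

static void hwaccess_lock(void) { }     /* single-threaded demo: no-op */
static void hwaccess_unlock(void) { }

/* Deferred bottom half: the only place the completion bit is cleared */
static void clean_caches_done(void)
{
        hwaccess_lock();
        if (cache_clean_in_progress) {
                gpu_irq_rawstat &= ~CLEAN_CACHES_COMPLETED;     /* GPU_IRQ_CLEAR */
                cache_clean_in_progress = false;
        }
        hwaccess_unlock();
}

/* Top half: clear every source except CLEAN_CACHES_COMPLETED */
static void gpu_interrupt(uint32_t val)
{
        gpu_irq_rawstat &= ~(val & ~CLEAN_CACHES_COMPLETED);

        if (val & CLEAN_CACHES_COMPLETED)
                clean_caches_done();
}

int main(void)
{
        cache_clean_in_progress = true;
        gpu_irq_rawstat = CLEAN_CACHES_COMPLETED;
        gpu_interrupt(gpu_irq_rawstat);
        printf("rawstat after IRQ: 0x%x\n", gpu_irq_rawstat);   /* prints 0x0 */
        return 0;
}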
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index 7499729..893a335 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -20,7 +20,7 @@
*/
#include <mali_kbase.h>
-#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
#include <gpu/mali_kbase_gpu_fault.h>
const char *kbase_gpu_exception_name(u32 const exception_code)
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
new file mode 100644
index 0000000..f6945b3
--- /dev/null
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_GPU_REGMAP_CSF_H_
+#define _KBASE_GPU_REGMAP_CSF_H_
+
+#include <linux/types.h>
+
+#if !MALI_USE_CSF && defined(__KERNEL__)
+#error "Cannot be compiled with JM"
+#endif
+
+/* GPU_CONTROL_MCU base address */
+#define GPU_CONTROL_MCU_BASE 0x3000
+
+/* MCU_SUBSYSTEM base address */
+#define MCU_SUBSYSTEM_BASE 0x20000
+
+/* IPA control registers */
+#define IPA_CONTROL_BASE 0x40000
+#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
+#define COMMAND 0x000 /* (WO) Command register */
+#define STATUS 0x004 /* (RO) Status register */
+#define TIMER 0x008 /* (RW) Timer control register */
+
+#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
+#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */
+#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */
+#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */
+#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */
+#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */
+#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */
+#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */
+
+/* Accumulated counter values for CS hardware */
+#define VALUE_CSHW_BASE 0x100
+#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+/* Accumulated counter values for memory system */
+#define VALUE_MEMSYS_BASE 0x140
+#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#define VALUE_TILER_BASE 0x180
+#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#define VALUE_SHADER_BASE 0x1C0
+#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
+/* Set to inner non-cacheable, outer-non-cacheable
+ * Setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no-alloc on read
+ * - no alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
+/* Set to shared memory, that is inner cacheable on ACE and inner or outer
+ * shared, otherwise inner non-cacheable.
+ * Outer cacheable if inner or outer shared, otherwise outer non-cacheable.
+ */
+#define AS_MEMATTR_AARCH64_SHARED 0x8ull
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching
+ */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA 4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
+/* Normal memory, shared between MCU and Host */
+#define AS_MEMATTR_INDEX_SHARED 6
+
+/* Configuration bits for the CSF. */
+#define CSF_CONFIG 0xF00
+
+/* CSF_CONFIG register */
+#define CSF_CONFIG_FORCE_COHERENCY_FEATURES_SHIFT 2
+
+/* GPU control registers */
+#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
+#define MCU_CONTROL 0x700
+#define MCU_STATUS 0x704
+
+#define MCU_CNTRL_ENABLE (1 << 0)
+#define MCU_CNTRL_AUTO (1 << 1)
+#define MCU_CNTRL_DISABLE (0)
+
+#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31)
+#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT)
+
+#define MCU_STATUS_HALTED (1 << 1)
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
+ * region base address, low word
+ */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
+ * region base address, high word
+ */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
+ * configuration
+ */
+
+#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter
+ * enable for CS Hardware
+ */
+
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
+ * flags for shader cores
+ */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
+ * flags for tiler
+ */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
+ * flags for MMU/L2 cache
+ */
+
+/* JOB IRQ flags */
+#define JOB_IRQ_GLOBAL_IF (1 << 31) /* Global interface interrupt received */
+
+/* GPU_COMMAND codes */
+#define GPU_COMMAND_CODE_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_CODE_RESET 0x01 /* Reset the GPU */
+#define GPU_COMMAND_CODE_PRFCNT 0x02 /* Clear or sample performance counters */
+#define GPU_COMMAND_CODE_TIME 0x03 /* Configure time sources */
+#define GPU_COMMAND_CODE_FLUSH_CACHES 0x04 /* Flush caches */
+#define GPU_COMMAND_CODE_SET_PROTECTED_MODE 0x05 /* Places the GPU in protected mode */
+#define GPU_COMMAND_CODE_FINISH_HALT 0x06 /* Halt CSF */
+#define GPU_COMMAND_CODE_CLEAR_FAULT 0x07 /* Clear GPU_FAULTSTATUS and GPU_FAULTADDRESS, TODX */
+
+/* GPU_COMMAND_RESET payloads */
+
+/* This will leave the state of active jobs UNDEFINED, but will leave the external bus in a defined and idle state.
+ * Power domains will remain powered on.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00
+
+/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and
+ * idle state.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01
+
+/* This reset will leave the state of currently active streams UNDEFINED, will likely lose data, and may leave
+ * the system bus in an inconsistent state. Use only as a last resort when nothing else works.
+ */
+#define GPU_COMMAND_RESET_PAYLOAD_HARD_RESET 0x02
+
+/* GPU_COMMAND_PRFCNT payloads */
+#define GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE 0x01 /* Sample performance counters */
+#define GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR 0x02 /* Clear performance counters */
+
+/* GPU_COMMAND_TIME payloads */
+#define GPU_COMMAND_TIME_DISABLE 0x00 /* Disable cycle counter */
+#define GPU_COMMAND_TIME_ENABLE 0x01 /* Enable cycle counter */
+
+/* GPU_COMMAND_FLUSH_CACHES payloads bits for L2 caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_L2_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN 0x001 /* CLN only */
+#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE 0x003 /* CLN + INV */
+
+/* GPU_COMMAND_FLUSH_CACHES payloads bits for Load-store caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN 0x010 /* CLN only */
+#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030 /* CLN + INV */
+
+/* GPU_COMMAND_FLUSH_CACHES payloads bits for Other caches */
+#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE 0x000 /* No flush */
+#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE 0x200 /* INV only */
+
+/* GPU_COMMAND command + payload */
+#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
+ ((__u32)opcode | ((__u32)payload << 8))
+
+/* Final GPU_COMMAND form */
+/* No operation, nothing happens */
+#define GPU_COMMAND_NOP \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_NOP, 0)
+
+/* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_SOFT_RESET \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET)
+
+/* Immediately reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_RESET, GPU_COMMAND_RESET_PAYLOAD_HARD_RESET)
+
+/* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_CLEAR \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_CLEAR)
+
+/* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_PRFCNT_SAMPLE \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_PRFCNT, GPU_COMMAND_PRFCNT_PAYLOAD_SAMPLE)
+
+/* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_START \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_ENABLE)
+
+/* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_TIME, GPU_COMMAND_TIME_DISABLE)
+
+/* Clean and invalidate L2 cache (Equivalent to FLUSH_PT) */
+#define GPU_COMMAND_CACHE_CLN_INV_L2 \
+ GPU_COMMAND_CODE_PAYLOAD( \
+ GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_LSC_NONE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
+
+/* Clean and invalidate L2 and LSC caches (Equivalent to FLUSH_MEM) */
+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC \
+ GPU_COMMAND_CODE_PAYLOAD( \
+ GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_NONE))
+
+/* Clean and invalidate L2, LSC, and Other caches */
+#define GPU_COMMAND_CACHE_CLN_INV_FULL \
+ GPU_COMMAND_CODE_PAYLOAD( \
+ GPU_COMMAND_CODE_FLUSH_CACHES, \
+ (GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE | \
+ GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE))
+
+/* Merge cache flush commands */
+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2))
+
+/* Places the GPU in protected mode */
+#define GPU_COMMAND_SET_PROTECTED_MODE \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_SET_PROTECTED_MODE, 0)
+
+/* Halt CSF */
+#define GPU_COMMAND_FINISH_HALT \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_FINISH_HALT, 0)
+
+/* Clear GPU faults */
+#define GPU_COMMAND_CLEAR_FAULT \
+ GPU_COMMAND_CODE_PAYLOAD(GPU_COMMAND_CODE_CLEAR_FAULT, 0)
+
+/* End Command Values */
+
+/* GPU_FAULTSTATUS register */
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFFul)
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
+ (((reg_val)&GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) \
+ >> GPU_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
+#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \
+ (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+
+#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10
+#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \
+ (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT)
+
+#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11
+#define GPU_FAULTSTATUS_JASID_VALID_FLAG \
+ (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT)
+
+#define GPU_FAULTSTATUS_JASID_SHIFT 12
+#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT)
+#define GPU_FAULTSTATUS_JASID_GET(reg_val) \
+ (((reg_val)&GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT)
+#define GPU_FAULTSTATUS_JASID_SET(reg_val, value) \
+ (((reg_val) & ~GPU_FAULTSTATUS_JASID_MASK) | \
+ (((value) << GPU_FAULTSTATUS_JASID_SHIFT) & GPU_FAULTSTATUS_JASID_MASK))
+
+#define GPU_FAULTSTATUS_SOURCE_ID_SHIFT 16
+#define GPU_FAULTSTATUS_SOURCE_ID_MASK \
+ (0xFFFFul << GPU_FAULTSTATUS_SOURCE_ID_SHIFT)
+/* End GPU_FAULTSTATUS register */
+
+/* GPU_FAULTSTATUS_ACCESS_TYPE values */
+#define GPU_FAULTSTATUS_ACCESS_TYPE_ATOMIC 0x0
+#define GPU_FAULTSTATUS_ACCESS_TYPE_EXECUTE 0x1
+#define GPU_FAULTSTATUS_ACCESS_TYPE_READ 0x2
+#define GPU_FAULTSTATUS_ACCESS_TYPE_WRITE 0x3
+/* End of GPU_FAULTSTATUS_ACCESS_TYPE values */
+
+/* Implementation-dependent exception codes used to indicate CSG
+ * and CS errors that are not specified in the specs.
+ */
+#define GPU_EXCEPTION_TYPE_SW_FAULT_0 ((__u8)0x70)
+#define GPU_EXCEPTION_TYPE_SW_FAULT_1 ((__u8)0x71)
+#define GPU_EXCEPTION_TYPE_SW_FAULT_2 ((__u8)0x72)
+
+/* GPU_FAULTSTATUS_EXCEPTION_TYPE values */
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_OK 0x00
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_BUS_FAULT 0x80
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_SHAREABILITY_FAULT 0x88
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_SYSTEM_SHAREABILITY_FAULT 0x89
+#define GPU_FAULTSTATUS_EXCEPTION_TYPE_GPU_CACHEABILITY_FAULT 0x8A
+/* End of GPU_FAULTSTATUS_EXCEPTION_TYPE values */
+
+#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_GET(reg_val) \
+ (((reg_val)&GPU_FAULTSTATUS_ADDRESS_VALID_MASK) >> GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_SET(reg_val, value) \
+ (((reg_val) & ~GPU_FAULTSTATUS_ADDRESS_VALID_MASK) | \
+ (((value) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) & GPU_FAULTSTATUS_ADDRESS_VALID_MASK))
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define GPU_PROTECTED_FAULT (1 << 1) /* A GPU fault has occurred in protected mode */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+#define DOORBELL_MIRROR (1 << 18) /* Mirrors the doorbell interrupt line to the CPU */
+#define MCU_STATUS_GPU_IRQ (1 << 19) /* MCU requires attention */
+
+/*
+ * In Debug build,
+ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and unmask interrupt sources of GPU_IRQ
+ * by writing it to the GPU_IRQ_CLEAR/MASK registers.
+ *
+ * In Release build,
+ * GPU_IRQ_REG_COMMON is used.
+ *
+ * Note:
+ * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
+ * DOORBELL_MIRROR - Not included in GPU_IRQ_REG_COMMON as it cannot be cleared by
+ *   GPU_IRQ_CLEAR; leaving it unmasked could cause an interrupt storm
+ */
+#define GPU_IRQ_REG_COMMON (GPU_FAULT | GPU_PROTECTED_FAULT | RESET_COMPLETED \
+ | POWER_CHANGED_ALL | MCU_STATUS_GPU_IRQ)
+
+/* GPU_CONTROL_MCU.GPU_IRQ_RAWSTAT */
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when performance count sample has completed */
+
+#endif /* _KBASE_GPU_REGMAP_CSF_H_ */
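For reference, the CSF GPU_COMMAND encoding introduced above packs the opcode into bits [7:0] and the payload into bits [15:8], and GPU_COMMAND_FLUSH_CACHE_MERGE is a plain bitwise OR, so a pended weaker flush folds into a stronger one. A small userspace check of that property, with the relevant macros re-stated using uint32_t in place of __u32 (an illustrative sketch, not driver code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GPU_COMMAND_CODE_FLUSH_CACHES                  0x04
#define GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE  0x003
#define GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE 0x030
#define GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE     0x200

#define GPU_COMMAND_CODE_PAYLOAD(opcode, payload) \
        ((uint32_t)(opcode) | ((uint32_t)(payload) << 8))
#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) ((cmd1) | (cmd2))

int main(void)
{
        uint32_t cln_inv_l2 = GPU_COMMAND_CODE_PAYLOAD(
                GPU_COMMAND_CODE_FLUSH_CACHES,
                GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE);
        uint32_t cln_inv_full = GPU_COMMAND_CODE_PAYLOAD(
                GPU_COMMAND_CODE_FLUSH_CACHES,
                GPU_COMMAND_FLUSH_PAYLOAD_L2_CLEAN_INVALIDATE |
                GPU_COMMAND_FLUSH_PAYLOAD_LSC_CLEAN_INVALIDATE |
                GPU_COMMAND_FLUSH_PAYLOAD_OTHER_INVALIDATE);

        /* Merging an L2-only flush into a full flush changes nothing */
        assert(GPU_COMMAND_FLUSH_CACHE_MERGE(cln_inv_l2, cln_inv_full) == cln_inv_full);
        printf("CLN_INV_L2 = 0x%x, CLN_INV_FULL = 0x%x\n", cln_inv_l2, cln_inv_full);
        return 0;
}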
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
new file mode 100644
index 0000000..d1cd8fc
--- /dev/null
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#ifndef _KBASE_GPU_REGMAP_JM_H_
+#define _KBASE_GPU_REGMAP_JM_H_
+
+#if MALI_USE_CSF && defined(__KERNEL__)
+#error "Cannot be compiled with CSF"
+#endif
+
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_AARCH64_OUTER_WA 0x8Dull
+/* Set to inner non-cacheable, outer-non-cacheable
+ * Setting defined by the alloc bits is ignored, but set to a valid encoding:
+ * - no-alloc on read
+ * - no alloc on write
+ */
+#define AS_MEMATTR_AARCH64_NON_CACHEABLE 0x4Cull
+
+/* Symbols for default MEMATTR to use
+ * Default is - HW implementation defined caching
+ */
+#define AS_MEMATTR_INDEX_DEFAULT 0
+#define AS_MEMATTR_INDEX_DEFAULT_ACE 3
+
+/* HW implementation defined caching */
+#define AS_MEMATTR_INDEX_IMPL_DEF_CACHE_POLICY 0
+/* Force cache on */
+#define AS_MEMATTR_INDEX_FORCE_TO_CACHE_ALL 1
+/* Write-alloc */
+#define AS_MEMATTR_INDEX_WRITE_ALLOC 2
+/* Outer coherent, inner implementation defined policy */
+#define AS_MEMATTR_INDEX_OUTER_IMPL_DEF 3
+/* Outer coherent, write alloc inner */
+#define AS_MEMATTR_INDEX_OUTER_WA 4
+/* Normal memory, inner non-cacheable, outer non-cacheable (ARMv8 mode only) */
+#define AS_MEMATTR_INDEX_NON_CACHEABLE 5
+
+/* GPU control registers */
+
+#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
+#define JS_PRESENT 0x01C /* (RO) Job slots present */
+
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
+ * region base address, low word
+ */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
+ * region base address, high word
+ */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
+ * configuration
+ */
+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable
+ * flags for Job Manager
+ */
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
+ * flags for shader cores
+ */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
+ * flags for tiler
+ */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
+ * flags for MMU/L2 cache
+ */
+
+#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
+#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
+#define JS2_FEATURES 0x0C8 /* (RO) Features of job slot 2 */
+#define JS3_FEATURES 0x0CC /* (RO) Features of job slot 3 */
+#define JS4_FEATURES 0x0D0 /* (RO) Features of job slot 4 */
+#define JS5_FEATURES 0x0D4 /* (RO) Features of job slot 5 */
+#define JS6_FEATURES 0x0D8 /* (RO) Features of job slot 6 */
+#define JS7_FEATURES 0x0DC /* (RO) Features of job slot 7 */
+#define JS8_FEATURES 0x0E0 /* (RO) Features of job slot 8 */
+#define JS9_FEATURES 0x0E4 /* (RO) Features of job slot 9 */
+#define JS10_FEATURES 0x0E8 /* (RO) Features of job slot 10 */
+#define JS11_FEATURES 0x0EC /* (RO) Features of job slot 11 */
+#define JS12_FEATURES 0x0F0 /* (RO) Features of job slot 12 */
+#define JS13_FEATURES 0x0F4 /* (RO) Features of job slot 13 */
+#define JS14_FEATURES 0x0F8 /* (RO) Features of job slot 14 */
+#define JS15_FEATURES 0x0FC /* (RO) Features of job slot 15 */
+
+#define JS_FEATURES_REG(n) GPU_CONTROL_REG(JS0_FEATURES + ((n) << 2))
+
+#define JM_CONFIG 0xF00 /* (RW) Job manager configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
+#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
+
+#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
+#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
+#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
+#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
+#define JOB_SLOT4 0xA00 /* Configuration registers for job slot 4 */
+#define JOB_SLOT5 0xA80 /* Configuration registers for job slot 5 */
+#define JOB_SLOT6 0xB00 /* Configuration registers for job slot 6 */
+#define JOB_SLOT7 0xB80 /* Configuration registers for job slot 7 */
+#define JOB_SLOT8 0xC00 /* Configuration registers for job slot 8 */
+#define JOB_SLOT9 0xC80 /* Configuration registers for job slot 9 */
+#define JOB_SLOT10 0xD00 /* Configuration registers for job slot 10 */
+#define JOB_SLOT11 0xD80 /* Configuration registers for job slot 11 */
+#define JOB_SLOT12 0xE00 /* Configuration registers for job slot 12 */
+#define JOB_SLOT13 0xE80 /* Configuration registers for job slot 13 */
+#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
+#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
+
+#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
+
+#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
+/* (RO) Extended affinity mask for job slot n */
+#define JS_XAFFINITY 0x1C
+
+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
+
+#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
+
+#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
+/* (RW) Next extended affinity mask for job slot n */
+#define JS_XAFFINITY_NEXT 0x5C
+
+#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
+
+/* No JM-specific MMU control registers */
+/* No JM-specific MMU address space control registers */
+
+/* JS_COMMAND register commands */
+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
+
+/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
+#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
+#define JS_CONFIG_START_FLUSH_CLEAN (1u << 8)
+#define JS_CONFIG_START_FLUSH_INV_SHADER_OTHER (2u << 8)
+#define JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE (3u << 8)
+#define JS_CONFIG_START_MMU (1u << 10)
+#define JS_CONFIG_JOB_CHAIN_FLAG (1u << 11)
+#define JS_CONFIG_END_FLUSH_NO_ACTION JS_CONFIG_START_FLUSH_NO_ACTION
+#define JS_CONFIG_END_FLUSH_CLEAN (1u << 12)
+#define JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE (3u << 12)
+#define JS_CONFIG_ENABLE_FLUSH_REDUCTION (1u << 14)
+#define JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK (1u << 15)
+#define JS_CONFIG_THREAD_PRI(n) ((n) << 16)
+
+/* JS_XAFFINITY register values */
+#define JS_XAFFINITY_XAFFINITY_ENABLE (1u << 0)
+#define JS_XAFFINITY_TILER_ENABLE (1u << 8)
+#define JS_XAFFINITY_CACHE_ENABLE (1u << 16)
+
+/* JS_STATUS register values */
+
+/* NOTE: Please keep these values in sync with enum base_jd_event_code in mali_base_kernel.h.
+ * The values are kept separate to avoid a dependency between userspace and kernel code.
+ */
+
+/* Group of values representing the job status instead of a particular fault */
+#define JS_STATUS_NO_EXCEPTION_BASE 0x00
+#define JS_STATUS_INTERRUPTED (JS_STATUS_NO_EXCEPTION_BASE + 0x02) /* 0x02 means INTERRUPTED */
+#define JS_STATUS_STOPPED (JS_STATUS_NO_EXCEPTION_BASE + 0x03) /* 0x03 means STOPPED */
+#define JS_STATUS_TERMINATED (JS_STATUS_NO_EXCEPTION_BASE + 0x04) /* 0x04 means TERMINATED */
+
+/* General fault values */
+#define JS_STATUS_FAULT_BASE 0x40
+#define JS_STATUS_CONFIG_FAULT (JS_STATUS_FAULT_BASE) /* 0x40 means CONFIG FAULT */
+#define JS_STATUS_POWER_FAULT (JS_STATUS_FAULT_BASE + 0x01) /* 0x41 means POWER FAULT */
+#define JS_STATUS_READ_FAULT (JS_STATUS_FAULT_BASE + 0x02) /* 0x42 means READ FAULT */
+#define JS_STATUS_WRITE_FAULT (JS_STATUS_FAULT_BASE + 0x03) /* 0x43 means WRITE FAULT */
+#define JS_STATUS_AFFINITY_FAULT (JS_STATUS_FAULT_BASE + 0x04) /* 0x44 means AFFINITY FAULT */
+#define JS_STATUS_BUS_FAULT (JS_STATUS_FAULT_BASE + 0x08) /* 0x48 means BUS FAULT */
+
+/* Instruction or data faults */
+#define JS_STATUS_INSTRUCTION_FAULT_BASE 0x50
+#define JS_STATUS_INSTR_INVALID_PC (JS_STATUS_INSTRUCTION_FAULT_BASE) /* 0x50 means INSTR INVALID PC */
+#define JS_STATUS_INSTR_INVALID_ENC (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x01) /* 0x51 means INSTR INVALID ENC */
+#define JS_STATUS_INSTR_TYPE_MISMATCH (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x02) /* 0x52 means INSTR TYPE MISMATCH */
+#define JS_STATUS_INSTR_OPERAND_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x03) /* 0x53 means INSTR OPERAND FAULT */
+#define JS_STATUS_INSTR_TLS_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x04) /* 0x54 means INSTR TLS FAULT */
+#define JS_STATUS_INSTR_BARRIER_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x05) /* 0x55 means INSTR BARRIER FAULT */
+#define JS_STATUS_INSTR_ALIGN_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x06) /* 0x56 means INSTR ALIGN FAULT */
+/* NOTE: No fault with 0x57 code defined in spec. */
+#define JS_STATUS_DATA_INVALID_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x08) /* 0x58 means DATA INVALID FAULT */
+#define JS_STATUS_TILE_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x09) /* 0x59 means TILE RANGE FAULT */
+#define JS_STATUS_ADDRESS_RANGE_FAULT (JS_STATUS_INSTRUCTION_FAULT_BASE + 0x0A) /* 0x5A means ADDRESS RANGE FAULT */
+
+/* Other faults */
+#define JS_STATUS_MEMORY_FAULT_BASE 0x60
+#define JS_STATUS_OUT_OF_MEMORY (JS_STATUS_MEMORY_FAULT_BASE) /* 0x60 means OUT OF MEMORY */
+#define JS_STATUS_UNKNOWN 0x7F /* 0x7F means UNKNOWN */
+
+/* JS<n>_FEATURES register */
+#define JS_FEATURE_NULL_JOB (1u << 1)
+#define JS_FEATURE_SET_VALUE_JOB (1u << 2)
+#define JS_FEATURE_CACHE_FLUSH_JOB (1u << 3)
+#define JS_FEATURE_COMPUTE_JOB (1u << 4)
+#define JS_FEATURE_VERTEX_JOB (1u << 5)
+#define JS_FEATURE_GEOMETRY_JOB (1u << 6)
+#define JS_FEATURE_TILER_JOB (1u << 7)
+#define JS_FEATURE_FUSED_JOB (1u << 8)
+#define JS_FEATURE_FRAGMENT_JOB (1u << 9)
+
+/* JM_CONFIG register */
+#define JM_TIMESTAMP_OVERRIDE (1ul << 0)
+#define JM_CLOCK_GATE_OVERRIDE (1ul << 1)
+#define JM_JOB_THROTTLE_ENABLE (1ul << 2)
+#define JM_JOB_THROTTLE_LIMIT_SHIFT (3)
+#define JM_MAX_JOB_THROTTLE_LIMIT (0x3F)
+#define JM_FORCE_COHERENCY_FEATURES_SHIFT (2)
+
+/* GPU_COMMAND values */
+#define GPU_COMMAND_NOP 0x00 /* No operation, nothing happens */
+#define GPU_COMMAND_SOFT_RESET 0x01 /* Stop all external bus interfaces, and then reset the entire GPU. */
+#define GPU_COMMAND_HARD_RESET 0x02 /* Immediately reset the entire GPU. */
+#define GPU_COMMAND_PRFCNT_CLEAR 0x03 /* Clear all performance counters, setting them all to zero. */
+#define GPU_COMMAND_PRFCNT_SAMPLE 0x04 /* Sample all performance counters, writing them out to memory */
+#define GPU_COMMAND_CYCLE_COUNT_START 0x05 /* Starts the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CYCLE_COUNT_STOP 0x06 /* Stops the cycle counter, and system timestamp propagation */
+#define GPU_COMMAND_CLEAN_CACHES 0x07 /* Clean all caches */
+#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */
+#define GPU_COMMAND_SET_PROTECTED_MODE 0x09 /* Places the GPU in protected mode */
+
+/* GPU_COMMAND cache flush alias to CSF command payload */
+#define GPU_COMMAND_CACHE_CLN_INV_L2 GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_L2_LSC GPU_COMMAND_CLEAN_INV_CACHES
+#define GPU_COMMAND_CACHE_CLN_INV_FULL GPU_COMMAND_CLEAN_INV_CACHES
+
+/* Merge cache flush commands */
+#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
+ ((cmd1) > (cmd2) ? (cmd1) : (cmd2))
+
+/* IRQ flags */
+#define GPU_FAULT (1 << 0) /* A GPU Fault has occurred */
+#define MULTIPLE_GPU_FAULTS (1 << 7) /* More than one GPU Fault occurred. */
+#define RESET_COMPLETED (1 << 8) /* Set when a reset has completed. */
+#define POWER_CHANGED_SINGLE (1 << 9) /* Set when a single core has finished powering up or down. */
+#define POWER_CHANGED_ALL (1 << 10) /* Set when all cores have finished powering up or down. */
+#define PRFCNT_SAMPLE_COMPLETED (1 << 16) /* Set when a performance count sample has completed. */
+#define CLEAN_CACHES_COMPLETED (1 << 17) /* Set when a cache clean operation has completed. */
+
+/*
+ * In Debug build,
+ * GPU_IRQ_REG_COMMON | POWER_CHANGED_SINGLE is used to clear and enable interrupt sources of GPU_IRQ
+ * by writing it to the GPU_IRQ_CLEAR/MASK registers.
+ *
+ * In Release build,
+ * GPU_IRQ_REG_COMMON is used.
+ *
+ * Note:
+ * CLEAN_CACHES_COMPLETED - Used separately for cache operation.
+ */
+#define GPU_IRQ_REG_COMMON (GPU_FAULT | MULTIPLE_GPU_FAULTS | RESET_COMPLETED \
+ | POWER_CHANGED_ALL | PRFCNT_SAMPLE_COMPLETED)
+
+#endif /* _KBASE_GPU_REGMAP_JM_H_ */
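The JM header keeps the legacy single-opcode GPU_COMMAND encoding, so its GPU_COMMAND_FLUSH_CACHE_MERGE takes the numerically larger (stronger) command instead of OR-ing payloads as CSF does. A tiny userspace check of that difference (an illustrative sketch, not driver code):

#include <assert.h>

#define GPU_COMMAND_CLEAN_CACHES     0x07 /* Clean all caches */
#define GPU_COMMAND_CLEAN_INV_CACHES 0x08 /* Clean and invalidate all caches */

#define GPU_COMMAND_FLUSH_CACHE_MERGE(cmd1, cmd2) \
        ((cmd1) > (cmd2) ? (cmd1) : (cmd2))

int main(void)
{
        /* A pended CLEAN merged with CLEAN_INV must not downgrade the flush */
        assert(GPU_COMMAND_FLUSH_CACHE_MERGE(GPU_COMMAND_CLEAN_CACHES,
                                             GPU_COMMAND_CLEAN_INV_CACHES) ==
               GPU_COMMAND_CLEAN_INV_CACHES);
        return 0;
}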
diff --git a/mali_kbase/gpu/mali_kbase_gpu_fault.h b/mali_kbase/gpu/mali_kbase_gpu_fault.h
index d1e9f77..8b50a5d 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_fault.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_fault.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,8 +23,8 @@
#define _KBASE_GPU_FAULT_H_
/**
- * kbase_gpu_exception_name() -
- * Returns the name associated with a Mali exception code
+ * kbase_gpu_exception_name() - Returns the string associated with the exception code
+ *
* @exception_code: exception code
*
* This function is called from the interrupt handler when a GPU fault occurs.
diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
index 47e7781..1d2a49b 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
@@ -23,6 +23,565 @@
#define _KBASE_GPU_REGMAP_H_
#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h>
+#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h>
+#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h>
+#if MALI_USE_CSF
+#include "backend/mali_kbase_gpu_regmap_csf.h"
+#else
+#include "backend/mali_kbase_gpu_regmap_jm.h"
+#endif
+
+/* GPU_U definition */
+#ifdef __ASSEMBLER__
+#define GPU_U(x) x
+#else
+#define GPU_U(x) x##u
+#endif /* __ASSEMBLER__ */
+
+/* Begin Register Offsets */
+/* GPU control registers */
+
+#define GPU_CONTROL_BASE 0x0000
+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
+#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
+#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
+#define MEM_FEATURES 0x010 /* (RO) Memory system features */
+#define MMU_FEATURES 0x014 /* (RO) MMU features */
+#define AS_PRESENT 0x018 /* (RO) Address space slots present */
+#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
+#define GPU_IRQ_CLEAR 0x024 /* (WO) */
+#define GPU_IRQ_MASK 0x028 /* (RW) */
+#define GPU_IRQ_STATUS 0x02C /* (RO) */
+
+#define GPU_COMMAND 0x030 /* (WO) */
+#define GPU_STATUS 0x034 /* (RO) */
+
+#define GPU_DBGEN (1 << 8) /* DBGEN wire status */
+
+#define GPU_FAULTSTATUS 0x03C /* (RO) GPU exception type and fault status */
+#define GPU_FAULTADDRESS_LO 0x040 /* (RO) GPU exception fault address, low word */
+#define GPU_FAULTADDRESS_HI 0x044 /* (RO) GPU exception fault address, high word */
+
+#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */
+
+#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */
+#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core
+ * supergroup are l2 coherent
+ */
+
+#define PWR_KEY 0x050 /* (WO) Power manager key register */
+#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
+#define PWR_OVERRIDE1 0x058 /* (RW) Power manager override settings */
+#define GPU_FEATURES_LO 0x060 /* (RO) GPU features, low word */
+#define GPU_FEATURES_HI 0x064 /* (RO) GPU features, high word */
+#define PRFCNT_FEATURES 0x068 /* (RO) Performance counter features */
+#define TIMESTAMP_OFFSET_LO 0x088 /* (RW) Global time stamp offset, low word */
+#define TIMESTAMP_OFFSET_HI 0x08C /* (RW) Global time stamp offset, high word */
+#define CYCLE_COUNT_LO 0x090 /* (RO) Cycle counter, low word */
+#define CYCLE_COUNT_HI 0x094 /* (RO) Cycle counter, high word */
+#define TIMESTAMP_LO 0x098 /* (RO) Global time stamp counter, low word */
+#define TIMESTAMP_HI 0x09C /* (RO) Global time stamp counter, high word */
+
+#define THREAD_MAX_THREADS 0x0A0 /* (RO) Maximum number of threads per core */
+#define THREAD_MAX_WORKGROUP_SIZE 0x0A4 /* (RO) Maximum workgroup size */
+#define THREAD_MAX_BARRIER_SIZE 0x0A8 /* (RO) Maximum threads waiting at a barrier */
+#define THREAD_FEATURES 0x0AC /* (RO) Thread features */
+#define THREAD_TLS_ALLOC 0x310 /* (RO) Number of threads per core that TLS must be allocated for */
+
+#define TEXTURE_FEATURES_0 0x0B0 /* (RO) Support flags for indexed texture formats 0..31 */
+#define TEXTURE_FEATURES_1 0x0B4 /* (RO) Support flags for indexed texture formats 32..63 */
+#define TEXTURE_FEATURES_2 0x0B8 /* (RO) Support flags for indexed texture formats 64..95 */
+#define TEXTURE_FEATURES_3 0x0BC /* (RO) Support flags for texture order */
+
+#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
+
+#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
+#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
+
+#define TILER_PRESENT_LO 0x110 /* (RO) Tiler core present bitmap, low word */
+#define TILER_PRESENT_HI 0x114 /* (RO) Tiler core present bitmap, high word */
+
+#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
+#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
+
+#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
+#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
+
+#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
+
+#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
+#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
+
+#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
+
+#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
+
+#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
+#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
+
+#define SHADER_PWROFF_LO 0x1C0 /* (WO) Shader core power off bitmap, low word */
+#define SHADER_PWROFF_HI 0x1C4 /* (WO) Shader core power off bitmap, high word */
+
+#define TILER_PWROFF_LO 0x1D0 /* (WO) Tiler core power off bitmap, low word */
+#define TILER_PWROFF_HI 0x1D4 /* (WO) Tiler core power off bitmap, high word */
+
+#define L2_PWROFF_LO 0x1E0 /* (WO) Level 2 cache power off bitmap, low word */
+#define L2_PWROFF_HI 0x1E4 /* (WO) Level 2 cache power off bitmap, high word */
+
+#define STACK_PWROFF_LO 0xE30 /* (RO) Core stack power off bitmap, low word */
+#define STACK_PWROFF_HI 0xE34 /* (RO) Core stack power off bitmap, high word */
+
+#define SHADER_PWRTRANS_LO 0x200 /* (RO) Shader core power transition bitmap, low word */
+#define SHADER_PWRTRANS_HI 0x204 /* (RO) Shader core power transition bitmap, high word */
+
+#define TILER_PWRTRANS_LO 0x210 /* (RO) Tiler core power transition bitmap, low word */
+#define TILER_PWRTRANS_HI 0x214 /* (RO) Tiler core power transition bitmap, high word */
+
+#define L2_PWRTRANS_LO 0x220 /* (RO) Level 2 cache power transition bitmap, low word */
+#define L2_PWRTRANS_HI 0x224 /* (RO) Level 2 cache power transition bitmap, high word */
+
+#define ASN_HASH_0 0x02C0 /* (RW) ASN hash function argument 0 */
+#define ASN_HASH(n) (ASN_HASH_0 + (n)*4)
+#define ASN_HASH_COUNT 3
+
+#define SYSC_ALLOC0 0x0340 /* (RW) System cache allocation hint from source ID */
+#define SYSC_ALLOC(n) (SYSC_ALLOC0 + (n)*4)
+#define SYSC_ALLOC_COUNT 8
+
+#define STACK_PWRTRANS_LO 0xE40 /* (RO) Core stack power transition bitmap, low word */
+#define STACK_PWRTRANS_HI 0xE44 /* (RO) Core stack power transition bitmap, high word */
+
+#define SHADER_PWRACTIVE_LO 0x240 /* (RO) Shader core active bitmap, low word */
+#define SHADER_PWRACTIVE_HI 0x244 /* (RO) Shader core active bitmap, high word */
+
+#define TILER_PWRACTIVE_LO 0x250 /* (RO) Tiler core active bitmap, low word */
+#define TILER_PWRACTIVE_HI 0x254 /* (RO) Tiler core active bitmap, high word */
+
+#define L2_PWRACTIVE_LO 0x260 /* (RO) Level 2 cache active bitmap, low word */
+#define L2_PWRACTIVE_HI 0x264 /* (RO) Level 2 cache active bitmap, high word */
+
+#define COHERENCY_FEATURES 0x300 /* (RO) Coherency features present */
+#define COHERENCY_ENABLE 0x304 /* (RW) Coherency enable */
+
+
+#define SHADER_CONFIG 0xF04 /* (RW) Shader core configuration (implementation-specific) */
+#define TILER_CONFIG 0xF08 /* (RW) Tiler core configuration (implementation-specific) */
+#define L2_MMU_CONFIG 0xF0C /* (RW) L2 cache and MMU configuration (implementation-specific) */
+
+/* Job control registers */
+
+#define JOB_CONTROL_BASE 0x1000
+
+#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
+
+#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
+#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
+#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
+
+/* MMU control registers */
+
+#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
+
+#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
+#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
+#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
+#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
+#define MMU_AS4 0x500 /* Configuration registers for address space 4 */
+#define MMU_AS5 0x540 /* Configuration registers for address space 5 */
+#define MMU_AS6 0x580 /* Configuration registers for address space 6 */
+#define MMU_AS7 0x5C0 /* Configuration registers for address space 7 */
+#define MMU_AS8 0x600 /* Configuration registers for address space 8 */
+#define MMU_AS9 0x640 /* Configuration registers for address space 9 */
+#define MMU_AS10 0x680 /* Configuration registers for address space 10 */
+#define MMU_AS11 0x6C0 /* Configuration registers for address space 11 */
+#define MMU_AS12 0x700 /* Configuration registers for address space 12 */
+#define MMU_AS13 0x740 /* Configuration registers for address space 13 */
+#define MMU_AS14 0x780 /* Configuration registers for address space 14 */
+#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
+
+/* MMU address space control registers */
+
+#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
+
+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
+
+/* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_LO 0x30
+/* (RW) Translation table configuration for address space n, high word */
+#define AS_TRANSCFG_HI 0x34
+/* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_LO 0x38
+/* (RO) Secondary fault address for address space n, high word */
+#define AS_FAULTEXTRA_HI 0x3C
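
A minimal sketch of how the per-address-space windowing above resolves to a concrete offset: each address space occupies a 0x40-byte window starting at MMU_AS0, so register r of address space n sits at MMU_AS0 + (n << 6) + r within the MMU block (MMU_AS_REG() additionally folds in the MMU block base via MMU_REG(), which is defined elsewhere in this header family). The EX_-prefixed names below are local mirrors for illustration only.

#include <stdint.h>
#include <stdio.h>

#define EX_MMU_AS0    0x400 /* mirrors MMU_AS0 above */
#define EX_AS_COMMAND 0x18  /* mirrors AS_COMMAND above */

/* Offset of register 'reg' for address space 'as_nr', relative to the
 * MMU control block.
 */
static uint32_t ex_mmu_as_reg(unsigned int as_nr, uint32_t reg)
{
	return EX_MMU_AS0 + (as_nr << 6) + reg;
}

int main(void)
{
	/* AS_COMMAND of address space 3: 0x400 + 0xC0 + 0x18 = 0x4D8 */
	printf("0x%x\n", (unsigned int)ex_mmu_as_reg(3, EX_AS_COMMAND));
	return 0;
}
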
+
+/* End Register Offsets */
+
+#define GPU_IRQ_REG_ALL (GPU_IRQ_REG_COMMON)
+
+/*
+ * MMU_IRQ_RAWSTAT register values. Values are valid also for
+ * MMU_IRQ_CLEAR, MMU_IRQ_MASK, MMU_IRQ_STATUS registers.
+ */
+
+#define MMU_PAGE_FAULT_FLAGS 16
+
+/* Macros returning a bitmask to retrieve page fault or bus error flags from
+ * MMU registers
+ */
+#define MMU_PAGE_FAULT(n) (1UL << (n))
+#define MMU_BUS_ERROR(n) (1UL << ((n) + MMU_PAGE_FAULT_FLAGS))
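
A minimal sketch of how these bitmask helpers decode a value read from MMU_IRQ_STATUS, assuming the two macros above are in scope: page-fault flags occupy bits 0-15 and bus-error flags bits 16-31, one bit per address space. The helper name is illustrative only.

#include <stdbool.h>
#include <stdint.h>

/* True if address space 'as_nr' has a page fault or bus error pending in
 * the given MMU_IRQ_STATUS value.
 */
static bool ex_as_has_fault(uint32_t irq_status, unsigned int as_nr)
{
	return (irq_status & MMU_PAGE_FAULT(as_nr)) ||
	       (irq_status & MMU_BUS_ERROR(as_nr));
}
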
+
+/*
+ * Begin AARCH64 MMU TRANSTAB register values
+ */
+#define MMU_HW_OUTA_BITS 40
+#define AS_TRANSTAB_BASE_MASK ((1ULL << MMU_HW_OUTA_BITS) - (1ULL << 4))
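
As a worked check of the mask arithmetic above: with 40 output-address bits, (1ULL << 40) - (1ULL << 4) keeps bits 39:4 of the translation table base, i.e. 0x000000FFFFFFFFF0.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Mirrors AS_TRANSTAB_BASE_MASK above: output-address bits 39:4. */
	const uint64_t mask = (1ULL << 40) - (1ULL << 4);

	assert(mask == 0x000000FFFFFFFFF0ULL);
	return 0;
}
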
+
+/*
+ * Begin MMU STATUS register values
+ */
+#define AS_STATUS_AS_ACTIVE 0x01
+
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MASK (0x7<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT (0x0<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT (0x1<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT (0x2<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG (0x3<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT (0x4<<3)
+#define AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT (0x5<<3)
+
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT 0
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_MASK (0xFF << AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_GET(reg_val) \
+ (((reg_val)&AS_FAULTSTATUS_EXCEPTION_TYPE_MASK) >> AS_FAULTSTATUS_EXCEPTION_TYPE_SHIFT)
+#define AS_FAULTSTATUS_EXCEPTION_TYPE_TRANSLATION_FAULT_0 0xC0
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_SHIFT 8
+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+#define AS_FAULTSTATUS_ACCESS_TYPE_GET(reg_val) \
+ (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
+
+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1)
+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2)
+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3)
+
+#define AS_FAULTSTATUS_SOURCE_ID_SHIFT 16
+#define AS_FAULTSTATUS_SOURCE_ID_MASK (0xFFFF << AS_FAULTSTATUS_SOURCE_ID_SHIFT)
+#define AS_FAULTSTATUS_SOURCE_ID_GET(reg_val) \
+ (((reg_val)&AS_FAULTSTATUS_SOURCE_ID_MASK) >> AS_FAULTSTATUS_SOURCE_ID_SHIFT)
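
A minimal sketch of decoding an AS_FAULTSTATUS value with the accessor macros above (assumed to be in scope); the helper and its output format are illustrative only.

#include <stdint.h>
#include <stdio.h>

static void ex_print_fault(uint32_t faultstatus)
{
	unsigned int exception = AS_FAULTSTATUS_EXCEPTION_TYPE_GET(faultstatus);
	unsigned int access = AS_FAULTSTATUS_ACCESS_TYPE_GET(faultstatus);
	unsigned int source = AS_FAULTSTATUS_SOURCE_ID_GET(faultstatus);

	printf("exception type 0x%x, access type %u, source id 0x%x\n",
	       exception, access, source);

	if (access == AS_FAULTSTATUS_ACCESS_TYPE_READ)
		printf("fault raised on a read access\n");
}
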
+
+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT (0)
+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK \
+ ((0xFF) << PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
+#define PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(reg_val) \
+ (((reg_val)&PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_MASK) >> \
+ PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_SHIFT)
+
+/*
+ * Begin MMU TRANSCFG register values
+ */
+#define AS_TRANSCFG_ADRMODE_LEGACY 0
+#define AS_TRANSCFG_ADRMODE_UNMAPPED 1
+#define AS_TRANSCFG_ADRMODE_IDENTITY 2
+#define AS_TRANSCFG_ADRMODE_AARCH64_4K 6
+#define AS_TRANSCFG_ADRMODE_AARCH64_64K 8
+
+#define AS_TRANSCFG_ADRMODE_MASK 0xF
+
+/*
+ * Begin TRANSCFG register values
+ */
+#define AS_TRANSCFG_PTW_MEMATTR_MASK (3ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_NON_CACHEABLE (1ull << 24)
+#define AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK (2ull << 24)
+
+#define AS_TRANSCFG_PTW_SH_MASK ((3ull << 28))
+#define AS_TRANSCFG_PTW_SH_OS (2ull << 28)
+#define AS_TRANSCFG_PTW_SH_IS (3ull << 28)
+#define AS_TRANSCFG_R_ALLOCATE (1ull << 30)
+
+/*
+ * Begin Command Values
+ */
+
+/* AS_COMMAND register commands */
+#define AS_COMMAND_NOP 0x00 /* NOP Operation */
+#define AS_COMMAND_UPDATE 0x01 /* Broadcasts the values in AS_TRANSTAB and AS_MEMATTR to all MMUs */
+#define AS_COMMAND_LOCK 0x02 /* Issue a lock region command to all MMUs */
+#define AS_COMMAND_UNLOCK 0x03 /* Issue a flush region command to all MMUs */
+/* Flush all L2 caches then issue a flush region command to all MMUs */
+#define AS_COMMAND_FLUSH_PT 0x04
+/* Wait for memory accesses to complete, flush all the L1 caches, then flush
+ * all L2 caches, then issue a flush region command to all MMUs
+ */
+#define AS_COMMAND_FLUSH_MEM 0x05
+
+/* AS_LOCKADDR register */
+#define AS_LOCKADDR_LOCKADDR_SIZE_SHIFT GPU_U(0)
+#define AS_LOCKADDR_LOCKADDR_SIZE_MASK \
+ (GPU_U(0x3F) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_SIZE_GET(reg_val) \
+ (((reg_val)&AS_LOCKADDR_LOCKADDR_SIZE_MASK) >> \
+ AS_LOCKADDR_LOCKADDR_SIZE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_SIZE_SET(reg_val, value) \
+ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_SIZE_MASK) | \
+ (((value) << AS_LOCKADDR_LOCKADDR_SIZE_SHIFT) & \
+ AS_LOCKADDR_LOCKADDR_SIZE_MASK))
+#define AS_LOCKADDR_LOCKADDR_BASE_SHIFT GPU_U(12)
+#define AS_LOCKADDR_LOCKADDR_BASE_MASK \
+ (GPU_U(0xFFFFFFFFFFFFF) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_BASE_GET(reg_val) \
+ (((reg_val)&AS_LOCKADDR_LOCKADDR_BASE_MASK) >> \
+ AS_LOCKADDR_LOCKADDR_BASE_SHIFT)
+#define AS_LOCKADDR_LOCKADDR_BASE_SET(reg_val, value) \
+ (((reg_val) & ~AS_LOCKADDR_LOCKADDR_BASE_MASK) | \
+ (((value) << AS_LOCKADDR_LOCKADDR_BASE_SHIFT) & \
+ AS_LOCKADDR_LOCKADDR_BASE_MASK))
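
A minimal sketch of the AS_LOCKADDR packing implied by the fields above: bits 5:0 carry the region size (commonly interpreted as a log2 byte size) and bits 63:12 carry the 4 KiB-aligned region base. The helper mirrors the SET macros without relying on GPU_U(), which is defined elsewhere in the driver.

#include <stdint.h>

/* Pack a lock region into the AS_LOCKADDR layout: size field in bits 5:0,
 * base address (low 12 bits discarded) in bits 63:12.
 */
static uint64_t ex_lockaddr_pack(uint64_t base, unsigned int size_log2)
{
	uint64_t lockaddr = 0;

	lockaddr |= (uint64_t)(size_log2 & 0x3F);
	lockaddr |= (base >> 12) << 12;

	return lockaddr;
}
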
+
+/* GPU_STATUS values */
+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
+#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */
+
+/* PRFCNT_CONFIG register values */
+#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */
+#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */
+#define PRFCNT_CONFIG_SETSELECT_SHIFT 8 /* Set select position. */
+
+/* The performance counters are disabled. */
+#define PRFCNT_CONFIG_MODE_OFF 0
+/* The performance counters are enabled, but are only written out when a
+ * PRFCNT_SAMPLE command is issued using the GPU_COMMAND register.
+ */
+#define PRFCNT_CONFIG_MODE_MANUAL 1
+/* The performance counters are enabled, and are written out each time a tile
+ * finishes rendering.
+ */
+#define PRFCNT_CONFIG_MODE_TILE 2
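
A minimal sketch of composing a PRFCNT_CONFIG value from the field positions above (assumed in scope): manual sampling mode combined with an address space number and a counter set selection. Field widths are not checked here; the arguments are assumed to fit their fields.

#include <stdint.h>

static uint32_t ex_prfcnt_config(unsigned int as_nr, unsigned int set_select)
{
	return (PRFCNT_CONFIG_MODE_MANUAL << PRFCNT_CONFIG_MODE_SHIFT) |
	       (as_nr << PRFCNT_CONFIG_AS_SHIFT) |
	       (set_select << PRFCNT_CONFIG_SETSELECT_SHIFT);
}
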
+
+/* AS<n>_MEMATTR values from MMU_MEMATTR_STAGE1: */
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_IMPL_DEF_CACHE_POLICY 0x88ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_FORCE_TO_CACHE_ALL 0x8Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_WRITE_ALLOC 0x8Dull
+
+/* Use GPU implementation-defined caching policy. */
+#define AS_MEMATTR_LPAE_IMPL_DEF_CACHE_POLICY 0x48ull
+/* The attribute set to force all resources to be cached. */
+#define AS_MEMATTR_LPAE_FORCE_TO_CACHE_ALL 0x4Full
+/* Inner write-alloc cache setup, no outer caching */
+#define AS_MEMATTR_LPAE_WRITE_ALLOC 0x4Dull
+/* Set to implementation defined, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_IMPL_DEF 0x88ull
+/* Set to write back memory, outer caching */
+#define AS_MEMATTR_LPAE_OUTER_WA 0x8Dull
+/* There is no LPAE support for non-cacheable memory, since the memory type is
+ * always write-back.
+ * This setting is therefore marked as reserved for LPAE.
+ */
+#define AS_MEMATTR_LPAE_NON_CACHEABLE_RESERVED
+
+/* L2_MMU_CONFIG register */
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT (23)
+#define L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY (0x1 << L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY_SHIFT)
+
+/* End L2_MMU_CONFIG register */
+
+/* THREAD_* registers */
+
+/* THREAD_FEATURES IMPLEMENTATION_TECHNOLOGY values */
+#define IMPLEMENTATION_UNSPECIFIED 0
+#define IMPLEMENTATION_SILICON 1
+#define IMPLEMENTATION_FPGA 2
+#define IMPLEMENTATION_MODEL 3
+
+/* Default values when registers are not supported by the implemented hardware */
+#define THREAD_MT_DEFAULT 256
+#define THREAD_MWS_DEFAULT 256
+#define THREAD_MBS_DEFAULT 256
+#define THREAD_MR_DEFAULT 1024
+#define THREAD_MTQ_DEFAULT 4
+#define THREAD_MTGS_DEFAULT 10
+
+/* End THREAD_* registers */
+
+/* SHADER_CONFIG register */
+#define SC_LS_ALLOW_ATTR_TYPES (1ul << 16)
+#define SC_TLS_HASH_ENABLE (1ul << 17)
+#define SC_LS_ATTR_CHECK_DISABLE (1ul << 18)
+#define SC_VAR_ALGORITHM (1ul << 29)
+/* End SHADER_CONFIG register */
+
+/* TILER_CONFIG register */
+#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
+/* End TILER_CONFIG register */
+
+/* L2_CONFIG register */
+#define L2_CONFIG_SIZE_SHIFT 16
+#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT)
+#define L2_CONFIG_HASH_SHIFT 24
+#define L2_CONFIG_HASH_MASK (0xFFul << L2_CONFIG_HASH_SHIFT)
+#define L2_CONFIG_ASN_HASH_ENABLE_SHIFT 24
+#define L2_CONFIG_ASN_HASH_ENABLE_MASK (1ul << L2_CONFIG_ASN_HASH_ENABLE_SHIFT)
+/* End L2_CONFIG register */
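
A minimal sketch of reading the cache size field back out of an L2_CONFIG value with the mask/shift pair above (assumed in scope).

#include <stdint.h>

static unsigned int ex_l2_config_size(uint32_t l2_config)
{
	return (unsigned int)((l2_config & L2_CONFIG_SIZE_MASK) >>
			      L2_CONFIG_SIZE_SHIFT);
}
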
+
+
+/* IDVS_GROUP register */
+#define IDVS_GROUP_SIZE_SHIFT (16)
+#define IDVS_GROUP_MAX_SIZE (0x3F)
+
+/* SYSC_ALLOC read IDs */
+#define SYSC_ALLOC_ID_R_OTHER 0x00
+#define SYSC_ALLOC_ID_R_CSF 0x02
+#define SYSC_ALLOC_ID_R_MMU 0x04
+#define SYSC_ALLOC_ID_R_TILER_VERT 0x08
+#define SYSC_ALLOC_ID_R_TILER_PTR 0x09
+#define SYSC_ALLOC_ID_R_TILER_INDEX 0x0A
+#define SYSC_ALLOC_ID_R_TILER_OTHER 0x0B
+#define SYSC_ALLOC_ID_R_IC 0x10
+#define SYSC_ALLOC_ID_R_ATTR 0x11
+#define SYSC_ALLOC_ID_R_SCM 0x12
+#define SYSC_ALLOC_ID_R_FSDC 0x13
+#define SYSC_ALLOC_ID_R_VL 0x14
+#define SYSC_ALLOC_ID_R_PLR 0x15
+#define SYSC_ALLOC_ID_R_TEX 0x18
+#define SYSC_ALLOC_ID_R_LSC 0x1c
+
+/* SYSC_ALLOC write IDs */
+#define SYSC_ALLOC_ID_W_OTHER 0x00
+#define SYSC_ALLOC_ID_W_CSF 0x02
+#define SYSC_ALLOC_ID_W_PCB 0x07
+#define SYSC_ALLOC_ID_W_TILER_PTR 0x09
+#define SYSC_ALLOC_ID_W_TILER_VERT_PLIST 0x0A
+#define SYSC_ALLOC_ID_W_TILER_OTHER 0x0B
+#define SYSC_ALLOC_ID_W_L2_EVICT 0x0C
+#define SYSC_ALLOC_ID_W_L2_FLUSH 0x0D
+#define SYSC_ALLOC_ID_W_TIB_COLOR 0x10
+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCH 0x11
+#define SYSC_ALLOC_ID_W_TIB_COLOR_AFBCB 0x12
+#define SYSC_ALLOC_ID_W_TIB_CRC 0x13
+#define SYSC_ALLOC_ID_W_TIB_DS 0x14
+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCH 0x15
+#define SYSC_ALLOC_ID_W_TIB_DS_AFBCB 0x16
+#define SYSC_ALLOC_ID_W_LSC 0x1C
+
+/* SYSC_ALLOC values */
+#define SYSC_ALLOC_L2_ALLOC 0x0
+#define SYSC_ALLOC_NEVER_ALLOC 0x2
+#define SYSC_ALLOC_ALWAYS_ALLOC 0x3
+#define SYSC_ALLOC_PTL_ALLOC 0x4
+#define SYSC_ALLOC_L2_PTL_ALLOC 0x5
+
+/* SYSC_ALLOC register */
+#define SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT (0)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC0_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC0_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC0_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC0_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC0 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT (4)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC0_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC0_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC0_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC0_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC0 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT (8)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC1_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC1_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC1_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC1_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC1_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC1 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT (12)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC1_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC1_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC1_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC1_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC1_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC1 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT (16)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC2_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC2_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC2_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC2_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC2_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC2 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT (20)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC2_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC2_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC2_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC2_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC2_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC2 values */
+#define SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT (24)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_R_SYSC_ALLOC3_MASK) >> \
+ SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_R_SYSC_ALLOC3_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_R_SYSC_ALLOC3_MASK) | \
+ (((value) << SYSC_ALLOC_R_SYSC_ALLOC3_SHIFT) & \
+ SYSC_ALLOC_R_SYSC_ALLOC3_MASK))
+/* End of SYSC_ALLOC_R_SYSC_ALLOC3 values */
+#define SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT (28)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_MASK ((0xF) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_GET(reg_val) \
+ (((reg_val)&SYSC_ALLOC_W_SYSC_ALLOC3_MASK) >> \
+ SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT)
+#define SYSC_ALLOC_W_SYSC_ALLOC3_SET(reg_val, value) \
+ (((reg_val) & ~SYSC_ALLOC_W_SYSC_ALLOC3_MASK) | \
+ (((value) << SYSC_ALLOC_W_SYSC_ALLOC3_SHIFT) & \
+ SYSC_ALLOC_W_SYSC_ALLOC3_MASK))
+/* End of SYSC_ALLOC_W_SYSC_ALLOC3 values */
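
A minimal sketch of updating one SYSC_ALLOC register with the accessors above (assumed in scope): each register packs eight 4-bit allocation hints, alternating read/write per source pair. Here only the first read/write pair is modified; the remaining nibbles are preserved by the SET macros.

#include <stdint.h>

static uint32_t ex_sysc_alloc_update(uint32_t reg_val)
{
	reg_val = SYSC_ALLOC_R_SYSC_ALLOC0_SET(reg_val, SYSC_ALLOC_ALWAYS_ALLOC);
	reg_val = SYSC_ALLOC_W_SYSC_ALLOC0_SET(reg_val, SYSC_ALLOC_NEVER_ALLOC);
	return reg_val;
}
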
/* Include POWER_CHANGED_SINGLE in debug builds for use in irq latency test. */
#ifdef CONFIG_MALI_DEBUG
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
index faf08ef..e1718c6 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
@@ -94,7 +94,10 @@ struct kbase_ipa_model_vinstr_data {
struct kbase_ipa_group {
const char *name;
s32 default_value;
- s64 (*op)(struct kbase_ipa_model_vinstr_data *, s32, u32);
+ s64 (*op)(
+ struct kbase_ipa_model_vinstr_data *model_data,
+ s32 coeff,
+ u32 counter_block_offset);
u32 counter_block_offset;
};
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
index a47699c..66e56e2 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
@@ -115,8 +115,8 @@ static const struct kbase_ipa_counter ipa_top_level_cntrs_def_ttux[] = {
};
/* These tables provide a description of each performance counter
- * used by the shader cores counter model for energy estimation.
- */
+ * used by the shader cores counter model for energy estimation.
+ */
static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_todx[] = {
SC_COUNTER_DEF("exec_instr_fma", 505449, EXEC_INSTR_FMA),
SC_COUNTER_DEF("tex_filt_num_operations", 574869, TEX_FILT_NUM_OPS),
@@ -150,7 +150,7 @@ static const struct kbase_ipa_counter ipa_shader_core_cntrs_def_ttux[] = {
SC_COUNTER_DEF("ls_mem_read_short", 322525, LS_MEM_READ_SHORT),
SC_COUNTER_DEF("full_quad_warps", 844124, FULL_QUAD_WARPS),
SC_COUNTER_DEF("exec_instr_cvt", 226411, EXEC_INSTR_CVT),
- SC_COUNTER_DEF("frag_quads_ezs_update",372032, FRAG_QUADS_EZS_UPDATE),
+ SC_COUNTER_DEF("frag_quads_ezs_update", 372032, FRAG_QUADS_EZS_UPDATE),
};
#define IPA_POWER_MODEL_OPS(gpu, init_token) \
@@ -224,8 +224,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
{
- const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
- GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+ const u32 prod_id =
+ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
case GPU_ID2_PRODUCT_TODX:
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index e095986..f11be0d 100644
--- a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -111,20 +111,21 @@ static u32 kbase_g7x_power_model_get_sc_counter(struct kbase_ipa_model_vinstr_da
/**
* memsys_single_counter() - calculate energy for a single Memory System performance counter.
- * @model_data: pointer to GPU model data.
- * @coeff: default value of coefficient for IPA group.
- * @offset: offset in bytes of the counter inside the block it belongs to.
+ * @model_data: pointer to GPU model data.
+ * @coeff: default value of coefficient for IPA group.
+ * @counter_block_offset: offset in bytes of the counter inside the block it belongs to.
*
* Return: Energy estimation for a single Memory System performance counter.
*/
static s64 kbase_g7x_sum_all_memsys_blocks(
struct kbase_ipa_model_vinstr_data *model_data,
s32 coeff,
- u32 offset)
+ u32 counter_block_offset)
{
u32 counter;
- counter = kbase_g7x_power_model_get_memsys_counter(model_data, offset);
+ counter = kbase_g7x_power_model_get_memsys_counter(model_data,
+ counter_block_offset);
return kbase_ipa_sum_all_memsys_blocks(model_data, coeff, counter);
}
@@ -531,8 +532,8 @@ const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
{
- const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
- GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+ const u32 prod_id =
+ (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
case GPU_ID2_PRODUCT_TMIX:
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index c0c0cbb..428e68b 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -71,7 +71,7 @@ KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
{
- const char* model_name =
+ const char *model_name =
kbase_ipa_counter_model_name_from_id(gpu_id);
if (!model_name)
@@ -610,7 +610,7 @@ static unsigned long kbase_get_dynamic_power(unsigned long freq,
/* Here unlike kbase_get_real_power(), shader core frequency is
* used for the scaling as simple power model is used to obtain
- * the value of dynamic coefficient (which is is a fixed value
+ * the value of dynamic coefficient (which is a fixed value
* retrieved from the device tree).
*/
power += kbase_scale_dynamic_power(
diff --git a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
index 14df542..d554fff 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
@@ -128,8 +128,14 @@ static ssize_t param_string_set(struct file *file, const char __user *user_buf,
err = kbase_ipa_model_recalculate(model);
if (err < 0) {
+ u32 string_len = strscpy(param->addr.str, old_str, param->size);
+
+ string_len += sizeof(char);
+		/* Make sure that the source string fits into the buffer. */
+ KBASE_DEBUG_ASSERT(string_len <= param->size);
+ CSTD_UNUSED(string_len);
+
ret = err;
- strlcpy(param->addr.str, old_str, param->size);
}
end:
@@ -275,7 +281,7 @@ static void kbase_ipa_model_debugfs_init(struct kbase_ipa_model *model)
"Type not set for %s parameter %s\n",
model->ops->name, param->name);
} else {
- debugfs_create_file(param->name, S_IRUGO | S_IWUSR,
+ debugfs_create_file(param->name, 0644,
dir, param, fops);
}
}
diff --git a/mali_kbase/ipa/mali_kbase_ipa_simple.c b/mali_kbase/ipa/mali_kbase_ipa_simple.c
index 55f1d1c..fadae7d 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_simple.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_simple.c
@@ -302,8 +302,12 @@ static int kbase_simple_power_model_recalculate(struct kbase_ipa_model *model)
model_data->gpu_tz = NULL;
} else {
char tz_name[THERMAL_NAME_LENGTH];
+ u32 string_len = strscpy(tz_name, model_data->tz_name, sizeof(tz_name));
- strlcpy(tz_name, model_data->tz_name, sizeof(tz_name));
+ string_len += sizeof(char);
+		/* Make sure that the source string fits into the buffer. */
+ KBASE_DEBUG_ASSERT(string_len <= sizeof(tz_name));
+ CSTD_UNUSED(string_len);
/* Release ipa.lock so that thermal_list_lock is not acquired
* with ipa.lock held, thereby avoid lock ordering violation
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index ac8f89b..13da5e3 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -124,6 +124,18 @@
/* Reset the GPU after each atom completion */
#define KBASE_SERIALIZE_RESET (1 << 2)
+/**
+ * enum kbase_timeout_selector - Selects which timeout is scaled using the
+ * lowest GPU frequency.
+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
+ * the enum.
+ */
+enum kbase_timeout_selector {
+
+ /* Must be the last in the enum */
+ KBASE_TIMEOUT_SELECTOR_COUNT
+};
+
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
* struct base_job_fault_event - keeps track of the atom which faulted or which
@@ -653,11 +665,12 @@ static inline bool kbase_jd_katom_is_protected(
/**
* kbase_atom_is_younger - query if one atom is younger by age than another
+ *
* @katom_a: the first atom
- * @katom_a: the second atom
+ * @katom_b: the second atom
*
- * Return: true if the first atom is strictly younger than the second, false
- * otherwise.
+ * Return: true if the first atom is strictly younger than the second,
+ * false otherwise.
*/
static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
const struct kbase_jd_atom *katom_b)
@@ -666,7 +679,9 @@ static inline bool kbase_jd_atom_is_younger(const struct kbase_jd_atom *katom_a,
}
/**
- * kbase_jd_atom_is_earlier
+ * kbase_jd_atom_is_earlier - Check whether the first atom has been submitted
+ * earlier than the second one
+ *
* @katom_a: the first atom
* @katom_b: the second atom
*
@@ -730,17 +745,13 @@ static inline bool kbase_jd_atom_is_earlier(const struct kbase_jd_atom *katom_a,
* A state machine is used to control incremental rendering.
*/
enum kbase_jd_renderpass_state {
- KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
- KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
- KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
- KBASE_JD_RP_OOM, /* OOM => RETRY */
- KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or
- * COMPLETE
- */
- KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or
- * COMPLETE
- */
- KBASE_JD_RP_RETRY_OOM, /* RETRY_OOM => RETRY */
+ KBASE_JD_RP_COMPLETE, /* COMPLETE => START */
+ KBASE_JD_RP_START, /* START => PEND_OOM or COMPLETE */
+ KBASE_JD_RP_PEND_OOM, /* PEND_OOM => OOM or COMPLETE */
+ KBASE_JD_RP_OOM, /* OOM => RETRY */
+ KBASE_JD_RP_RETRY, /* RETRY => RETRY_PEND_OOM or COMPLETE */
+ KBASE_JD_RP_RETRY_PEND_OOM, /* RETRY_PEND_OOM => RETRY_OOM or COMPLETE */
+ KBASE_JD_RP_RETRY_OOM /* RETRY_OOM => RETRY */
};
/**
@@ -813,7 +824,7 @@ struct kbase_jd_renderpass {
* atom completes
* execution on GPU or the input fence get signaled.
* @tb_lock: Lock to serialize the write access made to @tb to
- * to store the register access trace messages.
+ * store the register access trace messages.
* @tb: Pointer to the Userspace accessible buffer storing
* the trace messages for register read/write
* accesses made by the Kbase. The buffer is filled
diff --git a/mali_kbase/jm/mali_kbase_jm_js.h b/mali_kbase/jm/mali_kbase_jm_js.h
index 5a972a5..f01e8bb 100644
--- a/mali_kbase/jm/mali_kbase_jm_js.h
+++ b/mali_kbase/jm/mali_kbase_jm_js.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,8 @@
* The struct kbasep_js_device_data sub-structure of kbdev must be zero
* initialized before passing to the kbasep_js_devdata_init() function. This is
* to give efficient error path code.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbasep_js_devdata_init(struct kbase_device * const kbdev);
@@ -86,6 +88,8 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev);
*
* The struct kbase_context must be zero initialized before passing to the
* kbase_js_init() function. This is to give efficient error path code.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbasep_js_kctx_init(struct kbase_context *const kctx);
@@ -206,7 +210,7 @@ bool kbasep_js_add_job(struct kbase_context *kctx, struct kbase_jd_atom *atom);
* @kbdev: The kbase_device to operate on
* @kctx: The kbase_context to operate on
* @atom: Atom to remove
-*
+ *
* Completely removing a job requires several calls:
* * kbasep_js_copy_atom_retained_state(), to capture the 'retained state' of
* the atom
@@ -356,9 +360,10 @@ void kbasep_js_runpool_release_ctx(struct kbase_device *kbdev,
struct kbase_context *kctx);
/**
- * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
+ * kbasep_js_runpool_release_ctx_and_katom_retained_state - Variant of
* kbasep_js_runpool_release_ctx() that handles additional
* actions from completing an atom.
+ *
* @kbdev: KBase device
* @kctx: KBase context
* @katom_retained_state: Retained state from the atom
@@ -381,8 +386,8 @@ void kbasep_js_runpool_release_ctx_and_katom_retained_state(
struct kbasep_js_atom_retained_state *katom_retained_state);
/**
- * kbasep_js_runpool_release_ctx_nolock -
- * Variant of kbase_js_runpool_release_ctx() w/out locks
+ * kbasep_js_runpool_release_ctx_nolock - Variant of kbase_js_runpool_release_ctx()
+ * without locks
* @kbdev: KBase device
* @kctx: KBase context
*
@@ -396,6 +401,7 @@ void kbasep_js_runpool_release_ctx_nolock(struct kbase_device *kbdev,
/**
* kbasep_js_schedule_privileged_ctx - Schedule in a privileged context
+ *
* @kbdev: KBase device
* @kctx: KBase context
*
@@ -459,7 +465,7 @@ void kbase_js_try_run_jobs(struct kbase_device *kbdev);
* contexts from (re)entering the runpool.
*
* This does not handle suspending the one privileged context: the caller must
- * instead do this by by suspending the GPU HW Counter Instrumentation.
+ * instead do this by suspending the GPU HW Counter Instrumentation.
*
* This will eventually cause all Power Management active references held by
* contexts on the runpool to be released, without running any more atoms.
@@ -688,6 +694,8 @@ void kbase_js_update_ctx_priority(struct kbase_context *kctx);
* As with any bool, never test the return value with true.
*
* The caller must hold hwaccess_lock.
+ *
+ * Return: true if the context is allowed to submit jobs, false otherwise.
*/
static inline bool kbasep_js_is_submit_allowed(
struct kbasep_js_device_data *js_devdata,
@@ -768,8 +776,9 @@ static inline void kbasep_js_clear_submit_allowed(
}
/**
- * kbasep_js_atom_retained_state_init_invalid -
- * Create an initial 'invalid' atom retained state
+ * kbasep_js_atom_retained_state_init_invalid - Create an initial 'invalid'
+ * atom retained state
+ *
* @retained_state: pointer where to create and initialize the state
*
* Create an initial 'invalid' atom retained state, that requires no
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index a1d40ba..652f383 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -55,10 +55,11 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
* @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
* Compute jobs.
* @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
- * Non-Compute jobs.
+ * Non-Compute jobs.
* @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
- * contains compute-job atoms that aren't restricted to a coherent group,
- * and can run on all cores.
+ * contains compute-job atoms that aren't
+ * restricted to a coherent group,
+ * and can run on all cores.
* @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
*
* Each context attribute can be thought of as a boolean value that caches some
@@ -115,7 +116,6 @@ typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
* BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
* enough to handle anyway.
*
- *
*/
enum kbasep_js_ctx_attr {
KBASEP_JS_CTX_ATTR_COMPUTE,
@@ -217,44 +217,46 @@ typedef u32 kbase_atom_ordering_flag_t;
/**
* struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
* @runpool_irq: Sub-structure to collect together Job Scheduling data used in
- * IRQ context. The hwaccess_lock must be held when accessing.
+ * IRQ context. The hwaccess_lock must be held when accessing.
* @runpool_irq.submit_allowed: Bitvector indicating whether a currently
- * scheduled context is allowed to submit jobs. When bit 'N' is set in
- * this, it indicates whether the context bound to address space 'N' is
- * allowed to submit jobs.
+ * scheduled context is allowed to submit jobs.
+ * When bit 'N' is set in this, it indicates whether
+ * the context bound to address space 'N' is
+ * allowed to submit jobs.
* @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
- * Each is large enough to hold a refcount of the number of contexts
- * that can fit into the runpool. This is currently BASE_MAX_NR_AS.
- * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
- * the refcount. Hence, it's not worthwhile reducing this to
- * bit-manipulation on u32s to save space (where in contrast, 4 bit
- * sub-fields would be easy to do and would save space).
- * Whilst this must not become negative, the sign bit is used for:
- * - error detection in debug builds
- * - Optimization: it is undefined for a signed int to overflow, and so
- * the compiler can optimize for that never happening (thus, no masking
- * is required on updating the variable)
+ * Each is large enough to hold a refcount of the number of contexts
+ * that can fit into the runpool. This is currently BASE_MAX_NR_AS.
+ * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
+ * the refcount. Hence, it's not worthwhile reducing this to
+ * bit-manipulation on u32s to save space (where in contrast, 4 bit
+ * sub-fields would be easy to do and would save space).
+ * Whilst this must not become negative, the sign bit is used for:
+ * - error detection in debug builds
+ * - Optimization: it is undefined for a signed int to overflow, and so
+ * the compiler can optimize for that never happening (thus, no masking
+ * is required on updating the variable)
* @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
- * to aid affinity checking. Element 'n' bit 'i' indicates that slot 'n'
- * is using core i (i.e. slot_affinity_refcount[n][i] > 0)
+ * to aid affinity checking.
+ * Element 'n' bit 'i' indicates that slot 'n'
+ * is using core i (i.e. slot_affinity_refcount[n][i] > 0)
+ * @runpool_irq.slot_affinity_refcount: Array of refcounts for each core owned
- * by each slot. Used to generate the slot_affinities array of bitvectors.
- * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
- * because it is refcounted only when a job is definitely about to be
- * submitted to a slot, and is de-refcounted immediately after a job
- * finishes
+ * by each slot. Used to generate the slot_affinities array of bitvectors.
+ * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
+ * because it is refcounted only when a job is definitely about to be
+ * submitted to a slot, and is de-refcounted immediately after a job
+ * finishes
* @schedule_sem: Scheduling semaphore. This must be held when calling
- * kbase_jm_kick()
+ * kbase_jm_kick()
* @ctx_list_pullable: List of contexts that can currently be pulled from
* @ctx_list_unpullable: List of contexts that can not currently be pulled
- * from, but have jobs currently running.
+ * from, but have jobs currently running.
* @nr_user_contexts_running: Number of currently scheduled user contexts
- * (excluding ones that are not submitting jobs)
+ * (excluding ones that are not submitting jobs)
* @nr_all_contexts_running: Number of currently scheduled contexts (including
- * ones that are not submitting jobs)
+ * ones that are not submitting jobs)
+ * @js_reqs: Core Requirements to match up with base_js_atom's core_req member
- * @note This is a write-once member, and so no locking is required to
- * read
+ * @note This is a write-once member, and so no locking is required to
+ * read
* @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS
* @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS
* @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL
@@ -268,16 +270,16 @@ typedef u32 kbase_atom_ordering_flag_t;
* @suspended_soft_jobs_list: List of suspended soft jobs
* @softstop_always: Support soft-stop on a single context
* @init_status:The initialized-flag is placed at the end, to avoid
- * cache-pollution (we should only be using this during init/term paths).
- * @note This is a write-once member, and so no locking is required to
- * read
+ * cache-pollution (we should only be using this during init/term paths).
+ * @note This is a write-once member, and so no locking is required to
+ * read
* @nr_contexts_pullable:Number of contexts that can currently be pulled from
* @nr_contexts_runnable:Number of contexts that can either be pulled from or
- * arecurrently running
+ *                       are currently running
* @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
* @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
- * independently of the Run Pool.
- * Of course, you don't need the Run Pool lock to access this.
+ * independently of the Run Pool.
+ * Of course, you don't need the Run Pool lock to access this.
* @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
*
* This encapsulates the current context of the Job Scheduler on a particular
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 0f2b106..a713681 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -168,6 +168,7 @@ __attribute__((unused)) static const enum base_hw_feature base_hw_features_tTUx[
BASE_HW_FEATURE_L2_CONFIG,
BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
BASE_HW_FEATURE_ASN_HASH,
+ BASE_HW_FEATURE_GPU_SLEEP,
BASE_HW_FEATURE_END
};
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index ad45325..8766a6d 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -60,6 +60,7 @@ enum base_hw_issue {
BASE_HW_ISSUE_TTRX_3485,
BASE_HW_ISSUE_GPU2019_3212,
BASE_HW_ISSUE_TURSEHW_1997,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -596,6 +597,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tODx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -605,6 +607,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tOD
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_GPU2019_3212,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -612,6 +615,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tGRx_r0p0
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -620,6 +624,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -627,6 +632,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -635,6 +641,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVA
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -643,6 +650,7 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tTU
BASE_HW_ISSUE_9435,
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
@@ -651,6 +659,15 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r0p0
BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337,
BASE_HW_ISSUE_TURSEHW_1997,
+ BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tTUx_r1p0[] = {
+ BASE_HW_ISSUE_9435,
+ BASE_HW_ISSUE_TSIX_2033,
+ BASE_HW_ISSUE_TTRX_1337,
+ BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_END
};
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 6bcb754..f8a05ce 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -109,9 +109,9 @@
struct kbase_device *kbase_device_alloc(void);
/*
-* note: configuration attributes member of kbdev needs to have
-* been setup before calling kbase_device_init
-*/
+ * note: configuration attributes member of kbdev needs to have
+ * been setup before calling kbase_device_init
+ */
int kbase_device_misc_init(struct kbase_device *kbdev);
void kbase_device_misc_term(struct kbase_device *kbdev);
@@ -256,8 +256,26 @@ void kbase_jd_done(struct kbase_jd_atom *katom, int slot_nr, ktime_t *end_timest
kbasep_js_atom_done_code done_code);
void kbase_jd_cancel(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
void kbase_jd_zap_context(struct kbase_context *kctx);
-bool jd_done_nolock(struct kbase_jd_atom *katom,
- struct list_head *completed_jobs_ctx);
+
+/*
+ * jd_done_nolock - Perform the necessary handling of an atom that has
+ * completed execution.
+ *
+ * @katom: Pointer to the atom that completed the execution
+ * @post_immediately: Flag indicating that completion event can be posted
+ *                    immediately for @katom and the other atoms dependent
+ * on @katom which also completed execution. The flag is
+ * false only for the case where the function is called by
+ * kbase_jd_done_worker() on the completion of atom running
+ * on the GPU.
+ *
+ * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
+ * is responsible for calling kbase_finish_soft_job *before* calling this function.
+ *
+ * The caller must hold the kbase_jd_context.lock.
+ */
+bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately);
+
void kbase_jd_free_external_resources(struct kbase_jd_atom *katom);
void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom);
@@ -299,19 +317,73 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
* virtual address space in a growable memory region and the atom currently
* executing on a job slot is the tiler job chain at the start of a renderpass.
*
- * Return 0 if successful, otherwise a negative error code.
+ * Return: 0 if successful, otherwise a negative error code.
*/
int kbase_job_slot_softstop_start_rp(struct kbase_context *kctx,
struct kbase_va_region *reg);
+/**
+ * kbase_job_slot_softstop - Soft-stop the specified job slot
+ *
+ * @kbdev: The kbase device
+ * @js: The job slot to soft-stop
+ * @target_katom: The job that should be soft-stopped (or NULL for any job)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ * The job slot must not already be in the process of being soft-stopped.
+ *
+ * Where possible any job in the next register is evicted before the soft-stop.
+ */
void kbase_job_slot_softstop(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom);
+
void kbase_job_slot_softstop_swflags(struct kbase_device *kbdev, int js,
struct kbase_jd_atom *target_katom, u32 sw_flags);
+
+/**
+ * kbase_job_slot_hardstop - Hard-stop the specified job slot
+ * @kctx: The kbase context that contains the job(s) that should
+ * be hard-stopped
+ * @js: The job slot to hard-stop
+ * @target_katom: The job that should be hard-stopped (or NULL for all
+ * jobs from the context)
+ * Context:
+ * The job slot lock must be held when calling this function.
+ */
void kbase_job_slot_hardstop(struct kbase_context *kctx, int js,
struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_job_check_enter_disjoint - potentially enter disjoint mode
+ * @kbdev: kbase device
+ * @action: the event which has occurred
+ * @core_reqs: core requirements of the atom
+ * @target_katom: the atom which is being affected
+ *
+ * For a certain soft-stop action, work out whether to enter disjoint
+ * state.
+ *
+ * This does not register multiple disjoint events if the atom has already
+ * started a disjoint period
+ *
+ * @core_reqs can be supplied as 0 if the atom had not started on the hardware
+ * (and so a 'real' soft/hard-stop was not required, but it still interrupted
+ * flow, perhaps on another context)
+ *
+ * kbase_job_check_leave_disjoint() should be used to end the disjoint
+ * state when the soft/hard-stop action is complete
+ */
void kbase_job_check_enter_disjoint(struct kbase_device *kbdev, u32 action,
base_jd_core_req core_reqs, struct kbase_jd_atom *target_katom);
+
+/**
+ * kbase_job_check_leave_disjoint - potentially leave disjoint state
+ * @kbdev: kbase device
+ * @target_katom: atom which is finishing
+ *
+ * Work out whether to leave disjoint state when finishing an atom that was
+ * originated by kbase_job_check_enter_disjoint().
+ */
void kbase_job_check_leave_disjoint(struct kbase_device *kbdev,
struct kbase_jd_atom *target_katom);
@@ -334,7 +406,7 @@ void kbase_event_wakeup(struct kbase_context *kctx);
* allocation is to be validated.
* @info: Pointer to struct @base_jit_alloc_info
* which is to be validated.
- * @return: 0 if jit allocation is valid; negative error code otherwise
+ * Return: 0 if jit allocation is valid; negative error code otherwise
*/
int kbasep_jit_alloc_validate(struct kbase_context *kctx,
struct base_jit_alloc_info *info);
@@ -381,9 +453,12 @@ static inline void kbase_free_user_buffer(
* @buf_data: Pointer to the information about external resources:
* pages pertaining to the external resource, number of
* pages to copy.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_mem_copy_from_extres(struct kbase_context *kctx,
struct kbase_debug_copy_buffer *buf_data);
+
#if !MALI_USE_CSF
int kbase_process_soft_job(struct kbase_jd_atom *katom);
int kbase_prepare_soft_job(struct kbase_jd_atom *katom);
@@ -405,7 +480,9 @@ void kbasep_complete_triggered_soft_events(struct kbase_context *kctx, u64 evt);
void kbasep_as_do_poke(struct work_struct *work);
/**
- * Check whether a system suspend is in progress, or has already been suspended
+ * kbase_pm_is_suspending - Check whether a system suspend is in progress,
+ * or has already been suspended
+ *
* @kbdev: The kbase device structure for the device
*
* The caller should ensure that either kbdev->pm.active_count_lock is held, or
@@ -533,10 +610,12 @@ int kbase_pm_force_mcu_wakeup_after_sleep(struct kbase_device *kbdev);
#if !MALI_USE_CSF
/**
- * Return the atom's ID, as was originally supplied by userspace in
+ * kbase_jd_atom_id - Return the atom's ID, as was originally supplied by userspace in
* base_jd_atom::atom_number
* @kctx: KBase context pointer
+ * @katom: Atom for which to return the ID
+ *
+ * Return: the atom's ID.
*/
static inline int kbase_jd_atom_id(struct kbase_context *kctx,
const struct kbase_jd_atom *katom)
@@ -567,7 +646,9 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
#endif /* !MALI_USE_CSF */
/**
- * Initialize the disjoint state
+ * kbase_disjoint_init - Initialize the disjoint state
+ *
+ * @kbdev: The kbase device
*
* The disjoint event count and state are both set to zero.
*
@@ -589,14 +670,12 @@ static inline struct kbase_jd_atom *kbase_jd_atom_from_id(
* The disjoint event counter is also incremented immediately whenever a job is soft stopped
* and during context creation.
*
- * @kbdev: The kbase device
- *
* Return: 0 on success and non-zero value on failure.
*/
void kbase_disjoint_init(struct kbase_device *kbdev);
/**
- * Increase the count of disjoint events
+ * kbase_disjoint_event - Increase the count of disjoint events
* called when a disjoint event has happened
*
* @kbdev: The kbase device
@@ -604,42 +683,44 @@ void kbase_disjoint_init(struct kbase_device *kbdev);
void kbase_disjoint_event(struct kbase_device *kbdev);
/**
- * Increase the count of disjoint events only if the GPU is in a disjoint state
+ * kbase_disjoint_event_potential - Increase the count of disjoint events
+ * only if the GPU is in a disjoint state
+ *
+ * @kbdev: The kbase device
*
* This should be called when something happens which could be disjoint if the GPU
* is in a disjoint state. The state refcount keeps track of this.
- *
- * @kbdev: The kbase device
*/
void kbase_disjoint_event_potential(struct kbase_device *kbdev);
/**
- * Returns the count of disjoint events
+ * kbase_disjoint_event_get - Returns the count of disjoint events
*
* @kbdev: The kbase device
- * @return the count of disjoint events
+ * Return: the count of disjoint events
*/
u32 kbase_disjoint_event_get(struct kbase_device *kbdev);
/**
- * Increment the refcount state indicating that the GPU is in a disjoint state.
+ * kbase_disjoint_state_up - Increment the refcount state indicating that
+ * the GPU is in a disjoint state.
+ *
+ * @kbdev: The kbase device
*
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
* eventually after the disjoint state has completed @ref kbase_disjoint_state_down
* should be called
- *
- * @kbdev: The kbase device
*/
void kbase_disjoint_state_up(struct kbase_device *kbdev);
/**
- * Decrement the refcount state
+ * kbase_disjoint_state_down - Decrement the refcount state
+ *
+ * @kbdev: The kbase device
*
* Also Increment the disjoint event count (calls @ref kbase_disjoint_event)
*
* Called after @ref kbase_disjoint_state_up once the disjoint state is over
- *
- * @kbdev: The kbase device
*/
void kbase_disjoint_state_down(struct kbase_device *kbdev);
@@ -668,8 +749,8 @@ int kbase_device_pcm_dev_init(struct kbase_device *const kbdev);
void kbase_device_pcm_dev_term(struct kbase_device *const kbdev);
/**
- * If a job is soft stopped and the number of contexts is >= this value
- * it is reported as a disjoint event
+ * KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD - If a job is soft stopped
+ * and the number of contexts is >= this value, it is reported as a disjoint event
*/
#define KBASE_DISJOINT_STATE_INTERLEAVED_CONTEXT_COUNT_THRESHOLD 2
diff --git a/mali_kbase/mali_kbase_as_fault_debugfs.c b/mali_kbase/mali_kbase_as_fault_debugfs.c
index deb412c..77f450d 100644
--- a/mali_kbase/mali_kbase_as_fault_debugfs.c
+++ b/mali_kbase/mali_kbase_as_fault_debugfs.c
@@ -99,7 +99,7 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
} else {
for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
snprintf(as_name, ARRAY_SIZE(as_name), "as%u", i);
- debugfs_create_file(as_name, S_IRUGO,
+ debugfs_create_file(as_name, 0444,
debugfs_directory,
(void *)(uintptr_t)i,
&as_fault_fops);
@@ -108,5 +108,4 @@ void kbase_as_fault_debugfs_init(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_DEBUG */
#endif /* CONFIG_DEBUG_FS */
- return;
}
diff --git a/mali_kbase/mali_kbase_as_fault_debugfs.h b/mali_kbase/mali_kbase_as_fault_debugfs.h
index 919fbc1..ecd2d2d 100644
--- a/mali_kbase/mali_kbase_as_fault_debugfs.h
+++ b/mali_kbase/mali_kbase_as_fault_debugfs.h
@@ -43,7 +43,6 @@ kbase_as_fault_debugfs_new(struct kbase_device *kbdev, int as_no)
kbdev->debugfs_as_read_bitmap |= (1ULL << as_no);
#endif /* CONFIG_DEBUG_FS */
#endif /* CONFIG_MALI_DEBUG */
- return;
}
#endif /*_KBASE_AS_FAULT_DEBUG_FS_H*/
diff --git a/mali_kbase/mali_kbase_caps.h b/mali_kbase/mali_kbase_caps.h
index c232e21..6aa31f3 100644
--- a/mali_kbase/mali_kbase_caps.h
+++ b/mali_kbase/mali_kbase_caps.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,15 +28,24 @@
#include <linux/types.h>
-typedef enum mali_kbase_cap {
+/**
+ * enum mali_kbase_cap - Enumeration for kbase capability
+ *
+ * @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor
+ * @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit
+ * @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault
+ * @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory
+ * @MALI_KBASE_NUM_CAPS: Delimiter
+ */
+enum mali_kbase_cap {
MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
MALI_KBASE_CAP_MEM_GROW_ON_GPF,
MALI_KBASE_CAP_MEM_PROTECTED,
MALI_KBASE_NUM_CAPS
-} mali_kbase_cap;
+};
-extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap);
+extern bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap);
static inline bool mali_kbase_supports_system_monitor(unsigned long api_version)
{
diff --git a/mali_kbase/mali_kbase_ccswe.h b/mali_kbase/mali_kbase_ccswe.h
index 8e55ffc..f7fcf77 100644
--- a/mali_kbase/mali_kbase_ccswe.h
+++ b/mali_kbase/mali_kbase_ccswe.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,7 +51,6 @@ struct kbase_ccswe {
*/
void kbase_ccswe_init(struct kbase_ccswe *self);
-
/**
* kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp.
*
@@ -68,7 +67,7 @@ void kbase_ccswe_init(struct kbase_ccswe *self);
* u64 ts = ktime_get_raw_ns();
* u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts)
*
- * Returns: estimated value of cycle count at a given time.
+ * Return: estimated value of cycle count at a given time.
*/
u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns);
diff --git a/mali_kbase/mali_kbase_config.h b/mali_kbase/mali_kbase_config.h
index 8b7ee13..ecfdb28 100644
--- a/mali_kbase/mali_kbase_config.h
+++ b/mali_kbase/mali_kbase_config.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -246,8 +246,6 @@ struct kbase_pm_callback_conf {
*
* For linux this callback will be called by the kernel runtime_suspend callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
- *
- * @return 0 on success, else OS error code.
*/
void (*power_runtime_off_callback)(struct kbase_device *kbdev);
@@ -255,6 +253,8 @@ struct kbase_pm_callback_conf {
*
* For linux this callback will be called by the kernel runtime_resume callback.
* Note: for linux the kernel must have CONFIG_PM_RUNTIME enabled to use this feature.
+ *
+ * @return 0 on success, else OS error code.
*/
int (*power_runtime_on_callback)(struct kbase_device *kbdev);
@@ -455,7 +455,7 @@ struct kbase_platform_config {
/**
* kbase_get_platform_config - Gets the pointer to platform config.
*
- * @return Pointer to the platform config
+ * Return: Pointer to the platform config
*/
struct kbase_platform_config *kbase_get_platform_config(void);
@@ -564,7 +564,6 @@ void kbasep_platform_event_atom_complete(struct kbase_jd_atom *katom);
#ifndef CONFIG_OF
/**
* kbase_platform_register - Register a platform device for the GPU
- *
* This can be used to register a platform device on systems where device tree
* is not enabled and the platform initialisation code in the kernel doesn't
* create the GPU device. Where possible device tree should be used instead.
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 8d64184..716c74a 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,33 +31,27 @@
#include <mali_kbase_config_platform.h>
enum {
- /**
- * Use unrestricted Address ID width on the AXI bus.
- */
+ /* Use unrestricted Address ID width on the AXI bus. */
KBASE_AID_32 = 0x0,
- /**
- * Restrict GPU to a half of maximum Address ID count.
+ /* Restrict GPU to a half of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
KBASE_AID_16 = 0x3,
- /**
- * Restrict GPU to a quarter of maximum Address ID count.
+ /* Restrict GPU to a quarter of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
- KBASE_AID_8 = 0x2,
+ KBASE_AID_8 = 0x2,
- /**
- * Restrict GPU to an eighth of maximum Address ID count.
+ /* Restrict GPU to an eighth of maximum Address ID count.
* This will reduce performance, but reduce bus load due to GPU.
*/
- KBASE_AID_4 = 0x1
+ KBASE_AID_4 = 0x1
};
enum {
- /**
- * Use unrestricted Address ID width on the AXI bus.
+ /* Use unrestricted Address ID width on the AXI bus.
* Restricting ID width will reduce performance & bus load due to GPU.
*/
KBASE_3BIT_AID_32 = 0x0,
@@ -78,21 +72,18 @@ enum {
KBASE_3BIT_AID_12 = 0x5,
/* Restrict GPU to 1/4 of maximum Address ID count. */
- KBASE_3BIT_AID_8 = 0x6,
+ KBASE_3BIT_AID_8 = 0x6,
/* Restrict GPU to 1/8 of maximum Address ID count. */
- KBASE_3BIT_AID_4 = 0x7
+ KBASE_3BIT_AID_4 = 0x7
};
-/**
- * Default period for DVFS sampling (can be overridden by platform header)
- */
+/* Default period for DVFS sampling (can be overridden by platform header) */
#ifndef DEFAULT_PM_DVFS_PERIOD
#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
#endif
-/**
- * Power Management poweroff tick granuality. This is in nanoseconds to
+/* Power Management poweroff tick granularity. This is in nanoseconds to
* allow HR timer support (can be overridden by platform header).
*
* On each scheduling tick, the power manager core may decide to:
@@ -103,95 +94,106 @@ enum {
#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
#endif
-/**
- * Power Manager number of ticks before shader cores are powered off
+/* Power Manager number of ticks before shader cores are powered off
* (can be overridden by platform header).
*/
#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
#endif
-/**
- * Default scheduling tick granuality (can be overridden by platform header)
- */
+/* Default scheduling tick granularity (can be overridden by platform header) */
#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
#endif
-/**
- * Default minimum number of scheduling ticks before jobs are soft-stopped.
+/* Default minimum number of scheduling ticks before jobs are soft-stopped.
*
* This defines the time-slice for a job (which may be different from that of a
* context)
*/
#define DEFAULT_JS_SOFT_STOP_TICKS (1) /* 100ms-200ms */
-/**
- * Default minimum number of scheduling ticks before CL jobs are soft-stopped.
- */
+/* Default minimum number of scheduling ticks before CL jobs are soft-stopped. */
#define DEFAULT_JS_SOFT_STOP_TICKS_CL (1) /* 100ms-200ms */
-/**
- * Default minimum number of scheduling ticks before jobs are hard-stopped
- */
+/* Default minimum number of scheduling ticks before jobs are hard-stopped */
#define DEFAULT_JS_HARD_STOP_TICKS_SS (50) /* 5s */
-/**
- * Default minimum number of scheduling ticks before CL jobs are hard-stopped.
- */
+/* Default minimum number of scheduling ticks before CL jobs are hard-stopped. */
#define DEFAULT_JS_HARD_STOP_TICKS_CL (50) /* 5s */
-/**
- * Default minimum number of scheduling ticks before jobs are hard-stopped
+/* Default minimum number of scheduling ticks before jobs are hard-stopped
* during dumping
*/
#define DEFAULT_JS_HARD_STOP_TICKS_DUMPING (15000) /* 1500s */
-/**
- * Default timeout for some software jobs, after which the software event wait
+/* Default timeout for some software jobs, after which the software event wait
* jobs will be cancelled.
*/
#define DEFAULT_JS_SOFT_JOB_TIMEOUT (3000) /* 3s */
-/**
- * Default minimum number of scheduling ticks before the GPU is reset to clear a
+/* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job
*/
#define DEFAULT_JS_RESET_TICKS_SS (55) /* 5.5s */
-/**
- * Default minimum number of scheduling ticks before the GPU is reset to clear a
+/* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" CL job.
*/
#define DEFAULT_JS_RESET_TICKS_CL (55) /* 5.5s */
-/**
- * Default minimum number of scheduling ticks before the GPU is reset to clear a
+/* Default minimum number of scheduling ticks before the GPU is reset to clear a
* "stuck" job during dumping.
*/
#define DEFAULT_JS_RESET_TICKS_DUMPING (15020) /* 1502s */
-/**
- * Default number of milliseconds given for other jobs on the GPU to be
+/* Default number of milliseconds given for other jobs on the GPU to be
* soft-stopped when the GPU needs to be reset.
*/
#define DEFAULT_RESET_TIMEOUT_MS (3000) /* 3s */
-/* Waiting timeout for status change acknowledgment, in clock cycles
- * Based on 3000ms timeout at nominal 100MHz, as is required for Android - based
- * on scaling from a 50MHz GPU system.
+/* Nominal reference frequency that was used to obtain all following
+ * <...>_TIMEOUT_CYCLES macros, in kHz.
+ *
+ * Timeouts are scaled based on the relation between this value and the lowest
+ * GPU clock frequency.
*/
#define DEFAULT_REF_TIMEOUT_FREQ_KHZ (100000)
-#define CSF_FIRMWARE_TIMEOUT_CYCLES (300000000)
-/* A default timeout to be used when an invalid timeout selector is
- * used to retrieve the timeout, on JM GPUs. CSF GPUs use the Firmware
- * timeout as the default.
+#if MALI_USE_CSF
+/* Waiting timeout for status change acknowledgment, in clock cycles.
+ *
+ * This is also the default timeout to be used when an invalid timeout
+ * selector is used to retrieve the timeout on CSF GPUs.
+ *
+ * Based on a 75000ms timeout at nominal 100MHz, as required for Android,
+ * scaled from a 50MHz GPU system.
+ */
+#define CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000)
+
+/* Timeout in clock cycles for GPU Power Management to reach the desired
+ * Shader, L2 and MCU state.
+ *
+ * Based on 2500ms timeout at nominal 100MHz, scaled from a 50MHz GPU system.
+ */
+#define CSF_PM_TIMEOUT_CYCLES (250000000)
+
+/* Waiting timeout in clock cycles for GPU reset to complete.
+ *
+ * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system.
+ */
+#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000)
+
+#else /* MALI_USE_CSF */
+
+/* A default timeout in clock cycles to be used when an invalid timeout
+ * selector is used to retrieve the timeout, on JM GPUs.
*/
#define JM_DEFAULT_TIMEOUT_CYCLES (150000000)
-/**
- * Default timeslice that a context is scheduled in for, in nanoseconds.
+#endif /* MALI_USE_CSF */
+
+/* Default timeslice that a context is scheduled in for, in nanoseconds.
*
* When a context has used up this amount of time across its jobs, it is
* scheduled out to let another run.
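The cycle-based CSF timeouts above convert to wall-clock time by dividing by the lowest GPU clock frequency: at the nominal 100000 kHz reference, CSF_FIRMWARE_TIMEOUT_CYCLES (7500000000) gives the quoted 75000 ms, and CSF_PM_TIMEOUT_CYCLES / CSF_GPU_RESET_TIMEOUT_CYCLES (250000000) give 2500 ms. A minimal sketch of that conversion, assuming the lowest frequency is known in kHz (illustrative helper, not a function from this patch):

#include <linux/types.h>
#include <linux/math64.h>

/* ms = cycles / freq_khz, because a frequency of f kHz is f cycles per ms.
 * e.g. div_u64(7500000000ULL, 100000) == 75000 ms.
 */
static u64 example_timeout_cycles_to_ms(u64 timeout_cycles, u32 lowest_freq_khz)
{
	return div_u64(timeout_cycles, lowest_freq_khz);
}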
@@ -201,16 +203,14 @@ enum {
*/
#define DEFAULT_JS_CTX_TIMESLICE_NS (50000000) /* 50ms */
-/**
- * Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
+/* Maximum frequency (in kHz) that the GPU can be clocked. For some platforms
* this isn't available, so we simply define a dummy value here. If devfreq
* is enabled the value will be read from there, otherwise this should be
* overridden by defining GPU_FREQ_KHZ_MAX in the platform file.
*/
#define DEFAULT_GPU_FREQ_KHZ_MAX (5000)
-/**
- * Default timeout for task execution on an endpoint
+/* Default timeout for task execution on an endpoint
*
* Number of GPU clock cycles before the driver terminates a task that is
* making no forward progress on an endpoint (e.g. shader core).
@@ -219,8 +219,7 @@ enum {
*/
#define DEFAULT_PROGRESS_TIMEOUT ((u64)5 * 500 * 1024 * 1024)
-/**
- * Default threshold at which to switch to incremental rendering
+/* Default threshold at which to switch to incremental rendering
*
* Fraction of the maximum size of an allocation that grows on GPU page fault
* that can be used up before the driver switches to incremental rendering,
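DEFAULT_PROGRESS_TIMEOUT above follows the same cycle-based convention; as an illustrative check of the constant (not text from the patch), (u64)5 * 500 * 1024 * 1024 = 2621440000 cycles, which is 5 seconds on an endpoint clocked at 500 * 1024 * 1024 Hz (about 524 MHz), and proportionally longer on slower clocks.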
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index 0cbbf44..5197e4a 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -86,6 +86,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/of_platform.h>
#include <linux/miscdevice.h>
@@ -140,12 +141,15 @@
#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF)
/**
- * typedef mali_kbase_capability_def - kbase capabilities table
+ * struct mali_kbase_capability_def - kbase capabilities table
+ *
+ * @required_major: required major
+ * @required_minor: required minor
*/
-typedef struct mali_kbase_capability_def {
+struct mali_kbase_capability_def {
u16 required_major;
u16 required_minor;
-} mali_kbase_capability_def;
+};
/*
* This must be kept in-sync with mali_kbase_cap
@@ -153,32 +157,34 @@ typedef struct mali_kbase_capability_def {
* TODO: The alternative approach would be to embed the cap enum values
* in the table. Less efficient but potentially safer.
*/
-static mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = {
+static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = {
#if MALI_USE_CSF
- { 1, 0 }, /* SYSTEM_MONITOR */
- { 1, 0 }, /* JIT_PRESSURE_LIMIT */
- { 1, 0 }, /* MEM_GROW_ON_GPF */
- { 1, 0 } /* MEM_PROTECTED */
+ { 1, 0 }, /* SYSTEM_MONITOR */
+ { 1, 0 }, /* JIT_PRESSURE_LIMIT */
+ { 1, 0 }, /* MEM_GROW_ON_GPF */
+ { 1, 0 } /* MEM_PROTECTED */
#else
- { 11, 15 }, /* SYSTEM_MONITOR */
- { 11, 25 }, /* JIT_PRESSURE_LIMIT */
- { 11, 2 }, /* MEM_GROW_ON_GPF */
- { 11, 2 } /* MEM_PROTECTED */
+ { 11, 15 }, /* SYSTEM_MONITOR */
+ { 11, 25 }, /* JIT_PRESSURE_LIMIT */
+ { 11, 2 }, /* MEM_GROW_ON_GPF */
+ { 11, 2 } /* MEM_PROTECTED */
#endif
};
/**
* mali_kbase_supports_cap - Query whether a kbase capability is supported
*
- * @api_version: API version to convert
- * @cap: Capability to query for - see mali_kbase_caps.h
+ * @api_version: API version to convert
+ * @cap: Capability to query for - see mali_kbase_caps.h
+ *
+ * Return: true if the capability is supported
*/
-bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap)
+bool mali_kbase_supports_cap(unsigned long api_version, enum mali_kbase_cap cap)
{
bool supported = false;
unsigned long required_ver;
- mali_kbase_capability_def const *cap_def;
+ struct mali_kbase_capability_def const *cap_def;
if (WARN_ON(cap < 0))
return false;
@@ -205,7 +211,7 @@ bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap)
* address space) and no API version number. Both must be assigned before
* kbase_file_get_kctx_if_setup_complete() can be used successfully.
*
- * @return Address of an object representing a simulated device file, or NULL
+ * Return: Address of an object representing a simulated device file, or NULL
* on failure.
*/
static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev,
@@ -784,14 +790,14 @@ static int kbase_api_job_submit(struct kbase_context *kctx,
}
#endif /* !MALI_USE_CSF */
-static int kbase_api_get_gpuprops(struct kbase_context *kctx,
+static int kbase_api_get_gpuprops(struct kbase_file *kfile,
struct kbase_ioctl_get_gpuprops *get_props)
{
- struct kbase_gpu_props *kprops = &kctx->kbdev->gpu_props;
+ struct kbase_gpu_props *kprops = &kfile->kbdev->gpu_props;
int err;
if (get_props->flags != 0) {
- dev_err(kctx->kbdev->dev, "Unsupported flags to get_gpuprops");
+ dev_err(kfile->kbdev->dev, "Unsupported flags to get_gpuprops");
return -EINVAL;
}
@@ -816,11 +822,12 @@ static int kbase_api_post_term(struct kbase_context *kctx)
}
#endif /* !MALI_USE_CSF */
-static int kbase_api_mem_alloc(struct kbase_context *kctx,
- union kbase_ioctl_mem_alloc *alloc)
+#if MALI_USE_CSF
+static int kbase_api_mem_alloc_ex(struct kbase_context *kctx,
+ union kbase_ioctl_mem_alloc_ex *alloc_ex)
{
struct kbase_va_region *reg;
- u64 flags = alloc->in.flags;
+ u64 flags = alloc_ex->in.flags;
u64 gpu_va;
/* Calls to this function are inherently asynchronous, with respect to
@@ -828,24 +835,60 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ bool gpu_executable = (flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx);
+ bool fixed_or_fixable = (flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE));
+
if (!kbase_mem_allow_alloc(kctx))
return -EINVAL;
+ /* The driver counts the number of FIXABLE and FIXED allocations because
+ * they're not supposed to happen at the same time. However, that is not
+ * a security concern: nothing bad happens if the two types of allocations
+ * are made at the same time. The only reason why the driver is guarding
+ * against them is because there's no client use case that is supposed
+ * to need both of them at the same time, and the driver wants to help
+ * the user space catch some obvious mistake.
+ *
+ * The driver is able to switch from FIXABLE allocations to FIXED and
+ * vice versa, if all the allocations of one kind are freed before trying
+ * to create allocations of a different kind.
+ */
+ if ((flags & BASE_MEM_FIXED) && (atomic64_read(&kctx->num_fixable_allocs) > 0))
+ return -EINVAL;
+
+ if ((flags & BASE_MEM_FIXABLE) && (atomic64_read(&kctx->num_fixed_allocs) > 0))
+ return -EINVAL;
+
if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
return -ENOMEM;
- /* Force SAME_VA if a 64-bit client.
- * The only exception is GPU-executable memory if an EXEC_VA zone
- * has been initialized. In that case, GPU-executable memory may
- * or may not be SAME_VA.
+ /* The fixed_address parameter must be either a non-zero, page-aligned
+ * value for FIXED allocations or zero for any other kind of allocation.
+ */
+ if (flags & BASE_MEM_FIXED) {
+ u64 aligned_fixed_address = alloc_ex->in.fixed_address & PAGE_MASK;
+
+ if ((aligned_fixed_address == 0) ||
+ (aligned_fixed_address != alloc_ex->in.fixed_address))
+ return -EINVAL;
+
+ gpu_va = aligned_fixed_address;
+ } else if (alloc_ex->in.fixed_address != 0) {
+ return -EINVAL;
+ }
+
+ /* For 64-bit clients, force SAME_VA up to 2^(47)-1.
+ * For 32-bit clients, force SAME_VA up to 2^(32)-1.
+ *
+ * In both cases, the executable and fixed/fixable zones, and
+ * the executable+fixed/fixable zone, are all above this range.
*/
if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) &&
kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
- if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx))
+ if (!gpu_executable && !fixed_or_fixable)
flags |= BASE_MEM_SAME_VA;
}
-#if MALI_USE_CSF
/* If CSF event memory allocation, need to force certain flags.
* SAME_VA - GPU address needs to be used as a CPU address, explicit
* mmap has to be avoided.
@@ -854,15 +897,75 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
* to event memory so need to leverage the coherency support.
*/
if (flags & BASE_MEM_CSF_EVENT) {
+ /* We cannot honor this request */
+ if (gpu_executable || fixed_or_fixable)
+ return -ENOMEM;
+
flags |= (BASE_MEM_SAME_VA |
BASE_MEM_CACHED_CPU |
BASE_MEM_COHERENT_SYSTEM);
}
-#endif
- reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages,
- alloc->in.extension, &flags, &gpu_va,
- mmu_sync_info);
+ reg = kbase_mem_alloc(kctx, alloc_ex->in.va_pages, alloc_ex->in.commit_pages,
+ alloc_ex->in.extension, &flags, &gpu_va, mmu_sync_info);
+
+ if (!reg)
+ return -ENOMEM;
+
+ alloc_ex->out.flags = flags;
+ alloc_ex->out.gpu_va = gpu_va;
+
+ return 0;
+}
+
+static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc)
+{
+ int ret;
+ union kbase_ioctl_mem_alloc_ex mem_alloc_ex = { { 0 } };
+
+ mem_alloc_ex.in.va_pages = alloc->in.va_pages;
+ mem_alloc_ex.in.commit_pages = alloc->in.commit_pages;
+ mem_alloc_ex.in.extension = alloc->in.extension;
+ mem_alloc_ex.in.flags = alloc->in.flags;
+ mem_alloc_ex.in.fixed_address = 0;
+
+ ret = kbase_api_mem_alloc_ex(kctx, &mem_alloc_ex);
+
+ alloc->out.flags = mem_alloc_ex.out.flags;
+ alloc->out.gpu_va = mem_alloc_ex.out.gpu_va;
+
+ return ret;
+}
+#else
+static int kbase_api_mem_alloc(struct kbase_context *kctx, union kbase_ioctl_mem_alloc *alloc)
+{
+ struct kbase_va_region *reg;
+ u64 flags = alloc->in.flags;
+ u64 gpu_va;
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ if (!kbase_mem_allow_alloc(kctx))
+ return -EINVAL;
+
+ if (flags & BASEP_MEM_FLAGS_KERNEL_ONLY)
+ return -ENOMEM;
+
+ /* Force SAME_VA if a 64-bit client.
+ * The only exception is GPU-executable memory if an EXEC_VA zone
+ * has been initialized. In that case, GPU-executable memory may
+ * or may not be SAME_VA.
+ */
+ if ((!kbase_ctx_flag(kctx, KCTX_COMPAT)) && kbase_ctx_flag(kctx, KCTX_FORCE_SAME_VA)) {
+ if (!(flags & BASE_MEM_PROT_GPU_EX) || !kbase_has_exec_va_zone(kctx))
+ flags |= BASE_MEM_SAME_VA;
+ }
+
+ reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages, alloc->in.extension,
+ &flags, &gpu_va, mmu_sync_info);
if (!reg)
return -ENOMEM;
@@ -872,6 +975,7 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
return 0;
}
+#endif /* MALI_USE_CSF */
static int kbase_api_mem_query(struct kbase_context *kctx,
union kbase_ioctl_mem_query *query)
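For orientation, a hedged user-space sketch of the new allocation path (not official UAPI documentation: the flag combination, the GPU address passed in, and the assumption that the file descriptor has already completed the version-check/set-flags handshake are illustrative; the union/ioctl names, the non-zero page-aligned fixed_address rule and the FIXED/FIXABLE exclusivity come from the code above):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
/* The Mali kbase UAPI header from the matching kernel tree (path varies)
 * provides union kbase_ioctl_mem_alloc_ex, KBASE_IOCTL_MEM_ALLOC_EX and
 * the BASE_MEM_* flags used below.
 */

static int example_alloc_fixed(int mali_fd, uint64_t fixed_gpu_va, uint64_t pages)
{
	union kbase_ioctl_mem_alloc_ex alloc_ex;

	memset(&alloc_ex, 0, sizeof(alloc_ex));
	alloc_ex.in.va_pages = pages;
	alloc_ex.in.commit_pages = pages;
	alloc_ex.in.extension = 0;
	/* BASE_MEM_FIXED needs a non-zero, page-aligned fixed_address, and a
	 * context may not mix FIXED and FIXABLE allocations.
	 */
	alloc_ex.in.flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_CPU_WR |
			    BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
			    BASE_MEM_FIXED;
	alloc_ex.in.fixed_address = fixed_gpu_va;

	if (ioctl(mali_fd, KBASE_IOCTL_MEM_ALLOC_EX, &alloc_ex))
		return -1;
	/* alloc_ex.out.gpu_va and alloc_ex.out.flags report the result. */
	return 0;
}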
@@ -1649,6 +1753,10 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
union kbase_ioctl_kinstr_prfcnt_setup,
kfile);
break;
+ case KBASE_IOCTL_GET_GPUPROPS:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS, kbase_api_get_gpuprops,
+ struct kbase_ioctl_get_gpuprops, kfile);
+ break;
}
kctx = kbase_file_get_kctx_if_setup_complete(kfile);
@@ -1665,12 +1773,6 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
kctx);
break;
#endif /* !MALI_USE_CSF */
- case KBASE_IOCTL_GET_GPUPROPS:
- KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_GET_GPUPROPS,
- kbase_api_get_gpuprops,
- struct kbase_ioctl_get_gpuprops,
- kctx);
- break;
#if !MALI_USE_CSF
case KBASE_IOCTL_POST_TERM:
KBASE_HANDLE_IOCTL(KBASE_IOCTL_POST_TERM,
@@ -1684,6 +1786,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
union kbase_ioctl_mem_alloc,
kctx);
break;
+#if MALI_USE_CSF
+ case KBASE_IOCTL_MEM_ALLOC_EX:
+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_ALLOC_EX, kbase_api_mem_alloc_ex,
+ union kbase_ioctl_mem_alloc_ex, kctx);
+ break;
+#endif
case KBASE_IOCTL_MEM_QUERY:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_MEM_QUERY,
kbase_api_mem_query,
@@ -2062,6 +2170,8 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
if (count < sizeof(uevent))
return -ENOBUFS;
+ memset(&uevent, 0, sizeof(uevent));
+
do {
while (kbase_event_dequeue(kctx, &uevent)) {
if (out_count > 0)
@@ -2191,19 +2301,19 @@ static const struct file_operations kbase_fops = {
};
/**
- * show_policy - Show callback for the power_policy sysfs file.
- *
- * This function is called to get the contents of the power_policy sysfs
- * file. This is a list of the available policies with the currently active one
- * surrounded by square brackets.
+ * power_policy_show - Show callback for the power_policy sysfs file.
*
* @dev: The device this sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The output buffer for the sysfs file contents
*
+ * This function is called to get the contents of the power_policy sysfs
+ * file. This is a list of the available policies with the currently active one
+ * surrounded by square brackets.
+ *
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_policy(struct device *dev, struct device_attribute *attr, char *const buf)
+static ssize_t power_policy_show(struct device *dev, struct device_attribute *attr, char *const buf)
{
struct kbase_device *kbdev;
const struct kbase_pm_policy *current_policy;
@@ -2240,21 +2350,21 @@ static ssize_t show_policy(struct device *dev, struct device_attribute *attr, ch
}
/**
- * set_policy - Store callback for the power_policy sysfs file.
- *
- * This function is called when the power_policy sysfs file is written to.
- * It matches the requested policy against the available policies and if a
- * matching policy is found calls kbase_pm_set_policy() to change the
- * policy.
+ * power_policy_store - Store callback for the power_policy sysfs file.
*
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
* @count: The number of bytes to write to the sysfs file
*
+ * This function is called when the power_policy sysfs file is written to.
+ * It matches the requested policy against the available policies and if a
+ * matching policy is found calls kbase_pm_set_policy() to change the
+ * policy.
+ *
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_policy(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t power_policy_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
const struct kbase_pm_policy *new_policy = NULL;
@@ -2293,20 +2403,20 @@ static ssize_t set_policy(struct device *dev, struct device_attribute *attr, con
* determining which policy is currently active, and changing the active
* policy.
*/
-static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
+static DEVICE_ATTR_RW(power_policy);
/*
- * show_core_mask - Show callback for the core_mask sysfs file.
- *
- * This function is called to get the contents of the core_mask sysfs file.
+ * core_mask_show - Show callback for the core_mask sysfs file.
*
* @dev: The device this sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The output buffer for the sysfs file contents
*
+ * This function is called to get the contents of the core_mask sysfs file.
+ *
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
+static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
unsigned long flags;
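Most of the show/store renames in this file exist to satisfy the sysfs helper macros: DEVICE_ATTR_RW(name) declares a struct device_attribute called dev_attr_<name> with 0644 permissions and wires it to callbacks named <name>_show and <name>_store, so each pair is renamed after its attribute. A generic sketch of the convention (standard kernel sysfs usage, not code from this driver):

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static ssize_t example_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", 42);
}

static ssize_t example_store(struct device *dev, struct device_attribute *attr,
			     const char *buf, size_t count)
{
	return count;
}

/* Equivalent to the old open-coded form:
 * DEVICE_ATTR(example, S_IRUGO | S_IWUSR, example_show, example_store)
 */
static DEVICE_ATTR_RW(example);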
@@ -2351,18 +2461,18 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
}
/**
- * set_core_mask - Store callback for the core_mask sysfs file.
- *
- * This function is called when the core_mask sysfs file is written to.
+ * core_mask_store - Store callback for the core_mask sysfs file.
*
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
* @count: The number of bytes to write to the sysfs file
*
+ * This function is called when the core_mask sysfs file is written to.
+ *
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
#if MALI_USE_CSF
@@ -2492,11 +2602,11 @@ end:
* Reading it will show the current core mask and the mask of cores available.
* Writing to it will set the current core mask.
*/
-static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
+static DEVICE_ATTR_RW(core_mask);
#if !MALI_USE_CSF
/**
- * set_soft_job_timeout - Store callback for the soft_job_timeout sysfs
+ * soft_job_timeout_store - Store callback for the soft_job_timeout sysfs
* file.
*
* @dev: The device this sysfs file is for.
@@ -2512,7 +2622,7 @@ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask);
*
* Return: count if the function succeeded. An error code on failure.
*/
-static ssize_t set_soft_job_timeout(struct device *dev,
+static ssize_t soft_job_timeout_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -2534,18 +2644,18 @@ static ssize_t set_soft_job_timeout(struct device *dev,
}
/**
- * show_soft_job_timeout - Show callback for the soft_job_timeout sysfs
+ * soft_job_timeout_show - Show callback for the soft_job_timeout sysfs
* file.
*
- * This will return the timeout for the software jobs.
- *
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The output buffer for the sysfs file contents.
*
+ * This will return the timeout for the software jobs.
+ *
* Return: The number of bytes output to buf.
*/
-static ssize_t show_soft_job_timeout(struct device *dev,
+static ssize_t soft_job_timeout_show(struct device *dev,
struct device_attribute *attr,
char * const buf)
{
@@ -2559,14 +2669,14 @@ static ssize_t show_soft_job_timeout(struct device *dev,
atomic_read(&kbdev->js_data.soft_job_timeout_ms));
}
-static DEVICE_ATTR(soft_job_timeout, S_IRUGO | S_IWUSR,
- show_soft_job_timeout, set_soft_job_timeout);
+static DEVICE_ATTR_RW(soft_job_timeout);
static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
int default_ticks, u32 old_ticks)
{
if (timeout_ms > 0) {
u64 ticks = timeout_ms * 1000000ULL;
+
do_div(ticks, kbdev->js_data.scheduling_period_ns);
if (!ticks)
return 1;
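As a worked example of the helper above (illustrative numbers, not from the patch): with the default 100 ms scheduling period (DEFAULT_JS_SCHEDULING_PERIOD_NS = 100000000), a 3000 ms timeout converts to 3000 * 1000000 / 100000000 = 30 ticks, and any value that would truncate to zero is clamped to a single tick.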
@@ -2579,7 +2689,12 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
}
/**
- * set_js_timeouts - Store callback for the js_timeouts sysfs file.
+ * js_timeouts_store - Store callback for the js_timeouts sysfs file.
+ *
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
*
* This function is called to get the contents of the js_timeouts sysfs
* file. This file contains five values separated by whitespace. The values
@@ -2592,14 +2707,9 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms,
* use by the job scheduler to get override. Note that a value needs to
* be other than 0 for it to override the current job scheduler value.
*
- * @dev: The device with sysfs file is for
- * @attr: The attributes of the sysfs file
- * @buf: The value written to the sysfs file
- * @count: The number of bytes to write to the sysfs file
- *
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_js_timeouts(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t js_timeouts_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
int items;
@@ -2679,19 +2789,20 @@ static unsigned long get_js_timeout_in_ms(
}
/**
- * show_js_timeouts - Show callback for the js_timeouts sysfs file.
+ * js_timeouts_show - Show callback for the js_timeouts sysfs file.
+ *
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The output buffer for the sysfs file contents
*
* This function is called to get the contents of the js_timeouts sysfs
* file. It returns the last set values written to the js_timeouts sysfs file.
* If the file didn't get written yet, the values will be current setting in
* use.
- * @dev: The device this sysfs file is for
- * @attr: The attributes of the sysfs file
- * @buf: The output buffer for the sysfs file contents
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *attr, char * const buf)
+static ssize_t js_timeouts_show(struct device *dev, struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
ssize_t ret;
@@ -2754,7 +2865,7 @@ static ssize_t show_js_timeouts(struct device *dev, struct device_attribute *att
* JS_RESET_TICKS_CL
* JS_RESET_TICKS_DUMPING.
*/
-static DEVICE_ATTR(js_timeouts, S_IRUGO | S_IWUSR, show_js_timeouts, set_js_timeouts);
+static DEVICE_ATTR_RW(js_timeouts);
static u32 get_new_js_timeout(
u32 old_period,
@@ -2762,12 +2873,13 @@ static u32 get_new_js_timeout(
u32 new_scheduling_period_ns)
{
u64 ticks = (u64)old_period * (u64)old_ticks;
+
do_div(ticks, new_scheduling_period_ns);
return ticks?ticks:1;
}
/**
- * set_js_scheduling_period - Store callback for the js_scheduling_period sysfs
+ * js_scheduling_period_store - Store callback for the js_scheduling_period sysfs
* file
* @dev: The device the sysfs file is for
* @attr: The attributes of the sysfs file
@@ -2780,7 +2892,7 @@ static u32 get_new_js_timeout(
*
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_js_scheduling_period(struct device *dev,
+static ssize_t js_scheduling_period_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
@@ -2849,7 +2961,7 @@ static ssize_t set_js_scheduling_period(struct device *dev,
}
/**
- * show_js_scheduling_period - Show callback for the js_scheduling_period sysfs
+ * js_scheduling_period_show - Show callback for the js_scheduling_period sysfs
* entry.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
@@ -2860,7 +2972,7 @@ static ssize_t set_js_scheduling_period(struct device *dev,
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_js_scheduling_period(struct device *dev,
+static ssize_t js_scheduling_period_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
@@ -2879,12 +2991,11 @@ static ssize_t show_js_scheduling_period(struct device *dev,
return ret;
}
-static DEVICE_ATTR(js_scheduling_period, S_IRUGO | S_IWUSR,
- show_js_scheduling_period, set_js_scheduling_period);
+static DEVICE_ATTR_RW(js_scheduling_period);
#ifdef CONFIG_MALI_DEBUG
-static ssize_t set_js_softstop_always(struct device *dev,
+static ssize_t js_softstop_always_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
@@ -2909,7 +3020,7 @@ static ssize_t set_js_softstop_always(struct device *dev,
return count;
}
-static ssize_t show_js_softstop_always(struct device *dev,
+static ssize_t js_softstop_always_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
@@ -2936,7 +3047,7 @@ static ssize_t show_js_softstop_always(struct device *dev,
* used for debug and unit-testing purposes.
* (see CL t6xx_stress_1 unit-test as an example whereby this feature is used.)
*/
-static DEVICE_ATTR(js_softstop_always, S_IRUGO | S_IWUSR, show_js_softstop_always, set_js_softstop_always);
+static DEVICE_ATTR_RW(js_softstop_always);
#endif /* CONFIG_MALI_DEBUG */
#endif /* !MALI_USE_CSF */
@@ -2963,24 +3074,24 @@ static void kbasep_ktrace_dump_wrapper(struct kbase_device *kbdev)
/* Debug commands supported by the driver */
static const struct kbasep_debug_command debug_commands[] = {
{
- .str = "dumptrace",
- .func = &kbasep_ktrace_dump_wrapper,
- }
+ .str = "dumptrace",
+ .func = &kbasep_ktrace_dump_wrapper,
+ }
};
/**
- * show_debug - Show callback for the debug_command sysfs file.
- *
- * This function is called to get the contents of the debug_command sysfs
- * file. This is a list of the available debug commands, separated by newlines.
+ * debug_command_show - Show callback for the debug_command sysfs file.
*
* @dev: The device this sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The output buffer for the sysfs file contents
*
+ * This function is called to get the contents of the debug_command sysfs
+ * file. This is a list of the available debug commands, separated by newlines.
+ *
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_debug(struct device *dev, struct device_attribute *attr, char * const buf)
+static ssize_t debug_command_show(struct device *dev, struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
int i;
@@ -3004,21 +3115,21 @@ static ssize_t show_debug(struct device *dev, struct device_attribute *attr, cha
}
/**
- * issue_debug - Store callback for the debug_command sysfs file.
- *
- * This function is called when the debug_command sysfs file is written to.
- * It matches the requested command against the available commands, and if
- * a matching command is found calls the associated function from
- * @debug_commands to issue the command.
+ * debug_command_store - Store callback for the debug_command sysfs file.
*
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
* @count: The number of bytes written to the sysfs file
*
+ * This function is called when the debug_command sysfs file is written to.
+ * It matches the requested command against the available commands, and if
+ * a matching command is found calls the associated function from
+ * @debug_commands to issue the command.
+ *
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
+static ssize_t debug_command_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
int i;
@@ -3046,11 +3157,11 @@ static ssize_t issue_debug(struct device *dev, struct device_attribute *attr, co
* Reading it will produce a list of debug commands, separated by newlines.
* Writing to it with one of those commands will issue said command.
*/
-static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
+static DEVICE_ATTR_RW(debug_command);
#endif /* CONFIG_MALI_DEBUG */
/**
- * kbase_show_gpuinfo - Show callback for the gpuinfo sysfs entry.
+ * gpuinfo_show - Show callback for the gpuinfo sysfs entry.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The output buffer to receive the GPU information.
@@ -3064,61 +3175,61 @@ static DEVICE_ATTR(debug_command, S_IRUGO | S_IWUSR, show_debug, issue_debug);
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t kbase_show_gpuinfo(struct device *dev,
+static ssize_t gpuinfo_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
static const struct gpu_product_id_name {
- unsigned id;
+ unsigned int id;
char *name;
} gpu_product_id_names[] = {
- { .id = GPU_ID2_PRODUCT_TMIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TMIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G71" },
- { .id = GPU_ID2_PRODUCT_THEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_THEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G72" },
- { .id = GPU_ID2_PRODUCT_TSIX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TSIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G51" },
- { .id = GPU_ID2_PRODUCT_TNOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TNOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G76" },
- { .id = GPU_ID2_PRODUCT_TDVX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TDVX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G31" },
- { .id = GPU_ID2_PRODUCT_TGOX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TGOX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G52" },
- { .id = GPU_ID2_PRODUCT_TTRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TTRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G77" },
- { .id = GPU_ID2_PRODUCT_TBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G78" },
- { .id = GPU_ID2_PRODUCT_TBAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TBAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G78AE" },
- { .id = GPU_ID2_PRODUCT_LBEX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_LBEX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G68" },
- { .id = GPU_ID2_PRODUCT_TNAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TNAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G57" },
- { .id = GPU_ID2_PRODUCT_TODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G710" },
- { .id = GPU_ID2_PRODUCT_LODX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_LODX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G610" },
- { .id = GPU_ID2_PRODUCT_TGRX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TGRX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G510" },
- { .id = GPU_ID2_PRODUCT_TVAX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G310" },
- { .id = GPU_ID2_PRODUCT_TTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_TTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-TTUX" },
- { .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
+ { .id = GPU_ID2_PRODUCT_LTUX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-LTUX" },
};
const char *product_name = "(Unknown Mali GPU)";
struct kbase_device *kbdev;
u32 gpu_id;
- unsigned product_id, product_id_mask;
- unsigned i;
+ unsigned int product_id, product_id_mask;
+ unsigned int i;
kbdev = to_kbase_device(dev);
if (!kbdev)
return -ENODEV;
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
- product_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
- product_id_mask = GPU_ID2_PRODUCT_MODEL >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+ product_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+ product_id_mask = GPU_ID2_PRODUCT_MODEL >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
for (i = 0; i < ARRAY_SIZE(gpu_product_id_names); ++i) {
const struct gpu_product_id_name *p = &gpu_product_id_names[i];
@@ -3130,16 +3241,16 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
}
}
- return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n",
- product_name, kbdev->gpu_props.num_cores,
- (gpu_id & GPU_ID_VERSION_MAJOR) >> GPU_ID_VERSION_MAJOR_SHIFT,
- (gpu_id & GPU_ID_VERSION_MINOR) >> GPU_ID_VERSION_MINOR_SHIFT,
- product_id);
+ return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name,
+ kbdev->gpu_props.num_cores,
+ (gpu_id & GPU_ID_VERSION_MAJOR) >> KBASE_GPU_ID_VERSION_MAJOR_SHIFT,
+ (gpu_id & GPU_ID_VERSION_MINOR) >> KBASE_GPU_ID_VERSION_MINOR_SHIFT,
+ product_id);
}
-static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
+static DEVICE_ATTR_RO(gpuinfo);
/**
- * set_dvfs_period - Store callback for the dvfs_period sysfs file.
+ * dvfs_period_store - Store callback for the dvfs_period sysfs file.
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
@@ -3150,7 +3261,7 @@ static DEVICE_ATTR(gpuinfo, S_IRUGO, kbase_show_gpuinfo, NULL);
*
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_dvfs_period(struct device *dev,
+static ssize_t dvfs_period_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
@@ -3175,7 +3286,7 @@ static ssize_t set_dvfs_period(struct device *dev,
}
/**
- * show_dvfs_period - Show callback for the dvfs_period sysfs entry.
+ * dvfs_period_show - Show callback for the dvfs_period sysfs entry.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The output buffer to receive the GPU information.
@@ -3185,7 +3296,7 @@ static ssize_t set_dvfs_period(struct device *dev,
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_dvfs_period(struct device *dev,
+static ssize_t dvfs_period_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
@@ -3200,11 +3311,10 @@ static ssize_t show_dvfs_period(struct device *dev,
return ret;
}
-static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
- set_dvfs_period);
+static DEVICE_ATTR_RW(dvfs_period);
/**
- * set_pm_poweroff - Store callback for the pm_poweroff sysfs file.
+ * pm_poweroff_store - Store callback for the pm_poweroff sysfs file.
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
@@ -3220,7 +3330,7 @@ static DEVICE_ATTR(dvfs_period, S_IRUGO | S_IWUSR, show_dvfs_period,
*
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_pm_poweroff(struct device *dev,
+static ssize_t pm_poweroff_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
@@ -3259,7 +3369,7 @@ static ssize_t set_pm_poweroff(struct device *dev,
}
/**
- * show_pm_poweroff - Show callback for the pm_poweroff sysfs entry.
+ * pm_poweroff_show - Show callback for the pm_poweroff sysfs entry.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The output buffer to receive the GPU information.
@@ -3269,7 +3379,7 @@ static ssize_t set_pm_poweroff(struct device *dev,
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_pm_poweroff(struct device *dev,
+static ssize_t pm_poweroff_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
@@ -3293,80 +3403,10 @@ static ssize_t show_pm_poweroff(struct device *dev,
return ret;
}
-static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
- set_pm_poweroff);
+static DEVICE_ATTR_RW(pm_poweroff);
-#if MALI_USE_CSF
/**
- * set_idle_hysteresis_time - Store callback for CSF idle_hysteresis_time
- * sysfs file.
- * @dev: The device with sysfs file is for
- * @attr: The attributes of the sysfs file
- * @buf: The value written to the sysfs file
- * @count: The number of bytes written to the sysfs file
- *
- * This function is called when the idle_hysteresis_time sysfs file is
- * written to.
- *
- * This file contains values of the idle idle hysteresis duration.
- *
- * Return: @count if the function succeeded. An error code on failure.
- */
-static ssize_t set_idle_hysteresis_time(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct kbase_device *kbdev;
- u32 dur;
-
- kbdev = to_kbase_device(dev);
- if (!kbdev)
- return -ENODEV;
-
- if (kstrtou32(buf, 0, &dur)) {
- dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n"
- "Use format <idle_hysteresis_time>\n");
- return -EINVAL;
- }
-
- kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur);
-
- return count;
-}
-
-/**
- * show_idle_hysteresis_time - Show callback for CSF idle_hysteresis_time
- * sysfs entry.
- * @dev: The device this sysfs file is for.
- * @attr: The attributes of the sysfs file.
- * @buf: The output buffer to receive the GPU information.
- *
- * This function is called to get the current idle hysteresis duration in ms.
- *
- * Return: The number of bytes output to @buf.
- */
-static ssize_t show_idle_hysteresis_time(struct device *dev,
- struct device_attribute *attr, char * const buf)
-{
- struct kbase_device *kbdev;
- ssize_t ret;
- u32 dur;
-
- kbdev = to_kbase_device(dev);
- if (!kbdev)
- return -ENODEV;
-
- dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev);
- ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur);
-
- return ret;
-}
-
-static DEVICE_ATTR(idle_hysteresis_time, S_IRUGO | S_IWUSR,
- show_idle_hysteresis_time, set_idle_hysteresis_time);
-#endif
-
-/**
- * set_reset_timeout - Store callback for the reset_timeout sysfs file.
+ * reset_timeout_store - Store callback for the reset_timeout sysfs file.
* @dev: The device with sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
@@ -3377,7 +3417,7 @@ static DEVICE_ATTR(idle_hysteresis_time, S_IRUGO | S_IWUSR,
*
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_reset_timeout(struct device *dev,
+static ssize_t reset_timeout_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
@@ -3402,7 +3442,7 @@ static ssize_t set_reset_timeout(struct device *dev,
}
/**
- * show_reset_timeout - Show callback for the reset_timeout sysfs entry.
+ * reset_timeout_show - Show callback for the reset_timeout sysfs entry.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The output buffer to receive the GPU information.
@@ -3411,7 +3451,7 @@ static ssize_t set_reset_timeout(struct device *dev,
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_reset_timeout(struct device *dev,
+static ssize_t reset_timeout_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
@@ -3426,11 +3466,9 @@ static ssize_t show_reset_timeout(struct device *dev,
return ret;
}
-static DEVICE_ATTR(reset_timeout, S_IRUGO | S_IWUSR, show_reset_timeout,
- set_reset_timeout);
+static DEVICE_ATTR_RW(reset_timeout);
-
-static ssize_t show_mem_pool_size(struct device *dev,
+static ssize_t mem_pool_size_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3443,7 +3481,7 @@ static ssize_t show_mem_pool_size(struct device *dev,
kbase_mem_pool_debugfs_size);
}
-static ssize_t set_mem_pool_size(struct device *dev,
+static ssize_t mem_pool_size_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3459,10 +3497,9 @@ static ssize_t set_mem_pool_size(struct device *dev,
return err ? err : count;
}
-static DEVICE_ATTR(mem_pool_size, S_IRUGO | S_IWUSR, show_mem_pool_size,
- set_mem_pool_size);
+static DEVICE_ATTR_RW(mem_pool_size);
-static ssize_t show_mem_pool_max_size(struct device *dev,
+static ssize_t mem_pool_max_size_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3475,7 +3512,7 @@ static ssize_t show_mem_pool_max_size(struct device *dev,
kbase_mem_pool_debugfs_max_size);
}
-static ssize_t set_mem_pool_max_size(struct device *dev,
+static ssize_t mem_pool_max_size_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3491,11 +3528,10 @@ static ssize_t set_mem_pool_max_size(struct device *dev,
return err ? err : count;
}
-static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
- set_mem_pool_max_size);
+static DEVICE_ATTR_RW(mem_pool_max_size);
/**
- * show_lp_mem_pool_size - Show size of the large memory pages pool.
+ * lp_mem_pool_size_show - Show size of the large memory pages pool.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The output buffer to receive the pool size.
@@ -3504,7 +3540,7 @@ static DEVICE_ATTR(mem_pool_max_size, S_IRUGO | S_IWUSR, show_mem_pool_max_size,
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_lp_mem_pool_size(struct device *dev,
+static ssize_t lp_mem_pool_size_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3518,7 +3554,7 @@ static ssize_t show_lp_mem_pool_size(struct device *dev,
}
/**
- * set_lp_mem_pool_size - Set size of the large memory pages pool.
+ * lp_mem_pool_size_store - Set size of the large memory pages pool.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The value written to the sysfs file.
@@ -3529,7 +3565,7 @@ static ssize_t show_lp_mem_pool_size(struct device *dev,
*
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_lp_mem_pool_size(struct device *dev,
+static ssize_t lp_mem_pool_size_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3545,11 +3581,10 @@ static ssize_t set_lp_mem_pool_size(struct device *dev,
return err ? err : count;
}
-static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
- set_lp_mem_pool_size);
+static DEVICE_ATTR_RW(lp_mem_pool_size);
/**
- * show_lp_mem_pool_max_size - Show maximum size of the large memory pages pool.
+ * lp_mem_pool_max_size_show - Show maximum size of the large memory pages pool.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The output buffer to receive the pool size.
@@ -3558,7 +3593,7 @@ static DEVICE_ATTR(lp_mem_pool_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_size,
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_lp_mem_pool_max_size(struct device *dev,
+static ssize_t lp_mem_pool_max_size_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3572,7 +3607,7 @@ static ssize_t show_lp_mem_pool_max_size(struct device *dev,
}
/**
- * set_lp_mem_pool_max_size - Set maximum size of the large memory pages pool.
+ * lp_mem_pool_max_size_store - Set maximum size of the large memory pages pool.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
* @buf: The value written to the sysfs file.
@@ -3582,7 +3617,7 @@ static ssize_t show_lp_mem_pool_max_size(struct device *dev,
*
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_lp_mem_pool_max_size(struct device *dev,
+static ssize_t lp_mem_pool_max_size_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *const kbdev = to_kbase_device(dev);
@@ -3598,8 +3633,7 @@ static ssize_t set_lp_mem_pool_max_size(struct device *dev,
return err ? err : count;
}
-static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
- set_lp_mem_pool_max_size);
+static DEVICE_ATTR_RW(lp_mem_pool_max_size);
/**
* show_simplified_mem_pool_max_size - Show the maximum size for the memory
@@ -3800,7 +3834,7 @@ static DEVICE_ATTR(ctx_default_max_size, 0600,
#if !MALI_USE_CSF
/**
- * show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
+ * js_ctx_scheduling_mode_show - Show callback for js_ctx_scheduling_mode sysfs
* entry.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
@@ -3810,7 +3844,7 @@ static DEVICE_ATTR(ctx_default_max_size, 0600,
*
* Return: The number of bytes output to @buf.
*/
-static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
+static ssize_t js_ctx_scheduling_mode_show(struct device *dev,
struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
@@ -3823,7 +3857,7 @@ static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
}
/**
- * set_js_ctx_scheduling_mode - Set callback for js_ctx_scheduling_mode sysfs
+ * js_ctx_scheduling_mode_store - Set callback for js_ctx_scheduling_mode sysfs
* entry.
* @dev: The device this sysfs file is for.
* @attr: The attributes of the sysfs file.
@@ -3836,7 +3870,7 @@ static ssize_t show_js_ctx_scheduling_mode(struct device *dev,
*
* Return: @count if the function succeeded. An error code on failure.
*/
-static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
+static ssize_t js_ctx_scheduling_mode_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_context *kctx;
@@ -3878,9 +3912,7 @@ static ssize_t set_js_ctx_scheduling_mode(struct device *dev,
return count;
}
-static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR,
- show_js_ctx_scheduling_mode,
- set_js_ctx_scheduling_mode);
+static DEVICE_ATTR_RW(js_ctx_scheduling_mode);
/* Number of entries in serialize_jobs_settings[] */
#define NR_SERIALIZE_JOBS_SETTINGS 5
@@ -3904,15 +3936,15 @@ static struct
* update_serialize_jobs_setting - Update the serialization setting for the
* submission of GPU jobs.
*
- * This function is called when the serialize_jobs sysfs/debugfs file is
- * written to. It matches the requested setting against the available settings
- * and if a matching setting is found updates kbdev->serialize_jobs.
- *
* @kbdev: An instance of the GPU platform device, allocated from the probe
* method of the driver.
* @buf: Buffer containing the value written to the sysfs/debugfs file.
* @count: The number of bytes to write to the sysfs/debugfs file.
*
+ * This function is called when the serialize_jobs sysfs/debugfs file is
+ * written to. It matches the requested setting against the available settings
+ * and if a matching setting is found updates kbdev->serialize_jobs.
+ *
* Return: @count if the function succeeded. An error code on failure.
*/
static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev,
@@ -4034,14 +4066,14 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = {
/**
* show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file.
*
- * This function is called to get the contents of the serialize_jobs sysfs
- * file. This is a list of the available settings with the currently active
- * one surrounded by square brackets.
- *
* @dev: The device this sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The output buffer for the sysfs file contents
*
+ * This function is called to get the contents of the serialize_jobs sysfs
+ * file. This is a list of the available settings with the currently active
+ * one surrounded by square brackets.
+ *
* Return: The number of bytes output to @buf.
*/
static ssize_t show_serialize_jobs_sysfs(struct device *dev,
@@ -4076,15 +4108,15 @@ static ssize_t show_serialize_jobs_sysfs(struct device *dev,
/**
* store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file.
*
- * This function is called when the serialize_jobs sysfs file is written to.
- * It matches the requested setting against the available settings and if a
- * matching setting is found updates kbdev->serialize_jobs.
- *
* @dev: The device this sysfs file is for
* @attr: The attributes of the sysfs file
* @buf: The value written to the sysfs file
* @count: The number of bytes to write to the sysfs file
*
+ * This function is called when the serialize_jobs sysfs file is written to.
+ * It matches the requested setting against the available settings and if a
+ * matching setting is found updates kbdev->serialize_jobs.
+ *
* Return: @count if the function succeeded. An error code on failure.
*/
static ssize_t store_serialize_jobs_sysfs(struct device *dev,
@@ -4364,8 +4396,8 @@ int kbase_device_pm_init(struct kbase_device *kbdev)
kbase_pm_register_access_enable(kbdev);
gpu_id = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
kbase_pm_register_access_disable(kbdev);
- product_id = KBASE_UBFX32(gpu_id,
- GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16);
+ product_id =
+ KBASE_UBFX32(gpu_id, KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, 16);
gpu_model_id = GPU_ID2_MODEL_MATCH_VALUE(product_id);
if (gpu_model_id != GPU_ID2_PRODUCT_TGOX
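For reference, assuming the usual kbase definition of KBASE_UBFX32 (the macro itself is not shown in this patch): KBASE_UBFX32(value, lsb, width) extracts an unsigned bit-field, so the call above is equivalent to (gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & 0xFFFF, yielding the 16-bit product ID that GPU_ID2_MODEL_MATCH_VALUE() reduces to the model identifier used for the TGOX check below.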
@@ -4417,7 +4449,7 @@ int power_control_init(struct kbase_device *kbdev)
int err = 0;
unsigned int i;
#if defined(CONFIG_REGULATOR)
- static const char *regulator_names[] = {
+ static const char * const regulator_names[] = {
"mali", "shadercores"
};
BUILD_BUG_ON(ARRAY_SIZE(regulator_names) < BASE_MAX_NR_CLOCKS_REGULATORS);
@@ -4787,7 +4819,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
&kbase_device_debugfs_mem_pool_max_size_fops);
if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PROTECTED_DEBUG_MODE)) {
- debugfs_create_file("protected_debug_mode", S_IRUGO,
+ debugfs_create_file("protected_debug_mode", 0444,
kbdev->mali_debugfs_directory, kbdev,
&fops_protected_debug_mode);
}
@@ -4806,7 +4838,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_DEVFREQ */
#if !MALI_USE_CSF
- debugfs_create_file("serialize_jobs", S_IRUGO | S_IWUSR,
+ debugfs_create_file("serialize_jobs", 0644,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_serialize_jobs_debugfs_fops);
@@ -4832,12 +4864,13 @@ int kbase_device_coherency_init(struct kbase_device *kbdev)
u32 supported_coherency_bitmap =
kbdev->gpu_props.props.raw_props.coherency_mode;
const void *coherency_override_dts;
+ bool dma_coherent;
u32 override_coherency, gpu_id;
unsigned int prod_id;
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
gpu_id &= GPU_ID_VERSION_PRODUCT_ID;
- prod_id = gpu_id >> GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+ prod_id = gpu_id >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT;
/* Only for tMIx :
* (COHERENCY_ACE_LITE | COHERENCY_ACE) was incorrectly
@@ -4856,12 +4889,23 @@ int kbase_device_coherency_init(struct kbase_device *kbdev)
/* device tree may override the coherency */
#if IS_ENABLED(CONFIG_OF)
+	/* treat "dma-coherent" as a synonym for ACE-lite */
+ dma_coherent = of_dma_is_coherent(kbdev->dev->of_node);
coherency_override_dts = of_get_property(kbdev->dev->of_node,
"system-coherency",
NULL);
- if (coherency_override_dts) {
-
- override_coherency = be32_to_cpup(coherency_override_dts);
+ if (coherency_override_dts || dma_coherent) {
+ if (coherency_override_dts) {
+ override_coherency = be32_to_cpup(coherency_override_dts);
+ if (dma_coherent && override_coherency != COHERENCY_ACE_LITE) {
+ dev_err(kbdev->dev,
+ "system-coherency needs to be 0 when dma-coherent is set\n");
+ return -EINVAL;
+ }
+ } else {
+ /* dma-coherent set and system-coherency not specified */
+ override_coherency = COHERENCY_ACE_LITE;
+ }
#if MALI_USE_CSF && !IS_ENABLED(CONFIG_MALI_NO_MALI)
/* ACE coherency mode is not supported by Driver on CSF GPUs.
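As a quick reference, the combined effect of the two device-tree inputs handled above can be summarised as below (a sketch derived from this hunk; COHERENCY_ACE_LITE is value 0, as the error message indicates):

	/*
	 * dma-coherent | system-coherency       | resulting override
	 * -------------+------------------------+------------------------------
	 * absent       | absent                 | none (no override applied)
	 * absent       | <value>                | override_coherency = <value>
	 * present      | absent                 | COHERENCY_ACE_LITE
	 * present      | 0 (COHERENCY_ACE_LITE) | COHERENCY_ACE_LITE
	 * present      | any other value        | probe fails with -EINVAL
	 */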
@@ -4969,8 +5013,7 @@ static ssize_t csg_scheduling_period_show(struct device *dev,
return ret;
}
-static DEVICE_ATTR(csg_scheduling_period, 0644, csg_scheduling_period_show,
- csg_scheduling_period_store);
+static DEVICE_ATTR_RW(csg_scheduling_period);
/**
* fw_timeout_store - Store callback for the fw_timeout sysfs file.
@@ -5038,7 +5081,73 @@ static ssize_t fw_timeout_show(struct device *dev,
return ret;
}
-static DEVICE_ATTR(fw_timeout, 0644, fw_timeout_show, fw_timeout_store);
+static DEVICE_ATTR_RW(fw_timeout);
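The DEVICE_ATTR_RW() conversions in this hunk lean on the standard kernel convention; as a rough illustration (not part of the patch), the macro is equivalent to the open-coded form it replaces:

	/* Sketch only: DEVICE_ATTR_RW(fw_timeout) declares dev_attr_fw_timeout
	 * with 0644 permissions and wires it to fw_timeout_show() and
	 * fw_timeout_store(), roughly:
	 *
	 *   static struct device_attribute dev_attr_fw_timeout =
	 *       __ATTR(fw_timeout, 0644, fw_timeout_show, fw_timeout_store);
	 *
	 * which is why the callbacks must keep the <name>_show/<name>_store
	 * naming.
	 */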
+
+/**
+ * idle_hysteresis_time_store - Store callback for CSF idle_hysteresis_time
+ * sysfs file.
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the idle_hysteresis_time sysfs file is
+ * written to.
+ *
+ * This file contains the value of the idle hysteresis duration.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t idle_hysteresis_time_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ u32 dur = 0;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kstrtou32(buf, 0, &dur)) {
+ dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n"
+ "Use format <idle_hysteresis_time>\n");
+ return -EINVAL;
+ }
+
+ kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur);
+
+ return count;
+}
+
+/**
+ * idle_hysteresis_time_show - Show callback for CSF idle_hysteresis_time
+ * sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the current idle hysteresis duration.
+ *
+ * This function is called to get the current idle hysteresis duration in ms.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t idle_hysteresis_time_show(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+ u32 dur;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev);
+ ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur);
+
+ return ret;
+}
+
+static DEVICE_ATTR_RW(idle_hysteresis_time);
#endif /* MALI_USE_CSF */
static struct attribute *kbase_scheduling_attrs[] = {
@@ -5062,15 +5171,13 @@ static struct attribute *kbase_attrs[] = {
&dev_attr_gpuinfo.attr,
&dev_attr_dvfs_period.attr,
&dev_attr_pm_poweroff.attr,
-#if MALI_USE_CSF
- &dev_attr_idle_hysteresis_time.attr,
-#endif
&dev_attr_reset_timeout.attr,
#if !MALI_USE_CSF
&dev_attr_js_scheduling_period.attr,
#else
&dev_attr_csg_scheduling_period.attr,
&dev_attr_fw_timeout.attr,
+ &dev_attr_idle_hysteresis_time.attr,
#endif /* !MALI_USE_CSF */
&dev_attr_power_policy.attr,
&dev_attr_core_mask.attr,
@@ -5233,10 +5340,10 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
/**
* kbase_device_suspend - Suspend callback from the OS.
*
- * This is called by Linux when the device should suspend.
- *
* @dev: The device to suspend
*
+ * This is called by Linux when the device should suspend.
+ *
* Return: A standard Linux error code on failure, 0 otherwise.
*/
static int kbase_device_suspend(struct device *dev)
@@ -5268,10 +5375,10 @@ static int kbase_device_suspend(struct device *dev)
/**
* kbase_device_resume - Resume callback from the OS.
*
- * This is called by Linux when the device should resume from suspension.
- *
* @dev: The device to resume
*
+ * This is called by Linux when the device should resume from suspension.
+ *
* Return: A standard Linux error code
*/
static int kbase_device_resume(struct device *dev)
@@ -5303,12 +5410,12 @@ static int kbase_device_resume(struct device *dev)
/**
* kbase_device_runtime_suspend - Runtime suspend callback from the OS.
*
+ * @dev: The device to suspend
+ *
* This is called by Linux when the device should prepare for a condition in
* which it will not be able to communicate with the CPU(s) and RAM due to
* power management.
*
- * @dev: The device to suspend
- *
* Return: A standard Linux error code
*/
#ifdef KBASE_PM_RUNTIME
@@ -5349,10 +5456,10 @@ static int kbase_device_runtime_suspend(struct device *dev)
/**
* kbase_device_runtime_resume - Runtime resume callback from the OS.
*
- * This is called by Linux when the device should go into a fully active state.
- *
* @dev: The device to resume
*
+ * This is called by Linux when the device should go into a fully active state.
+ *
* Return: A standard Linux error code
*/
@@ -5520,7 +5627,7 @@ void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value)
trace_mali_page_fault_insert_pages(dev_id, event, value);
}
-void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event)
+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long event)
{
trace_mali_total_alloc_pages_change(dev_id, event);
}
diff --git a/mali_kbase/mali_kbase_ctx_sched.h b/mali_kbase/mali_kbase_ctx_sched.h
index 334724f..f787cc3 100644
--- a/mali_kbase/mali_kbase_ctx_sched.h
+++ b/mali_kbase/mali_kbase_ctx_sched.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2017-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -79,7 +79,7 @@ void kbase_ctx_sched_term(struct kbase_device *kbdev);
int kbase_ctx_sched_retain_ctx(struct kbase_context *kctx);
/**
- * kbase_ctx_sched_retain_ctx_refcount
+ * kbase_ctx_sched_retain_ctx_refcount - Retain a reference to the &struct kbase_context
* @kctx: The context to which to retain a reference
*
* This function only retains a reference to the context. It must be called
@@ -187,8 +187,8 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_nolock(
* @kctx: Context to be refcounted
*
* The following locks must be held by the caller:
- * * kbase_device::mmu_hw_mutex
- * * kbase_device::hwaccess_lock
+ * &kbase_device.mmu_hw_mutex
+ * &kbase_device.hwaccess_lock
*
* Return: true if refcount succeeded, and the context will not be scheduled
* out, false if the refcount failed (because the context is being/has been
diff --git a/mali_kbase/mali_kbase_debug_job_fault.h b/mali_kbase/mali_kbase_debug_job_fault.h
index 39aeed0..059d9c4 100644
--- a/mali_kbase/mali_kbase_debug_job_fault.h
+++ b/mali_kbase/mali_kbase_debug_job_fault.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -53,7 +53,7 @@ void kbase_debug_job_fault_dev_term(struct kbase_device *kbdev);
* kbase_debug_job_fault_context_init - Initialize the relevant
* data structure per context
* @kctx: KBase context pointer
- * @return 0 on success
+ * Return: 0 on success
*/
int kbase_debug_job_fault_context_init(struct kbase_context *kctx);
@@ -68,39 +68,42 @@ void kbase_debug_job_fault_context_term(struct kbase_context *kctx);
* kbase_debug_job_fault_kctx_unblock - Unblock the atoms blocked on job fault
* dumping on context termination.
*
+ * @kctx: KBase context pointer
+ *
* This function is called during context termination to unblock the atom for
* which the job fault occurred and also the atoms following it. This is needed
* as otherwise the wait for zero jobs could time out (leading to an assertion
* failure, kernel panic in debug builds) in the pathological case where the
* thread/daemon capturing the job fault events is still running but has, for
* some reason, stopped consuming the events.
- *
- * @kctx: KBase context pointer
*/
void kbase_debug_job_fault_kctx_unblock(struct kbase_context *kctx);
/**
* kbase_debug_job_fault_process - Process the failed job.
- * It will send a event and wake up the job fault waiting queue
- * Then create a work queue to wait for job dump finish
- * This function should be called in the interrupt handler and before
- * jd_done that make sure the jd_done_worker will be delayed until the
- * job dump finish
+ *
* @katom: The failed atom pointer
* @completion_code: the job status
- * @return true if dump is going on
+ *
+ * It will send an event and wake up the job fault waiting queue, then
+ * create a work queue to wait for the job dump to finish.
+ * This function should be called in the interrupt handler and before
+ * jd_done, to make sure the jd_done_worker will be delayed until the
+ * job dump finishes.
+ *
+ * Return: true if dump is going on
*/
bool kbase_debug_job_fault_process(struct kbase_jd_atom *katom,
u32 completion_code);
-
/**
* kbase_debug_job_fault_reg_snapshot_init - Set the interested registers
* address during the job fault process, the relevant registers will
* be saved when a job fault happens
* @kctx: KBase context pointer
* @reg_range: Maximum register address space
- * @return true if initializing successfully
+ *
+ * Return: true if the initialization was successful
*/
bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
int reg_range);
@@ -108,8 +111,10 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
/**
* kbase_job_fault_get_reg_snapshot - Read the interested registers for
* failed job dump
+ *
* @kctx: KBase context pointer
- * @return true if getting registers successfully
+ *
+ * Return: true if the registers were read successfully
*/
bool kbase_job_fault_get_reg_snapshot(struct kbase_context *kctx);
diff --git a/mali_kbase/mali_kbase_debug_mem_view.c b/mali_kbase/mali_kbase_debug_mem_view.c
index 5a99b5e..ce87a00 100644
--- a/mali_kbase/mali_kbase_debug_mem_view.c
+++ b/mali_kbase/mali_kbase_debug_mem_view.c
@@ -31,6 +31,22 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
+#define SHOW_GPU_MEM_DATA(type, format) \
+{ \
+ unsigned int i, j; \
+ const type *ptr = (type *)cpu_addr; \
+ const unsigned int col_width = sizeof(type); \
+ const unsigned int row_width = (col_width == sizeof(u64)) ? 32 : 16; \
+ const unsigned int num_cols = row_width / col_width; \
+ for (i = 0; i < PAGE_SIZE; i += row_width) { \
+ seq_printf(m, "%016llx:", gpu_addr + i); \
+ for (j = 0; j < num_cols; j++) \
+ seq_printf(m, format, ptr[j]); \
+ ptr += num_cols; \
+ seq_putc(m, '\n'); \
+ } \
+}
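To make the macro above easier to follow, this is roughly what it expands to for the default 4-byte column width (sketch only, not part of the patch); each row covers 16 bytes printed as four 32-bit words, while the u64 case widens a row to 32 bytes:

	{
		unsigned int i, j;
		const u32 *ptr = (u32 *)cpu_addr;

		for (i = 0; i < PAGE_SIZE; i += 16) {
			seq_printf(m, "%016llx:", gpu_addr + i);
			for (j = 0; j < 4; j++)
				seq_printf(m, " %08x", ptr[j]);
			ptr += 4;
			seq_putc(m, '\n');
		}
	}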
+
struct debug_mem_mapping {
struct list_head node;
@@ -44,6 +60,7 @@ struct debug_mem_mapping {
struct debug_mem_data {
struct list_head mapping_list;
struct kbase_context *kctx;
+ unsigned int column_width;
};
struct debug_mem_seq_off {
@@ -111,9 +128,9 @@ static int debug_mem_show(struct seq_file *m, void *v)
struct debug_mem_data *mem_data = m->private;
struct debug_mem_seq_off *data = v;
struct debug_mem_mapping *map;
- int i, j;
+ unsigned long long gpu_addr;
struct page *page;
- uint32_t *mapping;
+ void *cpu_addr;
pgprot_t prot = PAGE_KERNEL;
map = list_entry(data->lh, struct debug_mem_mapping, node);
@@ -130,20 +147,33 @@ static int debug_mem_show(struct seq_file *m, void *v)
prot = pgprot_writecombine(prot);
page = as_page(map->alloc->pages[data->offset]);
- mapping = vmap(&page, 1, VM_MAP, prot);
- if (!mapping)
+ cpu_addr = vmap(&page, 1, VM_MAP, prot);
+ if (!cpu_addr)
goto out;
- for (i = 0; i < PAGE_SIZE; i += 4*sizeof(*mapping)) {
- seq_printf(m, "%016llx:", i + ((map->start_pfn +
- data->offset) << PAGE_SHIFT));
+ gpu_addr = (map->start_pfn + data->offset) << PAGE_SHIFT;
- for (j = 0; j < 4*sizeof(*mapping); j += sizeof(*mapping))
- seq_printf(m, " %08x", mapping[(i+j)/sizeof(*mapping)]);
- seq_putc(m, '\n');
+ /* Cases for 4 supported values of column_width for showing
+ * the GPU memory contents.
+ */
+ switch (mem_data->column_width) {
+ case 1:
+ SHOW_GPU_MEM_DATA(u8, " %02hhx");
+ break;
+ case 2:
+ SHOW_GPU_MEM_DATA(u16, " %04hx");
+ break;
+ case 4:
+ SHOW_GPU_MEM_DATA(u32, " %08x");
+ break;
+ case 8:
+ SHOW_GPU_MEM_DATA(u64, " %016llx");
+ break;
+ default:
+ dev_warn(mem_data->kctx->kbdev->dev, "Unexpected column width");
}
- vunmap(mapping);
+ vunmap(cpu_addr);
seq_putc(m, '\n');
@@ -207,6 +237,14 @@ static int debug_mem_open(struct inode *i, struct file *file)
if (get_file_rcu(kctx->filp) == 0)
return -ENOENT;
+ /* Check if file was opened in write mode. GPU memory contents
+ * are returned only when the file is not opened in write mode.
+ */
+ if (file->f_mode & FMODE_WRITE) {
+ file->private_data = kctx;
+ return 0;
+ }
+
ret = seq_open(file, &ops);
if (ret)
goto open_fail;
@@ -223,6 +261,8 @@ static int debug_mem_open(struct inode *i, struct file *file)
kbase_gpu_vm_lock(kctx);
+ mem_data->column_width = kctx->mem_view_column_width;
+
ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
if (ret != 0) {
kbase_gpu_vm_unlock(kctx);
@@ -241,6 +281,20 @@ static int debug_mem_open(struct inode *i, struct file *file)
goto out;
}
+#if MALI_USE_CSF
+ ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data);
+ if (ret != 0) {
+ kbase_gpu_vm_unlock(kctx);
+ goto out;
+ }
+
+ ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data);
+ if (ret != 0) {
+ kbase_gpu_vm_unlock(kctx);
+ goto out;
+ }
+#endif
+
kbase_gpu_vm_unlock(kctx);
((struct seq_file *)file->private_data)->private = mem_data;
@@ -270,32 +324,70 @@ open_fail:
static int debug_mem_release(struct inode *inode, struct file *file)
{
struct kbase_context *const kctx = inode->i_private;
- struct seq_file *sfile = file->private_data;
- struct debug_mem_data *mem_data = sfile->private;
- struct debug_mem_mapping *mapping;
- seq_release(inode, file);
+ /* If the file wasn't opened in write mode, then release the
+ * memory allocated to show the GPU memory contents.
+ */
+ if (!(file->f_mode & FMODE_WRITE)) {
+ struct seq_file *sfile = file->private_data;
+ struct debug_mem_data *mem_data = sfile->private;
+ struct debug_mem_mapping *mapping;
+
+ seq_release(inode, file);
- while (!list_empty(&mem_data->mapping_list)) {
- mapping = list_first_entry(&mem_data->mapping_list,
+ while (!list_empty(&mem_data->mapping_list)) {
+ mapping = list_first_entry(&mem_data->mapping_list,
struct debug_mem_mapping, node);
- kbase_mem_phy_alloc_put(mapping->alloc);
- list_del(&mapping->node);
- kfree(mapping);
- }
+ kbase_mem_phy_alloc_put(mapping->alloc);
+ list_del(&mapping->node);
+ kfree(mapping);
+ }
- kfree(mem_data);
+ kfree(mem_data);
+ }
fput(kctx->filp);
return 0;
}
+static ssize_t debug_mem_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct kbase_context *const kctx = file->private_data;
+ unsigned int column_width = 0;
+ int ret = 0;
+
+ CSTD_UNUSED(ppos);
+
+ ret = kstrtouint_from_user(ubuf, count, 0, &column_width);
+
+ if (ret)
+ return ret;
+ if (!is_power_of_2(column_width)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Column width %u not a multiple of power of 2", column_width);
+ return -EINVAL;
+ }
+ if (column_width > 8) {
+ dev_dbg(kctx->kbdev->dev,
+ "Column width %u greater than 8 not supported", column_width);
+ return -EINVAL;
+ }
+
+ kbase_gpu_vm_lock(kctx);
+ kctx->mem_view_column_width = column_width;
+ kbase_gpu_vm_unlock(kctx);
+
+ return count;
+}
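For context, the write handler above is what lets user space pick the dump width; a minimal user-space sketch follows, assuming the usual debugfs mount point and per-context directory naming (neither is shown in this patch):

	/* Sketch only; the exact mem_view path is an assumption here. */
	#include <fcntl.h>
	#include <unistd.h>

	static void set_mem_view_width(const char *mem_view_path)
	{
		/* e.g. /sys/kernel/debug/mali0/ctx/<pid>_<id>/mem_view */
		int fd = open(mem_view_path, O_WRONLY);

		if (fd >= 0) {
			/* Must be a power of two no larger than 8: 1, 2, 4 or 8. */
			ssize_t ret = write(fd, "8", 1);

			close(fd);
			(void)ret;
		}
		/* A subsequent read of mem_view now prints 8-byte (u64) columns. */
	}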
+
static const struct file_operations kbase_debug_mem_view_fops = {
.owner = THIS_MODULE,
.open = debug_mem_open,
.release = debug_mem_release,
.read = seq_read,
+ .write = debug_mem_write,
.llseek = seq_lseek
};
@@ -308,6 +400,9 @@ void kbase_debug_mem_view_init(struct kbase_context *const kctx)
WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
return;
+ /* Default column width is 4 */
+ kctx->mem_view_column_width = sizeof(u32);
+
debugfs_create_file("mem_view", 0400, kctx->kctx_dentry, kctx,
&kbase_debug_mem_view_fops);
}
diff --git a/mali_kbase/mali_kbase_debugfs_helper.c b/mali_kbase/mali_kbase_debugfs_helper.c
index 973739f..4c1aa28 100644
--- a/mali_kbase/mali_kbase_debugfs_helper.c
+++ b/mali_kbase/mali_kbase_debugfs_helper.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,20 +34,20 @@
/**
* set_attr_from_string - Parse a string to set elements of an array
*
- * This is the core of the implementation of
- * kbase_debugfs_helper_set_attr_from_string. The only difference between the
- * two functions is that this one requires the input string to be writable.
- *
* @buf: Input string to parse. Must be nul-terminated!
* @array: Address of an object that can be accessed like an array.
* @nelems: Number of elements in the array.
* @set_attr_fn: Function to be called back for each array element.
*
+ * This is the core of the implementation of
+ * kbase_debugfs_helper_set_attr_from_string. The only difference between the
+ * two functions is that this one requires the input string to be writable.
+ *
* Return: 0 if success, negative error code otherwise.
*/
static int
set_attr_from_string(char *const buf, void *const array, size_t const nelems,
- kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
+ kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
{
size_t index, err = 0;
char *ptr = buf;
@@ -143,7 +143,7 @@ int kbase_debugfs_string_validator(char *const buf)
int kbase_debugfs_helper_set_attr_from_string(
const char *const buf, void *const array, size_t const nelems,
- kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
+ kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
{
char *const wbuf = kstrdup(buf, GFP_KERNEL);
int err = 0;
@@ -168,7 +168,7 @@ int kbase_debugfs_helper_set_attr_from_string(
ssize_t kbase_debugfs_helper_get_attr_to_string(
char *const buf, size_t const size, void *const array,
size_t const nelems,
- kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
+ kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
{
ssize_t total = 0;
size_t index;
@@ -189,7 +189,7 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
int kbase_debugfs_helper_seq_write(
struct file *const file, const char __user *const ubuf,
size_t const count, size_t const nelems,
- kbase_debugfs_helper_set_attr_fn *const set_attr_fn)
+ kbase_debugfs_helper_set_attr_fn * const set_attr_fn)
{
const struct seq_file *const sfile = file->private_data;
void *const array = sfile->private;
@@ -228,8 +228,8 @@ int kbase_debugfs_helper_seq_write(
}
int kbase_debugfs_helper_seq_read(
- struct seq_file *const sfile, size_t const nelems,
- kbase_debugfs_helper_get_attr_fn *const get_attr_fn)
+ struct seq_file * const sfile, size_t const nelems,
+ kbase_debugfs_helper_get_attr_fn * const get_attr_fn)
{
void *const array = sfile->private;
size_t index;
diff --git a/mali_kbase/mali_kbase_debugfs_helper.h b/mali_kbase/mali_kbase_debugfs_helper.h
index 4c69d8b..cbb24d6 100644
--- a/mali_kbase/mali_kbase_debugfs_helper.h
+++ b/mali_kbase/mali_kbase_debugfs_helper.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,6 +37,11 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
* kbase_debugfs_helper_set_attr_from_string - Parse a string to reconfigure an
* array
*
+ * @buf: Input string to parse. Must be nul-terminated!
+ * @array: Address of an object that can be accessed like an array.
+ * @nelems: Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
* The given function is called once for each attribute value found in the
* input string. It is not an error if the string specifies fewer attribute
* values than the specified number of array elements.
@@ -46,11 +51,6 @@ typedef void kbase_debugfs_helper_set_attr_fn(void *array, size_t index,
* Attribute values are separated by one or more space characters.
* Additional leading and trailing spaces are ignored.
*
- * @buf: Input string to parse. Must be nul-terminated!
- * @array: Address of an object that can be accessed like an array.
- * @nelems: Number of elements in the array.
- * @set_attr_fn: Function to be called back for each array element.
- *
* Return: 0 if success, negative error code otherwise.
*/
int kbase_debugfs_helper_set_attr_from_string(
@@ -62,6 +62,8 @@ int kbase_debugfs_helper_set_attr_from_string(
* debugfs file for any incorrect formats
* or wrong values.
*
+ * @buf: Null-terminated string to validate.
+ *
* This function is to be used before any writes to debugfs values are done
* such that any strings with erroneous values (such as octal 09 or
* hexadecimal 0xGH) are fully ignored - without this validation, any correct
@@ -73,8 +75,6 @@ int kbase_debugfs_helper_set_attr_from_string(
* of the input string. This function also requires the input string to be
* writable.
*
- * @buf: Null-terminated string to validate.
- *
* Return: 0 if there was no error, else -22 (the error value returned by
* kstrtoul) if any value in the string was wrong or had an incorrect format.
*/
@@ -95,17 +95,17 @@ typedef size_t kbase_debugfs_helper_get_attr_fn(void *array, size_t index);
* kbase_debugfs_helper_get_attr_to_string - Construct a formatted string
* from elements in an array
*
- * The given function is called once for each array element to get the
- * value of the attribute to be inspected. The attribute values are
- * written to the buffer as a formatted string of decimal numbers
- * separated by spaces and terminated by a linefeed.
- *
* @buf: Buffer in which to store the formatted output string.
* @size: The size of the buffer, in bytes.
* @array: Address of an object that can be accessed like an array.
* @nelems: Number of elements in the array.
* @get_attr_fn: Function to be called back for each array element.
*
+ * The given function is called once for each array element to get the
+ * value of the attribute to be inspected. The attribute values are
+ * written to the buffer as a formatted string of decimal numbers
+ * separated by spaces and terminated by a linefeed.
+ *
* Return: Number of characters written excluding the nul terminator.
*/
ssize_t kbase_debugfs_helper_get_attr_to_string(
@@ -116,6 +116,10 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
* kbase_debugfs_helper_seq_read - Implements reads from a virtual file for an
* array
*
+ * @sfile: A virtual file previously opened by calling single_open.
+ * @nelems: Number of elements in the array.
+ * @get_attr_fn: Function to be called back for each array element.
+ *
* The virtual file must have been opened by calling single_open and passing
* the address of an object that can be accessed like an array.
*
@@ -124,10 +128,6 @@ ssize_t kbase_debugfs_helper_get_attr_to_string(
* written to the buffer as a formatted string of decimal numbers
* separated by spaces and terminated by a linefeed.
*
- * @sfile: A virtual file previously opened by calling single_open.
- * @nelems: Number of elements in the array.
- * @get_attr_fn: Function to be called back for each array element.
- *
* Return: 0 if success, negative error code otherwise.
*/
int kbase_debugfs_helper_seq_read(
@@ -138,6 +138,12 @@ int kbase_debugfs_helper_seq_read(
* kbase_debugfs_helper_seq_write - Implements writes to a virtual file for an
* array
*
+ * @file: A virtual file previously opened by calling single_open.
+ * @ubuf: Source address in user space.
+ * @count: Number of bytes written to the virtual file.
+ * @nelems: Number of elements in the array.
+ * @set_attr_fn: Function to be called back for each array element.
+ *
* The virtual file must have been opened by calling single_open and passing
* the address of an object that can be accessed like an array.
*
@@ -145,12 +151,6 @@ int kbase_debugfs_helper_seq_read(
* data written to the virtual file. For further details, refer to the
* description of set_attr_from_string.
*
- * @file: A virtual file previously opened by calling single_open.
- * @ubuf: Source address in user space.
- * @count: Number of bytes written to the virtual file.
- * @nelems: Number of elements in the array.
- * @set_attr_fn: Function to be called back for each array element.
- *
* Return: 0 if success, negative error code otherwise.
*/
int kbase_debugfs_helper_seq_write(struct file *file,
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 86e4042..b19f0f9 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,11 +35,15 @@
#include <backend/gpu/mali_kbase_instr_defs.h>
#include <mali_kbase_pm.h>
#include <mali_kbase_gpuprops_types.h>
+#include <mali_kbase_hwcnt_watchdog_if.h>
+
#if MALI_USE_CSF
#include <mali_kbase_hwcnt_backend_csf.h>
#else
#include <mali_kbase_hwcnt_backend_jm.h>
+#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
#endif
+
#include <protected_mode_switcher.h>
#include <linux/atomic.h>
@@ -77,7 +81,7 @@
#define RESET_TIMEOUT 500
/**
- * The maximum number of Job Slots to support in the Hardware.
+ * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware.
*
* You can optimize this down if your target devices will only ever support a
* small number of job slots.
@@ -85,7 +89,7 @@
#define BASE_JM_MAX_NR_SLOTS 3
/**
- * The maximum number of Address Spaces to support in the Hardware.
+ * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware.
*
* You can optimize this down if your target devices will only ever support a
* small number of Address Spaces
@@ -105,19 +109,19 @@
#define KBASEP_AS_NR_INVALID (-1)
/**
- * Maximum size in bytes of a MMU lock region, as a logarithm
+ * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region,
+ * as a logarithm
*/
#define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */
/**
- * Minimum size in bytes of a MMU lock region, as a logarithm
- */
-#define KBASE_LOCK_REGION_MIN_SIZE_LOG2 (15) /* 32 kB */
-
-/**
- * Maximum number of GPU memory region zones
+ * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones
*/
+#if MALI_USE_CSF
+#define KBASE_REG_ZONE_MAX 6ul
+#else
#define KBASE_REG_ZONE_MAX 4ul
+#endif
#include "mali_kbase_hwaccess_defs.h"
@@ -241,9 +245,10 @@ struct kbase_fault {
/**
* struct kbase_mmu_table - object representing a set of GPU page tables
- * @mmu_teardown_pages: Buffer of 4 Pages in size, used to cache the entries
- * of top & intermediate level page tables to avoid
- * repeated calls to kmap_atomic during the MMU teardown.
+ * @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
+ * to cache the entries of top (L0) & intermediate level
+ * page tables (L1 & L2) to avoid repeated calls to
+ * kmap_atomic() during the MMU teardown.
* @mmu_lock: Lock to serialize the accesses made to multi level GPU
* page tables
* @pgd: Physical address of the page allocated for the top
@@ -258,7 +263,7 @@ struct kbase_fault {
* it is NULL
*/
struct kbase_mmu_table {
- u64 *mmu_teardown_pages;
+ u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
struct mutex mmu_lock;
phys_addr_t pgd;
u8 group_id;
@@ -350,8 +355,6 @@ struct kbase_clk_rate_listener {
* enumerated GPU clock.
* @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
* operations.
- * @gpu_clk_rate_trace_write: Pointer to the function that would emit the
- * tracepoint for the clock rate change.
* @listeners: List of listener attached.
* @lock: Lock to serialize the actions of GPU clock rate trace
* manager.
@@ -366,13 +369,14 @@ struct kbase_clk_rate_trace_manager {
/**
* struct kbase_pm_device_data - Data stored per device for power management.
- * @lock: The lock protecting Power Management structures accessed outside of
- * IRQ.
- * This lock must also be held whenever the GPU is being powered on or
- * off.
- * @active_count: The reference count of active contexts on this device. Note
- * that some code paths keep shaders/the tiler powered whilst this is 0.
- * Use kbase_pm_is_active() instead to check for such cases.
+ * @lock: The lock protecting Power Management structures accessed
+ * outside of IRQ.
+ * This lock must also be held whenever the GPU is being
+ * powered on or off.
+ * @active_count: The reference count of active contexts on this device.
+ * Note that some code paths keep shaders/the tiler
+ * powered whilst this is 0.
+ * Use kbase_pm_is_active() instead to check for such cases.
* @suspending: Flag indicating suspending/suspended
* @runtime_active: Flag to track if the GPU is in runtime suspended or active
* state. This ensures that runtime_put and runtime_get
@@ -381,24 +385,24 @@ struct kbase_clk_rate_trace_manager {
* the call to it from runtime_gpu_active callback can be
* skipped.
* @gpu_lost: Flag indicating gpu lost
- * This structure contains data for the power management framework. There
- * is one instance of this structure per device in the system.
+ * This structure contains data for the power management framework.
+ * There is one instance of this structure per device in the system.
* @zero_active_count_wait: Wait queue set when active_count == 0
* @resume_wait: system resume of GPU device.
* @debug_core_mask: Bit masks identifying the available shader cores that are
- * specified via sysfs. One mask per job slot.
+ * specified via sysfs. One mask per job slot.
* @debug_core_mask_all: Bit masks identifying the available shader cores that
- * are specified via sysfs.
+ * are specified via sysfs.
* @callback_power_runtime_init: Callback for initializing the runtime power
- * management. Return 0 on success, else error code
+ * management. Return 0 on success, else error code
* @callback_power_runtime_term: Callback for terminating the runtime power
- * management.
+ * management.
* @dvfs_period: Time in milliseconds between each dvfs sample
* @backend: KBase PM backend data
* @arb_vm_state: The state of the arbiter VM machine
* @gpu_users_waiting: Used by virtualization to notify the arbiter that there
- * are users waiting for the GPU so that it can request and resume the
- * driver.
+ * are users waiting for the GPU so that it can request
+ * and resume the driver.
* @clk_rtm: The state of the GPU clock rate trace manager
*/
struct kbase_pm_device_data {
@@ -475,16 +479,16 @@ struct kbase_mem_pool {
/**
* struct kbase_mem_pool_group - a complete set of physical memory pools.
*
+ * @small: Array of objects containing the state for pools of 4 KiB size
+ * physical pages.
+ * @large: Array of objects containing the state for pools of 2 MiB size
+ * physical pages.
+ *
* Memory pools are used to allow efficient reallocation of previously-freed
* physical pages. A pair of memory pools is initialized for each physical
* memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays
* should be indexed by physical memory group ID, the meaning of which is
* defined by the systems integrator.
- *
- * @small: Array of objects containing the state for pools of 4 KiB size
- * physical pages.
- * @large: Array of objects containing the state for pools of 2 MiB size
- * physical pages.
*/
struct kbase_mem_pool_group {
struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
@@ -505,11 +509,11 @@ struct kbase_mem_pool_config {
* struct kbase_mem_pool_group_config - Initial configuration for a complete
* set of physical memory pools
*
- * This array should be indexed by physical memory group ID, the meaning
- * of which is defined by the systems integrator.
- *
* @small: Array of initial configuration for pools of 4 KiB pages.
* @large: Array of initial configuration for pools of 2 MiB pages.
+ *
+ * This array should be indexed by physical memory group ID, the meaning
+ * of which is defined by the systems integrator.
*/
struct kbase_mem_pool_group_config {
struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
@@ -742,8 +746,13 @@ struct kbase_process {
* @hwcnt.addr: HW counter address
* @hwcnt.addr_bytes: HW counter size in bytes
* @hwcnt.backend: Kbase instrumentation backend
- * @hwcnt_watchdog_timer: Hardware counter watchdog interface.
+ * @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference
+ * pointer by hwcnt_gpu_iface, which wraps this implementation in
+ * order to extend it with periodic dumping functionality.
* @hwcnt_gpu_iface: Backend interface for GPU hardware counter access.
+ * @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to
+ * perform periodic dumps in order to prevent hardware counter value
+ * overflow or saturation.
* @hwcnt_gpu_ctx: Context for GPU hardware counter access.
* @hwaccess_lock must be held when calling
* kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
@@ -755,14 +764,6 @@ struct kbase_process {
* therefore timeline is disabled.
* @timeline: Timeline context created per device.
* @ktrace: kbase device's ktrace
- * @trace_lock: Lock to serialize the access to trace buffer.
- * @trace_first_out: Index/offset in the trace buffer at which the first
- * unread message is present.
- * @trace_next_in: Index/offset in the trace buffer at which the new
- * message will be written.
- * @trace_rbuf: Pointer to the buffer storing debug messages/prints
- * tracing the various events in Driver.
- * The buffer is filled in circular fashion.
* @reset_timeout_ms: Number of milliseconds to wait for the soft stop to
* complete for the GPU jobs before proceeding with the
* GPU reset.
@@ -867,6 +868,13 @@ struct kbase_process {
* backend specific data for HW access layer.
* @faults_pending: Count of page/bus faults waiting for bottom half processing
* via workqueues.
+ * @mmu_hw_operation_in_progress: Set before sending the MMU command and is
+ * cleared after the command is complete. Whilst this
+ * flag is set, the write to L2_PWROFF register will be
+ * skipped, which is needed to work around the HW issue
+ * GPU2019-3878. PM state machine is invoked after
+ * clearing this flag and @hwaccess_lock is used to
+ * serialize the access.
* @poweroff_pending: Set when power off operation for GPU is started, reset when
* power on for GPU is started.
* @infinite_cache_active_default: Set to enable using infinite cache for all the
@@ -896,9 +904,6 @@ struct kbase_process {
* enabled.
* @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
* counters, used if atomic disable is not possible.
- * @buslogger: Pointer to the structure required for interfacing
- * with the bus logger module to set the size of buffer
- * used by the module for capturing bus logs.
* @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of
* IRQ + bottom half is being done, to prevent the writes
* to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
@@ -998,7 +1003,7 @@ struct kbase_device {
struct memory_group_manager_device *mgm_dev;
struct kbase_as as[BASE_MAX_NR_AS];
- u16 as_free; /* Bitpattern of free Address Spaces */
+ u16 as_free;
struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
spinlock_t mmu_mask_change;
@@ -1018,7 +1023,6 @@ struct kbase_device {
#if MALI_USE_CSF
struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
- struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
#else
struct kbase_hwcnt {
spinlock_t lock;
@@ -1029,9 +1033,13 @@ struct kbase_device {
struct kbase_instr_backend backend;
} hwcnt;
+
+ struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend;
#endif
struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
+ struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
+
struct kbase_hwcnt_context *hwcnt_gpu_ctx;
struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
struct kbase_vinstr_context *vinstr_ctx;
@@ -1128,6 +1136,9 @@ struct kbase_device {
atomic_t faults_pending;
+#if MALI_USE_CSF
+ bool mmu_hw_operation_in_progress;
+#endif
bool poweroff_pending;
#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
@@ -1479,8 +1490,8 @@ struct kbase_sub_alloc {
* @mem_partials_lock: Lock for protecting the operations done on the elements
* added to @mem_partials list.
* @mem_partials: List head for the list of large pages, 2MB in size, which
- * which have been split into 4 KB pages and are used
- * partially for the allocations >= 2 MB in size.
+ * have been split into 4 KB pages and are used partially
+ * for the allocations >= 2 MB in size.
* @reg_lock: Lock used for GPU virtual address space management operations,
* like adding/freeing a memory region in the address space.
* Can be converted to a rwlock ?.
@@ -1492,6 +1503,17 @@ struct kbase_sub_alloc {
* @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA
* zone of the GPU virtual address space. Used for GPU-executable
* allocations which don't need the SAME_VA property.
+ * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the
+ * EXEC_FIXED_VA zone of the GPU virtual address space. Used for
+ * GPU-executable allocations with FIXED/FIXABLE GPU virtual
+ * addresses.
+ * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone
+ * of the GPU virtual address space. Used for allocations with
+ * FIXED/FIXABLE GPU virtual addresses.
+ * @num_fixable_allocs: A count for the number of memory allocations with the
+ * BASE_MEM_FIXABLE property.
+ * @num_fixed_allocs: A count for the number of memory allocations with the
+ * BASE_MEM_FIXED property.
* @reg_zone: Zone information for the reg_rbtree_<...> members.
* @cookies: Bitmask containing of BITS_PER_LONG bits, used mainly for
* SAME_VA allocations to defer the reservation of memory region
@@ -1595,6 +1617,8 @@ struct kbase_sub_alloc {
* dumping of its debug info is in progress.
* @job_fault_resume_event_list: List containing atoms completed after the faulty
* atom but before the debug data for faulty atom was dumped.
+ * @mem_view_column_width: Controls the number of bytes shown in every column of the
+ * output of "mem_view" debugfs file.
* @jsctx_queue: Per slot & priority arrays of object containing the root
* of RB-tree holding currently runnable atoms on the job slot
* and the head item of the linked list of atoms blocked on
@@ -1735,6 +1759,12 @@ struct kbase_context {
struct rb_root reg_rbtree_same;
struct rb_root reg_rbtree_custom;
struct rb_root reg_rbtree_exec;
+#if MALI_USE_CSF
+ struct rb_root reg_rbtree_exec_fixed;
+ struct rb_root reg_rbtree_fixed;
+ atomic64_t num_fixable_allocs;
+ atomic64_t num_fixed_allocs;
+#endif
struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];
#if MALI_USE_CSF
@@ -1804,6 +1834,7 @@ struct kbase_context {
unsigned int *reg_dump;
atomic_t job_fault_count;
struct list_head job_fault_resume_event_list;
+ unsigned int mem_view_column_width;
#endif /* CONFIG_DEBUG_FS */
struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
@@ -1912,13 +1943,6 @@ enum kbase_share_attr_bits {
};
/**
- * enum kbase_timeout_selector - The choice of which timeout to get scaled
- * using current GPU frequency.
- * @CSF_FIRMWARE_TIMEOUT: Response timeout from CSF firmware.
- */
-enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT };
-
-/**
* kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
* @kbdev: kbase device
*
@@ -1933,6 +1957,24 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
return false;
}
+/**
+ * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock
+ * region, as a logarithm
+ *
+ * @gpu_props: GPU properties
+ *
+ * Return: the minimum size of the MMU lock region as dictated by the corresponding
+ * arch spec.
+ */
+static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props)
+{
+ if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >=
+ GPU_ID2_MODEL_MAKE(12, 0))
+ return 12; /* 4 kB */
+
+ return 15; /* 32 kB */
+}
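As a worked example of the helper above: the returned value is the base-2 logarithm of the minimum lock region size in bytes, so GPUs matching GPU_ID2_MODEL_MAKE(12, 0) or newer get a 4 kB minimum while older GPUs keep 32 kB (sketch only, not part of the patch, assuming a kbdev pointer in scope):

	/* Sketch only. */
	u64 min_lock_region_bytes =
		1ULL << kbase_get_lock_region_min_size_log2(&kbdev->gpu_props);
	/* 1ULL << 12 == 4096 bytes (4 kB); 1ULL << 15 == 32768 bytes (32 kB) */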
+
/* Conversion helpers for setting up high resolution timers */
#define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
#define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
@@ -1942,4 +1984,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
-#endif /* _KBASE_DEFS_H_ */
+#endif /* _KBASE_DEFS_H_ */
diff --git a/mali_kbase/mali_kbase_dma_fence.c b/mali_kbase/mali_kbase_dma_fence.c
index bf2d9cc..c4129ff 100644
--- a/mali_kbase/mali_kbase_dma_fence.c
+++ b/mali_kbase/mali_kbase_dma_fence.c
@@ -161,7 +161,7 @@ kbase_dma_fence_cancel_atom(struct kbase_jd_atom *katom)
if (katom->status == KBASE_JD_ATOM_STATE_QUEUED) {
/* Wait was cancelled - zap the atom */
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- if (jd_done_nolock(katom, NULL))
+ if (jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
}
@@ -196,7 +196,7 @@ kbase_dma_fence_work(struct work_struct *pwork)
* dependency. Run jd_done_nolock() on the katom if it is completed.
*/
if (unlikely(katom->status == KBASE_JD_ATOM_STATE_COMPLETED))
- jd_done_nolock(katom, NULL);
+ jd_done_nolock(katom, true);
else
kbase_jd_dep_clear_locked(katom);
diff --git a/mali_kbase/mali_kbase_dma_fence.h b/mali_kbase/mali_kbase_dma_fence.h
index 38d3581..be69118 100644
--- a/mali_kbase/mali_kbase_dma_fence.h
+++ b/mali_kbase/mali_kbase_dma_fence.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -132,6 +132,8 @@ void kbase_dma_fence_term(struct kbase_context *kctx);
/**
* kbase_dma_fence_init() - Initialize Mali dma-fence context
* @kctx: kbase context to initialize
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_dma_fence_init(struct kbase_context *kctx);
diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c
index bdc5d6d..0e0dab9 100644
--- a/mali_kbase/mali_kbase_dummy_job_wa.c
+++ b/mali_kbase/mali_kbase_dummy_job_wa.c
@@ -239,7 +239,7 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
return failed ? -EFAULT : 0;
}
-static ssize_t show_dummy_job_wa_info(struct device * const dev,
+static ssize_t dummy_job_wa_info_show(struct device * const dev,
struct device_attribute * const attr, char * const buf)
{
struct kbase_device *const kbdev = dev_get_drvdata(dev);
@@ -254,7 +254,7 @@ static ssize_t show_dummy_job_wa_info(struct device * const dev,
return err;
}
-static DEVICE_ATTR(dummy_job_wa_info, 0444, show_dummy_job_wa_info, NULL);
+static DEVICE_ATTR_RO(dummy_job_wa_info);
static bool wa_blob_load_needed(struct kbase_device *kbdev)
{
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index 37823d5..2842280 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -41,12 +41,12 @@ extern const struct dma_fence_ops kbase_fence_ops;
#endif
/**
-* struct kbase_fence_cb - Mali dma-fence callback data struct
-* @fence_cb: Callback function
-* @katom: Pointer to katom that is waiting on this callback
-* @fence: Pointer to the fence object on which this callback is waiting
-* @node: List head for linking this callback to the katom
-*/
+ * struct kbase_fence_cb - Mali dma-fence callback data struct
+ * @fence_cb: Callback function
+ * @katom: Pointer to katom that is waiting on this callback
+ * @fence: Pointer to the fence object on which this callback is waiting
+ * @node: List head for linking this callback to the katom
+ */
struct kbase_fence_cb {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence_cb fence_cb;
@@ -63,7 +63,7 @@ struct kbase_fence_cb {
* kbase_fence_out_new() - Creates a new output fence and puts it on the atom
* @katom: Atom to create an output fence for
*
- * return: A new fence object on success, NULL on failure.
+ * Return: A new fence object on success, NULL on failure.
*/
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
struct fence *kbase_fence_out_new(struct kbase_jd_atom *katom);
diff --git a/mali_kbase/mali_kbase_gator.h b/mali_kbase/mali_kbase_gator.h
index 88c96e0..2ed40fc 100644
--- a/mali_kbase/mali_kbase_gator.h
+++ b/mali_kbase/mali_kbase_gator.h
@@ -45,7 +45,7 @@ struct kbase_context;
void kbase_trace_mali_job_slots_event(u32 dev_id, u32 event, const struct kbase_context *kctx, u8 atom_id);
void kbase_trace_mali_pm_status(u32 dev_id, u32 event, u64 value);
void kbase_trace_mali_page_fault_insert_pages(u32 dev_id, int event, u32 value);
-void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long int event);
+void kbase_trace_mali_total_alloc_pages_change(u32 dev_id, long long event);
#endif /* CONFIG_MALI_GATOR_SUPPORT */
diff --git a/mali_kbase/mali_kbase_gpu_memory_debugfs.c b/mali_kbase/mali_kbase_gpu_memory_debugfs.c
index cb372ea..b02a32c 100644
--- a/mali_kbase/mali_kbase_gpu_memory_debugfs.c
+++ b/mali_kbase/mali_kbase_gpu_memory_debugfs.c
@@ -54,8 +54,8 @@ static int kbasep_gpu_memory_seq_show(struct seq_file *sfile, void *data)
mutex_lock(&kbdev->kctx_list_lock);
list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
/* output the memory usage and cap for each kctx
- * opened on this device
- */
+ * opened on this device
+ */
seq_printf(sfile, " %s-0x%pK %10u\n",
"kctx",
kctx,
@@ -88,18 +88,13 @@ static const struct file_operations kbasep_gpu_memory_debugfs_fops = {
*/
void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
{
- debugfs_create_file("gpu_memory", S_IRUGO,
+ debugfs_create_file("gpu_memory", 0444,
kbdev->mali_debugfs_directory, NULL,
&kbasep_gpu_memory_debugfs_fops);
- return;
}
-
#else
/*
* Stub functions for when debugfs is disabled
*/
-void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev)
-{
- return;
-}
+void kbasep_gpu_memory_debugfs_init(struct kbase_device *kbdev) {}
#endif
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index b5ba642..f2844f4 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -760,8 +760,8 @@ static struct {
raw_props.thread_max_workgroup_size),
PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
PROP(RAW_THREAD_FEATURES, raw_props.thread_features),
- PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc),
PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode),
+ PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc),
PROP(RAW_GPU_FEATURES, raw_props.gpu_features),
PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups),
PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
diff --git a/mali_kbase/mali_kbase_gpuprops.h b/mali_kbase/mali_kbase_gpuprops.h
index b20b99b..f0a9731 100644
--- a/mali_kbase/mali_kbase_gpuprops.h
+++ b/mali_kbase/mali_kbase_gpuprops.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2015, 2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2015, 2017, 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -82,6 +82,8 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev);
* @kbdev: The kbase device
*
* Fills prop_buffer with the GPU properties for user space to read.
+ *
+ * Return: MALI_ERROR_NONE on success. Any other value indicates failure.
*/
int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev);
diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c
index 2a20a3d..16cccee 100644
--- a/mali_kbase/mali_kbase_gwt.c
+++ b/mali_kbase/mali_kbase_gwt.c
@@ -82,9 +82,9 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx)
#endif
/* Mark gwt enabled before making pages read only in case a
- write page fault is triggered while we're still in this loop.
- (kbase_gpu_vm_lock() doesn't prevent this!)
- */
+ * write page fault is triggered while we're still in this loop.
+ * (kbase_gpu_vm_lock() doesn't prevent this!)
+ */
kctx->gwt_enabled = true;
kctx->gwt_was_enabled = true;
diff --git a/mali_kbase/mali_kbase_gwt.h b/mali_kbase/mali_kbase_gwt.h
index 30de43d..da0d43e 100644
--- a/mali_kbase/mali_kbase_gwt.h
+++ b/mali_kbase/mali_kbase_gwt.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,7 +29,7 @@
* kbase_gpu_gwt_start - Start the GPU write tracking
* @kctx: Pointer to kernel context
*
- * @return 0 on success, error on failure.
+ * Return: 0 on success, error on failure.
*/
int kbase_gpu_gwt_start(struct kbase_context *kctx);
@@ -37,7 +37,7 @@ int kbase_gpu_gwt_start(struct kbase_context *kctx);
* kbase_gpu_gwt_stop - Stop the GPU write tracking
* @kctx: Pointer to kernel context
*
- * @return 0 on success, error on failure.
+ * Return: 0 on success, error on failure.
*/
int kbase_gpu_gwt_stop(struct kbase_context *kctx);
@@ -46,7 +46,7 @@ int kbase_gpu_gwt_stop(struct kbase_context *kctx);
* @kctx: Pointer to kernel context
* @gwt_dump: User space data to be passed.
*
- * @return 0 on success, error on failure.
+ * Return: 0 on success, error on failure.
*/
int kbase_gpu_gwt_dump(struct kbase_context *kctx,
union kbase_ioctl_cinstr_gwt_dump *gwt_dump);
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index 183fd18..75e4aaf 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -232,10 +232,12 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{ GPU_ID2_PRODUCT_TTUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ U32_MAX, NULL } } },
{ GPU_ID2_PRODUCT_LTUX,
{ { GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tTUx_r0p0 },
+ { GPU_ID2_VERSION_MAKE(1, 0, 0), base_hw_issues_tTUx_r1p0 },
{ U32_MAX, NULL } } },
};
diff --git a/mali_kbase/mali_kbase_hw.h b/mali_kbase/mali_kbase_hw.h
index 6c04a23..ddcddaa 100644
--- a/mali_kbase/mali_kbase_hw.h
+++ b/mali_kbase/mali_kbase_hw.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2012-2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -29,7 +29,7 @@
#include "mali_kbase_defs.h"
/**
- * Tell whether a work-around should be enabled
+ * kbase_hw_has_issue - Tell whether a work-around should be enabled
* @kbdev: Device pointer
* @issue: issue to be checked
*/
@@ -37,7 +37,7 @@
test_bit(issue, &(kbdev)->hw_issues_mask[0])
/**
- * Tell whether a feature is supported
+ * kbase_hw_has_feature - Tell whether a feature is supported
* @kbdev: Device pointer
* @feature: feature to be checked
*/
@@ -63,7 +63,7 @@
int kbase_hw_set_issues_mask(struct kbase_device *kbdev);
/**
- * Set the features mask depending on the GPU ID
+ * kbase_hw_set_features_mask - Set the features mask depending on the GPU ID
* @kbdev: Device pointer
*/
void kbase_hw_set_features_mask(struct kbase_device *kbdev);
diff --git a/mali_kbase/mali_kbase_hwaccess_gpuprops.h b/mali_kbase/mali_kbase_hwaccess_gpuprops.h
index 71ccc91..f537b7f 100644
--- a/mali_kbase/mali_kbase_hwaccess_gpuprops.h
+++ b/mali_kbase/mali_kbase_hwaccess_gpuprops.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2015, 2018, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Base kernel property query backend APIs
+ * DOC: Base kernel property query backend APIs
*/
#ifndef _KBASE_HWACCESS_GPUPROPS_H_
diff --git a/mali_kbase/mali_kbase_hwaccess_jm.h b/mali_kbase/mali_kbase_hwaccess_jm.h
index d0207f7..95d7624 100644
--- a/mali_kbase/mali_kbase_hwaccess_jm.h
+++ b/mali_kbase/mali_kbase_hwaccess_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -181,7 +181,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp);
* @kbdev: Device pointer
* @js: Job slot to inspect
*
- * Return : Atom currently at the head of slot @js, or NULL
+ * Return: Atom currently at the head of slot @js, or NULL
*/
struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
int js);
@@ -192,7 +192,7 @@ struct kbase_jd_atom *kbase_backend_inspect_tail(struct kbase_device *kbdev,
* @kbdev: Device pointer
* @js: Job slot to inspect
*
- * Return : Number of atoms currently on slot
+ * Return: Number of atoms currently on slot
*/
int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
@@ -202,7 +202,7 @@ int kbase_backend_nr_atoms_on_slot(struct kbase_device *kbdev, int js);
* @kbdev: Device pointer
* @js: Job slot to inspect
*
- * Return : Number of atoms currently on slot @js that are currently on the GPU.
+ * Return: Number of atoms currently on slot @js that are currently on the GPU.
*/
int kbase_backend_nr_atoms_submitted(struct kbase_device *kbdev, int js);
@@ -231,7 +231,7 @@ void kbase_backend_timeouts_changed(struct kbase_device *kbdev);
* @kbdev: Device pointer
* @js: Job slot to inspect
*
- * Return : Number of jobs that can be submitted.
+ * Return: Number of jobs that can be submitted.
*/
int kbase_backend_slot_free(struct kbase_device *kbdev, int js);
diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h
index a8e4b95..1c153c4 100644
--- a/mali_kbase/mali_kbase_hwaccess_pm.h
+++ b/mali_kbase/mali_kbase_hwaccess_pm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -37,22 +37,22 @@ struct kbase_device;
/* Functions common to all HW access backends */
/**
- * Initialize the power management framework.
- *
- * Must be called before any other power management function
+ * kbase_hwaccess_pm_init - Initialize the power management framework.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
+ * Must be called before any other power management function
+ *
* Return: 0 if the power management framework was successfully initialized.
*/
int kbase_hwaccess_pm_init(struct kbase_device *kbdev);
/**
- * Terminate the power management framework.
- *
- * No power management functions may be called after this
+ * kbase_hwaccess_pm_term - Terminate the power management framework.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * No power management functions may be called after this
*/
void kbase_hwaccess_pm_term(struct kbase_device *kbdev);
@@ -70,19 +70,19 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
unsigned int flags);
/**
- * Halt the power management framework.
+ * kbase_hwaccess_pm_halt - Halt the power management framework.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Should ensure that no new interrupts are generated, but allow any currently
* running interrupt handlers to complete successfully. The GPU is forced off by
* the time this function returns, regardless of whether or not the active power
* policy asks for the GPU to be powered off.
- *
- * @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
/**
- * Perform any backend-specific actions to suspend the GPU
+ * kbase_hwaccess_pm_suspend - Perform any backend-specific actions to suspend the GPU
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
@@ -91,23 +91,24 @@ void kbase_hwaccess_pm_halt(struct kbase_device *kbdev);
int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev);
/**
- * Perform any backend-specific actions to resume the GPU from a suspend
+ * kbase_hwaccess_pm_resume - Perform any backend-specific actions to resume the GPU
+ * from a suspend
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_hwaccess_pm_resume(struct kbase_device *kbdev);
/**
- * Perform any required actions for activating the GPU. Called when the first
- * context goes active.
+ * kbase_hwaccess_pm_gpu_active - Perform any required actions for activating the GPU.
+ * Called when the first context goes active.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
/**
- * Perform any required actions for idling the GPU. Called when the last
- * context goes idle.
+ * kbase_hwaccess_pm_gpu_idle - Perform any required actions for idling the GPU.
+ * Called when the last context goes idle.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*/
@@ -115,25 +116,25 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
#if MALI_USE_CSF
/**
- * Set the debug core mask.
- *
- * This determines which cores the power manager is allowed to use.
+ * kbase_pm_set_debug_core_mask - Set the debug core mask.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @new_core_mask: The core mask to use
+ *
+ * This determines which cores the power manager is allowed to use.
*/
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
u64 new_core_mask);
#else
/**
- * Set the debug core mask.
- *
- * This determines which cores the power manager is allowed to use.
+ * kbase_pm_set_debug_core_mask - Set the debug core mask.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @new_core_mask_js0: The core mask to use for job slot 0
* @new_core_mask_js1: The core mask to use for job slot 1
* @new_core_mask_js2: The core mask to use for job slot 2
+ *
+ * This determines which cores the power manager is allowed to use.
*/
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
u64 new_core_mask_js0, u64 new_core_mask_js1,
@@ -141,19 +142,19 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
#endif /* MALI_USE_CSF */
/**
- * Get the current policy.
- *
- * Returns the policy that is currently active.
+ * kbase_pm_ca_get_policy - Get the current policy.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
- * @return The current policy
+ * Returns the policy that is currently active.
+ *
+ * Return: The current policy
*/
const struct kbase_pm_ca_policy
*kbase_pm_ca_get_policy(struct kbase_device *kbdev);
/**
- * Change the policy to the one specified.
+ * kbase_pm_ca_set_policy - Change the policy to the one specified.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
* @policy: The policy to change to (valid pointer returned from
@@ -163,29 +164,29 @@ void kbase_pm_ca_set_policy(struct kbase_device *kbdev,
const struct kbase_pm_ca_policy *policy);
/**
- * Retrieve a static list of the available policies.
+ * kbase_pm_ca_list_policies - Retrieve a static list of the available policies.
*
* @policies: An array pointer to take the list of policies. This may be NULL.
* The contents of this array must not be modified.
*
- * @return The number of policies
+ * Return: The number of policies
*/
int
kbase_pm_ca_list_policies(const struct kbase_pm_ca_policy * const **policies);
/**
- * Get the current policy.
- *
- * Returns the policy that is currently active.
+ * kbase_pm_get_policy - Get the current policy.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
- * @return The current policy
+ * Returns the policy that is currently active.
+ *
+ * Return: The current policy
*/
const struct kbase_pm_policy *kbase_pm_get_policy(struct kbase_device *kbdev);
/**
- * Change the policy to the one specified.
+ * kbase_pm_set_policy - Change the policy to the one specified.
*
* @kbdev: The kbase device structure for the device (must be a valid
* pointer)
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c
index 1fa6640..a54f005 100644
--- a/mali_kbase/mali_kbase_hwcnt.c
+++ b/mali_kbase/mali_kbase_hwcnt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -350,7 +350,7 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
* values of enabled counters possible, and
* optionally update the set of enabled
* counters.
- * @hctx : Non-NULL pointer to the hardware counter context
+ * @hctx: Non-NULL pointer to the hardware counter context
* @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
* be written out to on success
* @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
@@ -361,6 +361,8 @@ static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
* @new_map: Pointer to the new counter enable map. If non-NULL, must have
* the same metadata as the accumulator. If NULL, the set of
* enabled counters will be unchanged.
+ *
+ * Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_accumulator_dump(
struct kbase_hwcnt_context *hctx,
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
index 4602138..c42f2a0 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1357,6 +1357,7 @@ kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
*
* @csf_info: Non-NULL pointer to backend info.
* @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
* Return: 0 on success, else error code.
*/
static int
@@ -1554,7 +1555,8 @@ static void kbasep_hwcnt_backend_csf_info_destroy(
* @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create
* backend interface.
* @out_info: Non-NULL pointer to where info is stored on success.
- * @return 0 on success, else error code.
+ *
+ * Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_csf_info_create(
struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
index 40cf6bb..15ffbfa 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -28,7 +28,7 @@
#include <device/mali_kbase_device.h>
#include "mali_kbase_hwcnt_gpu.h"
#include "mali_kbase_hwcnt_types.h"
-#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include <csf/mali_kbase_csf_registers.h>
#include "csf/mali_kbase_csf_firmware.h"
#include "mali_kbase_hwcnt_backend_csf_if_fw.h"
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
index d041391..e418212 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -133,6 +133,8 @@ struct kbase_hwcnt_backend_jm {
* @info: Non-NULL pointer to data structure to be filled in.
*
* The initialised info struct will only be valid for use while kbdev is valid.
+ *
+ * Return: 0 on success, else error code.
*/
static int
kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
@@ -854,7 +856,7 @@ static void kbasep_hwcnt_backend_jm_info_destroy(
* @kbdev: Non_NULL pointer to kbase device.
* @out_info: Non-NULL pointer to where info is stored on success.
*
- * Return 0 on success, else error code.
+ * Return: 0 on success, else error code.
*/
static int kbasep_hwcnt_backend_jm_info_create(
struct kbase_device *kbdev,
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c
new file mode 100644
index 0000000..cdf3cd9
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -0,0 +1,821 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+
+#include <mali_kbase_hwcnt_gpu.h>
+#include <mali_kbase_hwcnt_types.h>
+
+#include <mali_kbase_hwcnt_backend.h>
+#include <mali_kbase_hwcnt_watchdog_if.h>
+
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000;
+
+/*
+ * IDLE_BUFFER_EMPTY -> USER_DUMPING_BUFFER_EMPTY on dump_request.
+ * IDLE_BUFFER_EMPTY -> TIMER_DUMPING after
+ * hwcnt_backend_watchdog_timer_interval_ms
+ * milliseconds, if no dump_request has been
+ * called in the meantime.
+ * IDLE_BUFFER_FULL -> USER_DUMPING_BUFFER_FULL on dump_request.
+ * IDLE_BUFFER_FULL -> TIMER_DUMPING after
+ * hwcnt_backend_watchdog_timer_interval_ms
+ * milliseconds, if no dump_request has been
+ * called in the meantime.
+ * IDLE_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_disable, upon discarding undumped
+ * counter values since the last dump_get.
+ * IDLE_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * IDLE_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * USER_DUMPING_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * USER_DUMPING_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * BUFFER_CLEARING -> IDLE_BUFFER_EMPTY on dump_clear, upon job manager backend
+ * dump_clear completion.
+ * TIMER_DUMPING -> IDLE_BUFFER_FULL on timer's callback completion.
+ * TIMER_DUMPING -> TIMER_DUMPING_USER_CLEAR on dump_clear, notifies the callback thread
+ * that there is no need for dumping the buffer
+ * anymore, and that the client will proceed
+ * clearing the buffer.
+ * TIMER_DUMPING_USER_CLEAR -> IDLE_BUFFER_EMPTY on timer's callback completion, when a user
+ * requested a dump_clear.
+ * TIMER_DUMPING -> TIMER_DUMPING_USER_REQUESTED on dump_request, when a client performs a
+ * dump request while the timer is dumping (the
+ * timer will perform the dump and (once
+ * completed) the client will retrieve the value
+ * from the buffer).
+ * TIMER_DUMPING_USER_REQUESTED -> IDLE_BUFFER_EMPTY on dump_get, when a timer completed and the
+ * user reads the periodic dump buffer.
+ * Any -> ERROR if the job manager backend returns an error
+ * (of any kind).
+ * USER_DUMPING_BUFFER_EMPTY -> IDLE_BUFFER_EMPTY on dump_get (performs get, ignores the
+ * periodic dump buffer and returns).
+ * USER_DUMPING_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_get (performs get, accumulates with
+ * periodic dump buffer and returns).
+ */
+
+/** enum backend_watchdog_state - State used to synchronize timer callbacks with the main thread.
+ * @HWCNT_JM_WD_ERROR: Received an error from the job manager backend calls.
+ * @HWCNT_JM_WD_IDLE_BUFFER_EMPTY: Initial state. Watchdog timer enabled, periodic dump buffer is
+ * empty.
+ * @HWCNT_JM_WD_IDLE_BUFFER_FULL: Watchdog timer enabled, periodic dump buffer is full.
+ * @HWCNT_JM_WD_BUFFER_CLEARING: The client is performing a dump clear. A concurrent timer callback
+ * thread should just ignore and reschedule another callback in
+ * hwcnt_backend_watchdog_timer_interval_ms milliseconds.
+ * @HWCNT_JM_WD_TIMER_DUMPING: The timer ran out. The callback is performing a periodic dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: While the timer is performing a periodic dump, user
+ * requested a dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR: While the timer is performing a dump, user requested a
+ * dump_clear. The timer has to complete the periodic dump
+ *                                        and clear the buffers (internal and job manager backend).
+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: From IDLE state, user requested a dump. The periodic
+ * dump buffer is empty.
+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: From IDLE state, user requested a dump. The periodic dump
+ * buffer is full.
+ *
+ * While the state machine is in HWCNT_JM_WD_TIMER_DUMPING*, only the timer callback thread is
+ * allowed to call the job manager backend layer.
+ */
+enum backend_watchdog_state {
+ HWCNT_JM_WD_ERROR,
+ HWCNT_JM_WD_IDLE_BUFFER_EMPTY,
+ HWCNT_JM_WD_IDLE_BUFFER_FULL,
+ HWCNT_JM_WD_BUFFER_CLEARING,
+ HWCNT_JM_WD_TIMER_DUMPING,
+ HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED,
+ HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR,
+ HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY,
+ HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL,
+};
+
+/** enum wd_init_state - State machine for initialization / termination of the backend resources
+ */
+enum wd_init_state {
+ HWCNT_JM_WD_INIT_START,
+ HWCNT_JM_WD_INIT_ALLOC = HWCNT_JM_WD_INIT_START,
+ HWCNT_JM_WD_INIT_BACKEND,
+ HWCNT_JM_WD_INIT_ENABLE_MAP,
+ HWCNT_JM_WD_INIT_DUMP_BUFFER,
+ HWCNT_JM_WD_INIT_END
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm_watchdog_info - Immutable information used to initialize an
+ * instance of the job manager watchdog backend.
+ * @jm_backend_iface: Hardware counter backend interface. This module extends
+ * this interface with a watchdog that performs regular
+ * dumps. The new interface this module provides complies
+ * with the old backend interface.
+ * @dump_watchdog_iface: Dump watchdog interface, used to periodically dump the
+ *                       hardware counters when no reads are requested within
+ *                       a certain time, in order to avoid saturating the
+ *                       hardware counter buffers.
+ */
+struct kbase_hwcnt_backend_jm_watchdog_info {
+ struct kbase_hwcnt_backend_interface *jm_backend_iface;
+ struct kbase_hwcnt_watchdog_interface *dump_watchdog_iface;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm_watchdog - An instance of the job manager watchdog backend.
+ * @info: Immutable information used to create the job manager watchdog backend.
+ * @jm_backend: Job manager backend internal state, passed as an argument on calls into the wrapped backend.
+ * @timeout_ms: Time period in milliseconds for hardware counters dumping.
+ * @wd_dump_buffer: Used to store periodic dumps done by a timer callback function. Contents are
+ * valid in state %HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED,
+ * %HWCNT_JM_WD_IDLE_BUFFER_FULL or %HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL.
+ * @wd_enable_map: Watchdog backend internal buffer mask, initialized during dump_enable copying
+ * the enable_map passed as argument.
+ * @wd_dump_timestamp: Holds the dumping timestamp for potential future client dump_request, filled
+ * during watchdog timer dumps.
+ * @watchdog_complete: Used for synchronization between watchdog dumper thread and client calls.
+ * @locked: Members protected from concurrent access by different threads.
+ * @locked.watchdog_lock: Lock used to access fields within this struct (that require mutual
+ * exclusion).
+ * @locked.is_enabled: If true then the wrapped job manager hardware counter backend and the
+ * watchdog timer are both enabled. If false then both are disabled (or soon
+ * will be). Races between enable and disable have undefined behavior.
+ * @locked.state: State used to synchronize timer callbacks with the main thread.
+ */
+struct kbase_hwcnt_backend_jm_watchdog {
+ const struct kbase_hwcnt_backend_jm_watchdog_info *info;
+ struct kbase_hwcnt_backend *jm_backend;
+ u32 timeout_ms;
+ struct kbase_hwcnt_dump_buffer wd_dump_buffer;
+ struct kbase_hwcnt_enable_map wd_enable_map;
+ u64 wd_dump_timestamp;
+ struct completion watchdog_complete;
+ struct {
+ spinlock_t watchdog_lock;
+ bool is_enabled;
+ enum backend_watchdog_state state;
+ } locked;
+};
+
+/* timer's callback function */
+static void kbasep_hwcnt_backend_jm_watchdog_timer_callback(void *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = backend;
+ unsigned long flags;
+ bool wd_accumulate;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!wd_backend->locked.is_enabled || wd_backend->locked.state == HWCNT_JM_WD_ERROR) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+
+ if (!(wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_EMPTY ||
+ wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)) {
+ /*resetting the timer. Calling modify on a disabled timer enables it.*/
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms);
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+ /*start performing the dump*/
+
+ /* if there has been a previous timeout use accumulating dump_get()
+ * otherwise use non-accumulating to overwrite buffer
+ */
+ wd_accumulate = (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL);
+
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend,
+ &wd_backend->wd_dump_timestamp) ||
+ wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend) ||
+ wd_backend->info->jm_backend_iface->dump_get(
+ wd_backend->jm_backend, &wd_backend->wd_dump_buffer, &wd_backend->wd_enable_map,
+ wd_accumulate)) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ /* Unblock user if it's waiting. */
+ complete_all(&wd_backend->watchdog_complete);
+ return;
+ }
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+ if (wd_backend->locked.state == HWCNT_JM_WD_TIMER_DUMPING) {
+		/* If there is no user request/clear, transition to HWCNT_JM_WD_IDLE_BUFFER_FULL
+		 * to indicate that the timer dump is done and the buffer is full. If the state
+		 * has changed to HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED or
+		 * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR then the user will transition the
+		 * state machine to the next state.
+		 */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_FULL;
+ }
+ if (wd_backend->locked.state != HWCNT_JM_WD_ERROR && wd_backend->locked.is_enabled) {
+ /* reset the timer to schedule another callback. Calling modify on a
+ * disabled timer enables it.
+ */
+ /*The spin lock needs to be held in case the client calls dump_enable*/
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms);
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ /* Unblock user if it's waiting. */
+ complete_all(&wd_backend->watchdog_complete);
+}
+
+/* helper methods, info structure creation and destruction*/
+
+static struct kbase_hwcnt_backend_jm_watchdog_info *
+kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface)
+{
+ struct kbase_hwcnt_backend_jm_watchdog_info *const info =
+ kmalloc(sizeof(*info), GFP_KERNEL);
+
+ if (!info)
+ return NULL;
+
+ *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface,
+ .dump_watchdog_iface =
+ watchdog_iface };
+
+ return info;
+}
+
+/****** kbase_hwcnt_backend_interface implementation *******/
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_jm_watchdog_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ const struct kbase_hwcnt_backend_jm_watchdog_info *wd_info = (void *)info;
+
+ if (WARN_ON(!info))
+ return NULL;
+
+ return wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
+}
+
+static void
+kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watchdog *wd_backend,
+ enum wd_init_state state)
+{
+ if (!wd_backend)
+ return;
+
+ /* disable timer thread to avoid concurrent access to shared resources */
+ wd_backend->info->dump_watchdog_iface->disable(
+ wd_backend->info->dump_watchdog_iface->timer);
+
+ /*will exit the loop when state reaches HWCNT_JM_WD_INIT_START*/
+ while (state-- > HWCNT_JM_WD_INIT_START) {
+ switch (state) {
+ case HWCNT_JM_WD_INIT_ALLOC:
+ kfree(wd_backend);
+ break;
+ case HWCNT_JM_WD_INIT_BACKEND:
+ wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend);
+ break;
+ case HWCNT_JM_WD_INIT_ENABLE_MAP:
+ kbase_hwcnt_enable_map_free(&wd_backend->wd_enable_map);
+ break;
+ case HWCNT_JM_WD_INIT_DUMP_BUFFER:
+ kbase_hwcnt_dump_buffer_free(&wd_backend->wd_dump_buffer);
+ break;
+ case HWCNT_JM_WD_INIT_END:
+ break;
+ }
+ }
+}
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn
+ * Calling term does *not* destroy the interface
+ */
+static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend)
+{
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_jm_watchdog_term_partial(
+ (struct kbase_hwcnt_backend_jm_watchdog *)backend, HWCNT_JM_WD_INIT_END);
+}
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ int errcode = 0;
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = NULL;
+ struct kbase_hwcnt_backend_jm_watchdog_info *const wd_info = (void *)info;
+ const struct kbase_hwcnt_backend_info *jm_info;
+ const struct kbase_hwcnt_metadata *metadata;
+ enum wd_init_state state = HWCNT_JM_WD_INIT_START;
+
+ if (WARN_ON(!info) || WARN_ON(!out_backend))
+ return -EINVAL;
+
+ jm_info = wd_info->jm_backend_iface->info;
+ metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
+
+ while (state < HWCNT_JM_WD_INIT_END && !errcode) {
+ switch (state) {
+ case HWCNT_JM_WD_INIT_ALLOC:
+ wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
+ if (wd_backend) {
+ *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
+ .info = wd_info,
+ .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
+ .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY,
+ .is_enabled = false }
+ };
+ } else
+ errcode = -ENOMEM;
+ break;
+ case HWCNT_JM_WD_INIT_BACKEND:
+ errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend);
+ break;
+ case HWCNT_JM_WD_INIT_ENABLE_MAP:
+ errcode =
+ kbase_hwcnt_enable_map_alloc(metadata, &wd_backend->wd_enable_map);
+ break;
+ case HWCNT_JM_WD_INIT_DUMP_BUFFER:
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata,
+ &wd_backend->wd_dump_buffer);
+ break;
+ case HWCNT_JM_WD_INIT_END:
+ break;
+ }
+ if (!errcode)
+ state++;
+ }
+
+ if (errcode) {
+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, state);
+ *out_backend = NULL;
+ return errcode;
+ }
+
+ WARN_ON(state != HWCNT_JM_WD_INIT_END);
+
+ spin_lock_init(&wd_backend->locked.watchdog_lock);
+ init_completion(&wd_backend->watchdog_complete);
+
+ *out_backend = (struct kbase_hwcnt_backend *)wd_backend;
+ return 0;
+}
+
+/* Job manager watchdog backend, implementation of timestamp_ns */
+static u64 kbasep_hwcnt_backend_jm_watchdog_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return wd_backend->info->jm_backend_iface->timestamp_ns(wd_backend->jm_backend);
+}
+
+static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend,
+ const struct kbase_hwcnt_enable_map *enable_map, kbase_hwcnt_backend_dump_enable_fn enabler)
+{
+ int errcode = -EPERM;
+ unsigned long flags;
+
+ if (WARN_ON(!wd_backend) || WARN_ON(!enable_map))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ /* If the backend is already enabled return an error */
+ if (wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+	/* Copy the enable map into the watchdog backend's own copy for later use */
+ kbase_hwcnt_enable_map_copy(&wd_backend->wd_enable_map, enable_map);
+
+ errcode = enabler(wd_backend->jm_backend, enable_map);
+ if (!errcode) {
+ /*Enable dump watchdog*/
+ errcode = wd_backend->info->dump_watchdog_iface->enable(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms,
+ kbasep_hwcnt_backend_jm_watchdog_timer_callback, wd_backend);
+ if (!errcode) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.is_enabled);
+ wd_backend->locked.is_enabled = true;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ } else
+ /*Reverting the job manager backend back to disabled*/
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_enable */
+static int
+kbasep_hwcnt_backend_jm_watchdog_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable);
+}
+
+/* Job manager watchdog backend, implementation of dump_enable_nolock */
+static int
+kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable_nolock);
+}
+
+/* Job manager watchdog backend, implementation of dump_disable */
+static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+
+ if (WARN_ON(!backend))
+ return;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+
+ wd_backend->locked.is_enabled = false;
+
+ /* Discard undumped counter values since the last dump_get. */
+ if (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ wd_backend->info->dump_watchdog_iface->disable(
+ wd_backend->info->dump_watchdog_iface->timer);
+
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+}
+
+/* Job manager watchdog backend, implementation of dump_clear */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+ int errcode = -EPERM;
+ bool clear_wd_wait_completion = false;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ if (WARN_ON(!backend))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_IDLE_BUFFER_FULL:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ wd_backend->locked.state = HWCNT_JM_WD_BUFFER_CLEARING;
+ errcode = 0;
+ break;
+ case HWCNT_JM_WD_TIMER_DUMPING:
+		/* The timer has issued a dump request; once it completes, the job manager
+		 * backend buffer will be zero.
+		 */
+ clear_wd_wait_completion = true;
+ /* This thread will have to wait for the callback to terminate and then call a
+ * dump_clear on the job manager backend. We change the state to
+ * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR to notify the callback thread there is
+ * no more need to dump the buffer (since we will clear it right after anyway).
+ * We set up a wait queue to synchronize with the callback.
+ */
+ reinit_completion(&wd_backend->watchdog_complete);
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR;
+ errcode = 0;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!errcode) {
+ if (clear_wd_wait_completion) {
+ /* Waiting for the callback to finish */
+ wait_for_completion(&wd_backend->watchdog_complete);
+ }
+
+ /* Clearing job manager backend buffer */
+ errcode = wd_backend->info->jm_backend_iface->dump_clear(wd_backend->jm_backend);
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_BUFFER_CLEARING &&
+ wd_backend->locked.state != HWCNT_JM_WD_ERROR);
+
+ WARN_ON(!wd_backend->locked.is_enabled);
+
+ if (!errcode && wd_backend->locked.state != HWCNT_JM_WD_ERROR) {
+ /* Setting the internal buffer state to EMPTY */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ /* Resetting the timer. Calling modify on a disabled timer
+ * enables it.
+ */
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer,
+ wd_backend->timeout_ms);
+ } else {
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ errcode = -EPERM;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_request */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ bool call_dump_request = false;
+ int errcode = 0;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ if (WARN_ON(!backend) || WARN_ON(!dump_time_ns))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY:
+		/* Advance the state so the timer callback does not dump while we call
+		 * into the job manager backend.
+		 */
+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY;
+ call_dump_request = true;
+ break;
+ case HWCNT_JM_WD_IDLE_BUFFER_FULL:
+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL;
+ call_dump_request = true;
+ break;
+ case HWCNT_JM_WD_TIMER_DUMPING:
+ /* Retrieve timing information from previous dump_request */
+ *dump_time_ns = wd_backend->wd_dump_timestamp;
+ /* On the next client call (dump_wait) the thread will have to wait for the
+ * callback to finish the dumping.
+ * We set up a wait queue to synchronize with the callback.
+ */
+ reinit_completion(&wd_backend->watchdog_complete);
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (call_dump_request) {
+ errcode = wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend,
+ dump_time_ns);
+ if (!errcode) {
+ /*resetting the timer. Calling modify on a disabled timer enables it*/
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer,
+ wd_backend->timeout_ms);
+ } else {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(!wd_backend->locked.is_enabled);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_wait */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+ int errcode = -EPERM;
+ bool wait_for_auto_dump = false, wait_for_user_dump = false;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+
+ if (WARN_ON(!backend))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED:
+ wait_for_auto_dump = true;
+ errcode = 0;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ wait_for_user_dump = true;
+ errcode = 0;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (wait_for_auto_dump)
+ wait_for_completion(&wd_backend->watchdog_complete);
+ else if (wait_for_user_dump) {
+ errcode = wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend);
+ if (errcode) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(!wd_backend->locked.is_enabled);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_get */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_get(
+ struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map, bool accumulate)
+{
+ bool call_dump_get = false;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+ int errcode = 0;
+
+ if (WARN_ON(!backend) || WARN_ON(!dump_buffer) || WARN_ON(!enable_map))
+ return -EINVAL;
+
+ /* The resultant contents of the dump buffer are only well defined if a prior
+ * call to dump_wait returned successfully, and a new dump has not yet been
+ * requested by a call to dump_request.
+ */
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED:
+ /*we assume dump_wait has been called and completed successfully*/
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+		/* use state to indicate that the buffer is now empty */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ /*accumulate or copy watchdog data to user buffer first so that dump_get can set
+ * the header correctly
+ */
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+ /*accumulate backend data into user buffer on top of watchdog data*/
+ accumulate = true;
+ call_dump_get = true;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ call_dump_get = true;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (call_dump_get && !errcode) {
+		/* Dump the job manager backend into the user buffer, following the
+		 * accumulate flag.
+		 */
+ errcode = wd_backend->info->jm_backend_iface->dump_get(
+ wd_backend->jm_backend, dump_buffer, enable_map, accumulate);
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY &&
+ wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+ if (!errcode)
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ else
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+
+ return errcode;
+}
+
+/* exposed methods */
+
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface)
+{
+ struct kbase_hwcnt_backend_jm_watchdog_info *info = NULL;
+
+ if (WARN_ON(!backend_iface) || WARN_ON(!watchdog_iface) || WARN_ON(!out_iface))
+ return -EINVAL;
+
+ info = kbasep_hwcnt_backend_jm_watchdog_info_create(backend_iface, watchdog_iface);
+ if (!info)
+ return -ENOMEM;
+
+	/* Link the info structure to the output interface so that the callbacks
+	 * registered below can access it later on.
+	 */
+ *out_iface = (struct kbase_hwcnt_backend_interface){
+ .info = (void *)info,
+ .metadata = kbasep_hwcnt_backend_jm_watchdog_metadata,
+ .init = kbasep_hwcnt_backend_jm_watchdog_init,
+ .term = kbasep_hwcnt_backend_jm_watchdog_term,
+ .timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns,
+ .dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable,
+ .dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock,
+ .dump_disable = kbasep_hwcnt_backend_jm_watchdog_dump_disable,
+ .dump_clear = kbasep_hwcnt_backend_jm_watchdog_dump_clear,
+ .dump_request = kbasep_hwcnt_backend_jm_watchdog_dump_request,
+ .dump_wait = kbasep_hwcnt_backend_jm_watchdog_dump_wait,
+ .dump_get = kbasep_hwcnt_backend_jm_watchdog_dump_get
+ };
+
+ /*registering watchdog backend module methods on the output interface*/
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface || !iface->info)
+ return;
+
+ kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info);
+
+ /*blanking the watchdog backend interface*/
+ *iface = (struct kbase_hwcnt_backend_interface){ NULL };
+}
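
The init/term pair in the new file above is built around the wd_init_state enum: kbasep_hwcnt_backend_jm_watchdog_init() walks the states forward, recording how far it got, and on failure kbasep_hwcnt_backend_jm_watchdog_term_partial() walks the same enum backwards so that only the resources which were actually set up get released. A minimal sketch of that pattern follows; the struct and its two resources are hypothetical stand-ins for the real backend, enable map and dump buffer.

/* Sketch only: staged init/teardown keyed off an init-state enum.
 * struct wd_sketch and its resources are hypothetical stand-ins.
 */
#include <linux/slab.h>
#include <linux/errno.h>

struct wd_sketch {
	void *res_a;
	void *res_b;
};

enum sketch_state {
	SKETCH_INIT_START,
	SKETCH_INIT_A = SKETCH_INIT_START,
	SKETCH_INIT_B,
	SKETCH_INIT_END
};

/* Walk the enum backwards, releasing only what was set up. */
static void sketch_term_partial(struct wd_sketch *s, enum sketch_state state)
{
	if (!s)
		return;

	while (state-- > SKETCH_INIT_START) {
		switch (state) {
		case SKETCH_INIT_A:
			kfree(s->res_a);
			break;
		case SKETCH_INIT_B:
			kfree(s->res_b);
			break;
		default:
			break;
		}
	}
}

static int sketch_init(struct wd_sketch *s)
{
	enum sketch_state state = SKETCH_INIT_START;
	int errcode = 0;

	while (state < SKETCH_INIT_END && !errcode) {
		switch (state) {
		case SKETCH_INIT_A:
			s->res_a = kzalloc(64, GFP_KERNEL);
			errcode = s->res_a ? 0 : -ENOMEM;
			break;
		case SKETCH_INIT_B:
			s->res_b = kzalloc(64, GFP_KERNEL);
			errcode = s->res_b ? 0 : -ENOMEM;
			break;
		default:
			break;
		}
		if (!errcode)
			state++; /* state records how far init progressed */
	}

	if (errcode)
		sketch_term_partial(s, state); /* unwind partial progress only */

	return errcode;
}

In the patch above, the full kbasep_hwcnt_backend_jm_watchdog_term() is then simply term_partial() invoked with HWCNT_JM_WD_INIT_END.
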
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.h b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.h
new file mode 100644
index 0000000..5021b4f
--- /dev/null
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm_watchdog.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for job manager
+ * backend. This module functionally interleaves between the hardware counter
+ * (hwcnt_accumulator) module (the interface consumer) and the job manager
+ * backend module (hwcnt_backend_jm). This module provides buffering
+ * functionality for the dumping requests requested by the hwcnt_accumulator
+ * consumer. This module is NOT multi-thread safe. The programmer must
+ * ensure the exposed methods are called by at most one thread at any time.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+
+#include <mali_kbase_hwcnt_backend.h>
+#include <mali_kbase_hwcnt_watchdog_if.h>
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog
+ * backend interface.
+ * @backend_iface: Non-NULL pointer to the backend interface structure that this module will
+ * extend.
+ * @watchdog_iface: Non-NULL pointer to a hardware counter watchdog interface.
+ * @out_iface: Non-NULL pointer to backend interface structure that is filled in
+ * on creation success.
+ *
+ * Calls to out_iface->dump_enable_nolock() require kbdev->hwaccess_lock to be held.
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface);
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_destroy() - Destroy a job manager hardware counter watchdog
+ * backend interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ */
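
Putting the two entry points together, the watchdog backend is meant to be layered over an existing job manager backend interface, and it is the wrapped interface that gets handed to the hwcnt consumer. A hedged wiring sketch is shown below; only the kbase_hwcnt_backend_jm_watchdog_create()/_destroy() calls come from the header above, while the plain JM backend helpers, the header that declares them, and the already-initialised watchdog timer interface are assumptions.

/* Sketch: layering the watchdog backend over a job manager backend.
 * kbase_hwcnt_backend_jm_create()/_destroy(), their header, and the
 * pre-populated timer interface are assumed here; only the
 * *_jm_watchdog_create()/_destroy() calls match the header above.
 */
#include <mali_kbase.h>
#include <mali_kbase_hwcnt_backend_jm.h>          /* assumed location of JM helpers */
#include <mali_kbase_hwcnt_backend_jm_watchdog.h>

static struct kbase_hwcnt_backend_interface jm_iface;
static struct kbase_hwcnt_backend_interface wd_iface;
static struct kbase_hwcnt_watchdog_interface wd_timer_iface; /* assumed set up elsewhere */

static int hypothetical_hwcnt_setup(struct kbase_device *kbdev)
{
	int errcode;

	/* Create the plain job manager backend (assumed helper). */
	errcode = kbase_hwcnt_backend_jm_create(kbdev, &jm_iface);
	if (errcode)
		return errcode;

	/* Wrap it so that long gaps between reads trigger periodic dumps. */
	errcode = kbase_hwcnt_backend_jm_watchdog_create(&jm_iface, &wd_timer_iface,
							 &wd_iface);
	if (errcode) {
		kbase_hwcnt_backend_jm_destroy(&jm_iface); /* assumed helper */
		return errcode;
	}

	/* From here on, wd_iface (not jm_iface) is handed to the accumulator. */
	return 0;
}

static void hypothetical_hwcnt_teardown(void)
{
	kbase_hwcnt_backend_jm_watchdog_destroy(&wd_iface);
	kbase_hwcnt_backend_jm_destroy(&jm_iface); /* assumed helper */
}

This layering matches the note in the header that dump_enable_nolock() on the wrapped interface still requires kbdev->hwaccess_lock, since the watchdog layer forwards the call to the JM backend underneath.
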
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c
index 97a7511..752d096 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.c
@@ -25,6 +25,13 @@
#include <linux/bug.h>
#include <linux/err.h>
+/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements
+ */
+enum enable_map_idx {
+ EM_LO,
+ EM_HI,
+ EM_COUNT,
+};
static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
bool is_csf)
@@ -34,18 +41,16 @@ static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
break;
case KBASE_HWCNT_SET_SECONDARY:
- if (is_csf) {
+ if (is_csf)
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
- } else {
+ else
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
- }
break;
case KBASE_HWCNT_SET_TERTIARY:
- if (is_csf) {
+ if (is_csf)
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
- } else {
+ else
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
- }
break;
default:
WARN_ON(true);
@@ -79,11 +84,10 @@ static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
break;
case KBASE_HWCNT_SET_TERTIARY:
- if (is_csf) {
+ if (is_csf)
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
- } else {
+ else
*dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
- }
break;
default:
WARN_ON(true);
@@ -399,9 +403,8 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
* will always have a matching set of blk instances available to
* accumulate them.
*/
- else {
+ else
hw_res_available = true;
- }
/*
* Skip block if no values in the destination block are enabled.
@@ -530,12 +533,10 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
const struct kbase_hwcnt_enable_map *src)
{
const struct kbase_hwcnt_metadata *metadata;
-
- u64 fe_bm = 0;
- u64 shader_bm = 0;
- u64 tiler_bm = 0;
- u64 mmu_l2_bm = 0;
-
+ u64 fe_bm[EM_COUNT] = { 0 };
+ u64 shader_bm[EM_COUNT] = { 0 };
+ u64 tiler_bm[EM_COUNT] = { 0 };
+ u64 mmu_l2_bm[EM_COUNT] = { 0 };
size_t grp, blk, blk_inst;
if (WARN_ON(!src) || WARN_ON(!dst))
@@ -554,42 +555,51 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
- switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
- /* Nothing to do in this case. */
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
- fe_bm |= *blk_map;
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
- tiler_bm |= *blk_map;
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
- shader_bm |= *blk_map;
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
- mmu_l2_bm |= *blk_map;
- break;
- default:
- WARN_ON(true);
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ fe_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ tiler_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ shader_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ mmu_l2_bm[map_idx] |= blk_map[map_idx];
+ break;
+ default:
+ WARN_ON(true);
+ }
}
} else {
WARN_ON(true);
}
}
- dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0);
+ dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]);
dst->shader_bm =
- kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0);
+ kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]);
dst->tiler_bm =
- kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm, 0);
+ kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]);
dst->mmu_l2_bm =
- kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm, 0);
+ kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
}
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
@@ -616,11 +626,10 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
{
const struct kbase_hwcnt_metadata *metadata;
- u64 ignored_hi;
- u64 fe_bm;
- u64 shader_bm;
- u64 tiler_bm;
- u64 mmu_l2_bm;
+ u64 fe_bm[EM_COUNT] = { 0 };
+ u64 shader_bm[EM_COUNT] = { 0 };
+ u64 tiler_bm[EM_COUNT] = { 0 };
+ u64 mmu_l2_bm[EM_COUNT] = { 0 };
size_t grp, blk, blk_inst;
if (WARN_ON(!src) || WARN_ON(!dst))
@@ -628,14 +637,13 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
metadata = dst->metadata;
- kbasep_hwcnt_backend_gpu_block_map_from_physical(
- src->fe_bm, &fe_bm, &ignored_hi);
- kbasep_hwcnt_backend_gpu_block_map_from_physical(
- src->shader_bm, &shader_bm, &ignored_hi);
- kbasep_hwcnt_backend_gpu_block_map_from_physical(
- src->tiler_bm, &tiler_bm, &ignored_hi);
- kbasep_hwcnt_backend_gpu_block_map_from_physical(
- src->mmu_l2_bm, &mmu_l2_bm, &ignored_hi);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO],
+ &shader_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO],
+ &tiler_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
+ &mmu_l2_bm[EM_HI]);
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
const u64 grp_type = kbase_hwcnt_metadata_group_type(
@@ -647,29 +655,38 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
- switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
- /* Nothing to do in this case. */
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
- *blk_map = fe_bm;
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
- *blk_map = tiler_bm;
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
- *blk_map = shader_bm;
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
- *blk_map = mmu_l2_bm;
- break;
- default:
- WARN_ON(true);
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ blk_map[map_idx] = fe_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ blk_map[map_idx] = tiler_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ blk_map[map_idx] = shader_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ blk_map[map_idx] = mmu_l2_bm[map_idx];
+ break;
+ default:
+ WARN_ON(true);
+ }
}
} else {
WARN_ON(true);
@@ -697,12 +714,25 @@ void kbase_hwcnt_gpu_patch_dump_headers(
buf, grp, blk, blk_inst);
const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(
enable_map, grp, blk, blk_inst);
- const u32 prfcnt_en =
- kbase_hwcnt_backend_gpu_block_map_to_physical(
- blk_map[0], 0);
if ((enum kbase_hwcnt_gpu_group_type)grp_type ==
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ u64 prfcnt_bm[EM_COUNT] = { 0 };
+ u32 prfcnt_en = 0;
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ prfcnt_bm[map_idx] = blk_map[map_idx];
+ }
+
+ prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO],
+ prfcnt_bm[EM_HI]);
+
buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
} else {
WARN_ON(true);
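
The hwcnt_gpu.c changes above replace the single-u64 fe_bm/shader_bm/tiler_bm/mmu_l2_bm accumulators with EM_LO/EM_HI pairs, so a block whose enable map spans more than one u64 word contributes its high word as well before the pair is collapsed by kbase_hwcnt_backend_gpu_block_map_to_physical(). A small sketch of that accumulate-then-collapse shape, with a hypothetical conversion callback standing in for the real helper:

/* Sketch of the LO/HI accumulation pattern introduced above. The
 * to_physical callback is a hypothetical stand-in for
 * kbase_hwcnt_backend_gpu_block_map_to_physical().
 */
#include <linux/types.h>

#define SKETCH_EM_WORDS 2 /* mirrors EM_LO + EM_HI (EM_COUNT) */

static u32 sketch_union_to_physical(const u64 (*blk_maps)[SKETCH_EM_WORDS],
				    size_t blk_cnt,
				    u32 (*to_physical)(u64 lo, u64 hi))
{
	u64 bm[SKETCH_EM_WORDS] = { 0 };
	size_t blk, word;

	/* OR every block instance's enable-map words into one LO/HI pair... */
	for (blk = 0; blk < blk_cnt; blk++)
		for (word = 0; word < SKETCH_EM_WORDS; word++)
			bm[word] |= blk_maps[blk][word];

	/* ...then collapse the pair into a single physical register value. */
	return to_physical(bm[0], bm[1]);
}

In the patch itself, the number of words per block comes from kbase_hwcnt_metadata_block_enable_map_stride() and is clamped to EM_COUNT by the WARN_ON in each loop.
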
diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/mali_kbase_hwcnt_types.h
index f04c0ec..9397840 100644
--- a/mali_kbase/mali_kbase_hwcnt_types.h
+++ b/mali_kbase/mali_kbase_hwcnt_types.h
@@ -509,11 +509,12 @@ static inline size_t kbase_hwcnt_metadata_block_avail_bit(
size_t grp,
size_t blk)
{
- const size_t bit =
- metadata->grp_metadata[grp].avail_mask_index +
- metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
- return bit;
+ return metadata->grp_metadata[grp].avail_mask_index +
+ metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
}
/**
@@ -532,9 +533,14 @@ static inline bool kbase_hwcnt_metadata_block_instance_avail(
size_t blk,
size_t blk_inst)
{
- const size_t bit = kbase_hwcnt_metadata_block_avail_bit(
- metadata, grp, blk) + blk_inst;
- const u64 mask = 1ull << bit;
+ size_t bit;
+ u64 mask;
+
+ if (WARN_ON(!metadata))
+ return false;
+
+ bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst;
+ mask = 1ull << bit;
return (metadata->avail_mask & mask) != 0;
}
@@ -575,6 +581,14 @@ static inline u64 *
kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
size_t grp, size_t blk, size_t blk_inst)
{
+ if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map))
+ return NULL;
+
+ if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) ||
+ WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) ||
+ WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ return map->hwcnt_enable_map;
+
return map->hwcnt_enable_map +
map->metadata->grp_metadata[grp].enable_map_index +
map->metadata->grp_metadata[grp]
@@ -612,11 +626,16 @@ static inline void kbase_hwcnt_enable_map_block_disable_all(
size_t blk,
size_t blk_inst)
{
- const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
- dst->metadata, grp, blk);
- const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
- u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
- dst, grp, blk, blk_inst);
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+
+ if (WARN_ON(!dst))
+ return;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES);
}
@@ -628,6 +647,9 @@ static inline void kbase_hwcnt_enable_map_block_disable_all(
static inline void kbase_hwcnt_enable_map_disable_all(
struct kbase_hwcnt_enable_map *dst)
{
+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
+ return;
+
if (dst->hwcnt_enable_map != NULL)
memset(dst->hwcnt_enable_map, 0,
dst->metadata->enable_map_bytes);
@@ -648,14 +670,18 @@ static inline void kbase_hwcnt_enable_map_block_enable_all(
size_t blk,
size_t blk_inst)
{
- const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
- dst->metadata, grp, blk);
- const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
- u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
- dst, grp, blk, blk_inst);
-
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
size_t bitfld_idx;
+ if (WARN_ON(!dst))
+ return;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
const u64 remaining_values = val_cnt -
(bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
@@ -678,6 +704,9 @@ static inline void kbase_hwcnt_enable_map_enable_all(
{
size_t grp, blk, blk_inst;
+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
+ return;
+
kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
kbase_hwcnt_enable_map_block_enable_all(
dst, grp, blk, blk_inst);
@@ -696,7 +725,14 @@ static inline void kbase_hwcnt_enable_map_copy(
struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_enable_map *src)
{
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
+ WARN_ON(dst->metadata != src->metadata))
+ return;
+
if (dst->hwcnt_enable_map != NULL) {
+ if (WARN_ON(!src->hwcnt_enable_map))
+ return;
+
memcpy(dst->hwcnt_enable_map,
src->hwcnt_enable_map,
dst->metadata->enable_map_bytes);
@@ -716,11 +752,18 @@ static inline void kbase_hwcnt_enable_map_union(
struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_enable_map *src)
{
- const size_t bitfld_count =
- dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
- size_t i;
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
+ WARN_ON(dst->metadata != src->metadata))
+ return;
if (dst->hwcnt_enable_map != NULL) {
+ size_t i;
+ size_t const bitfld_count =
+ dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
+
+ if (WARN_ON(!src->hwcnt_enable_map))
+ return;
+
for (i = 0; i < bitfld_count; i++)
dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i];
}
@@ -745,14 +788,18 @@ static inline bool kbase_hwcnt_enable_map_block_enabled(
size_t blk_inst)
{
bool any_enabled = false;
- const size_t val_cnt = kbase_hwcnt_metadata_block_values_count(
- enable_map->metadata, grp, blk);
- const size_t bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
- const u64 *block_enable_map = kbase_hwcnt_enable_map_block_instance(
- enable_map, grp, blk, blk_inst);
-
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ const u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
size_t bitfld_idx;
+ if (WARN_ON(!enable_map))
+ return false;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
const u64 remaining_values = val_cnt -
(bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
@@ -778,8 +825,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled(
const struct kbase_hwcnt_enable_map *enable_map)
{
size_t grp, blk, blk_inst;
- const u64 clk_enable_map_mask =
- (1ull << enable_map->metadata->clk_cnt) - 1;
+ u64 clk_enable_map_mask;
+
+ if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata))
+ return false;
+
+ clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1;
if (enable_map->metadata->clk_cnt > 0 &&
(enable_map->clk_enable_map & clk_enable_map_mask))
@@ -914,6 +965,14 @@ static inline u64 *kbase_hwcnt_dump_buffer_block_instance(
const struct kbase_hwcnt_dump_buffer *buf, size_t grp, size_t blk,
size_t blk_inst)
{
+ if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf))
+ return NULL;
+
+ if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) ||
+ WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) ||
+ WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ return buf->dump_buf;
+
return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index +
buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
(buf->metadata->grp_metadata[grp]
@@ -944,6 +1003,9 @@ void kbase_hwcnt_dump_buffer_zero(
static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk,
size_t val_cnt)
{
+ if (WARN_ON(!dst_blk))
+ return;
+
memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
}
@@ -991,6 +1053,9 @@ kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em,
{
size_t val;
+ if (WARN_ON(!dst_blk))
+ return;
+
for (val = 0; val < val_cnt; val++) {
if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val))
dst_blk[val] = 0;
@@ -1025,6 +1090,9 @@ static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk,
const u64 *src_blk,
size_t val_cnt)
{
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
/* Copy all the counters in the block instance.
* Values of non-enabled counters are undefined.
*/
@@ -1073,6 +1141,9 @@ static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk,
{
size_t val;
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
for (val = 0; val < val_cnt; val++) {
bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(
blk_em, val);
@@ -1116,6 +1187,10 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk,
size_t ctr_cnt)
{
size_t ctr;
+
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
/* Copy all the headers in the block instance.
* Values of non-enabled headers are undefined.
*/
@@ -1172,6 +1247,9 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
{
size_t ctr;
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
kbase_hwcnt_dump_buffer_block_copy_strict(
dst_blk, src_blk, blk_em, hdr_cnt);
@@ -1206,6 +1284,8 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(
static inline bool kbase_hwcnt_clk_enable_map_enabled(
const u64 clk_enable_map, const size_t index)
{
+ if (WARN_ON(index >= 64))
+ return false;
if (clk_enable_map & (1ull << index))
return true;
return false;
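The new WARN_ON above exists because shifting a 64-bit value by 64 or more is undefined behaviour in C; bounding the index first makes the bit test safe. A reduced sketch of the guarded test:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Safe single-bit test on a 64-bit clock enable map. */
    static bool clk_enabled(uint64_t clk_enable_map, size_t index)
    {
        if (index >= 64)
            return false;  /* avoid an out-of-range (undefined) shift */

        return (clk_enable_map & (1ull << index)) != 0;
    }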
diff --git a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c
index 4a03080..69b957a 100644
--- a/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c
+++ b/mali_kbase/mali_kbase_hwcnt_watchdog_if_timer.c
@@ -23,39 +23,40 @@
#include "mali_kbase_hwcnt_watchdog_if.h"
#include "mali_kbase_hwcnt_watchdog_if_timer.h"
-#include <linux/timer.h>
+#include <linux/workqueue.h>
#include <linux/slab.h>
/**
* struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog
* interface.
*
- * @watchdog_timer: Watchdog timer
+ * @workq: Single threaded work queue in which to execute callbacks.
+ * @dwork: Worker to execute callback function.
* @timer_enabled: True if watchdog timer enabled, otherwise false
* @callback: Watchdog callback function
* @user_data: Pointer to user data passed as argument to the callback
* function
*/
struct kbase_hwcnt_watchdog_if_timer_info {
- struct timer_list watchdog_timer;
+ struct workqueue_struct *workq;
+ struct delayed_work dwork;
bool timer_enabled;
kbase_hwcnt_watchdog_callback_fn *callback;
void *user_data;
};
/**
- * kbasep_hwcnt_watchdog_callback() - Watchdog timer callback
+ * kbasep_hwcnt_watchdog_callback() - Watchdog callback
*
- * @timer: Timer structure
+ * @work: Work structure
*
- * Function to be called when watchdog timer expires. Will call the callback
- * function provided at enable().
+ * Function to be called in a work queue after the watchdog timer has expired.
*/
-static void kbasep_hwcnt_watchdog_callback(struct timer_list *const timer)
+static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work)
{
struct kbase_hwcnt_watchdog_if_timer_info *const info =
- container_of(timer, struct kbase_hwcnt_watchdog_if_timer_info,
- watchdog_timer);
+ container_of(work, struct kbase_hwcnt_watchdog_if_timer_info, dwork.work);
+
if (info->callback)
info->callback(info->user_data);
}
@@ -68,14 +69,13 @@ static int kbasep_hwcnt_watchdog_if_timer_enable(
struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
(void *)timer;
- if (WARN_ON(!timer) || WARN_ON(!callback))
+ if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled))
return -EINVAL;
timer_info->callback = callback;
timer_info->user_data = user_data;
- mod_timer(&timer_info->watchdog_timer,
- jiffies + msecs_to_jiffies(period_ms));
+ queue_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(period_ms));
timer_info->timer_enabled = true;
return 0;
@@ -93,7 +93,7 @@ static void kbasep_hwcnt_watchdog_if_timer_disable(
if (!timer_info->timer_enabled)
return;
- del_timer_sync(&timer_info->watchdog_timer);
+ cancel_delayed_work_sync(&timer_info->dwork);
timer_info->timer_enabled = false;
}
@@ -103,11 +103,10 @@ static void kbasep_hwcnt_watchdog_if_timer_modify(
struct kbase_hwcnt_watchdog_if_timer_info *const timer_info =
(void *)timer;
- if (WARN_ON(!timer))
+ if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled))
return;
- mod_timer(&timer_info->watchdog_timer,
- jiffies + msecs_to_jiffies(delay_ms));
+ mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms));
}
void kbase_hwcnt_watchdog_if_timer_destroy(
@@ -123,10 +122,10 @@ void kbase_hwcnt_watchdog_if_timer_destroy(
if (WARN_ON(!timer_info))
return;
- del_timer_sync(&timer_info->watchdog_timer);
+ destroy_workqueue(timer_info->workq);
kfree(timer_info);
- memset(watchdog_if, 0, sizeof(*watchdog_if));
+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ NULL };
}
int kbase_hwcnt_watchdog_if_timer_create(
@@ -145,8 +144,7 @@ int kbase_hwcnt_watchdog_if_timer_create(
(struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled =
false };
- kbase_timer_setup(&timer_info->watchdog_timer,
- kbasep_hwcnt_watchdog_callback);
+ INIT_DELAYED_WORK(&timer_info->dwork, kbasep_hwcnt_watchdog_callback);
*watchdog_if = (struct kbase_hwcnt_watchdog_interface){
.timer = (void *)timer_info,
@@ -155,5 +153,10 @@ int kbase_hwcnt_watchdog_if_timer_create(
.modify = kbasep_hwcnt_watchdog_if_timer_modify,
};
- return 0;
+ timer_info->workq = alloc_workqueue("mali_hwc_watchdog_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (timer_info->workq)
+ return 0;
+
+ kfree(timer_info);
+ return -ENOMEM;
}
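The file above replaces the watchdog's timer_list with a delayed work item on a dedicated high-priority, single-threaded workqueue, so the expiry callback now runs in process context instead of timer (softirq) context. A condensed sketch of that lifecycle, where the hypothetical wd type stands in for kbase_hwcnt_watchdog_if_timer_info and error handling is trimmed:

    #include <linux/jiffies.h>
    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct wd {
        struct workqueue_struct *workq;
        struct delayed_work dwork;
        void (*callback)(void *user_data);
        void *user_data;
    };

    static void wd_expired(struct work_struct *work)
    {
        struct wd *w = container_of(work, struct wd, dwork.work);

        if (w->callback)
            w->callback(w->user_data);  /* runs in process context */
    }

    static struct wd *wd_create(void)
    {
        struct wd *w = kzalloc(sizeof(*w), GFP_KERNEL);

        if (!w)
            return NULL;

        INIT_DELAYED_WORK(&w->dwork, wd_expired);
        w->workq = alloc_workqueue("wd_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
        if (!w->workq) {
            kfree(w);
            return NULL;
        }
        return w;
    }

    static void wd_arm(struct wd *w, unsigned int period_ms)
    {
        queue_delayed_work(w->workq, &w->dwork, msecs_to_jiffies(period_ms));
    }

    static void wd_destroy(struct wd *w)
    {
        cancel_delayed_work_sync(&w->dwork);
        destroy_workqueue(w->workq);
        kfree(w);
    }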
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index 08824bd..157f238 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -40,8 +40,6 @@
#include <mali_kbase_caps.h>
-#define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a)
-
/* Return whether katom will run on the GPU or not. Currently only soft jobs and
* dependency-only atoms do not run on the GPU
*/
@@ -150,7 +148,7 @@ void kbase_jd_dep_clear_locked(struct kbase_jd_atom *katom)
if (katom->status == KBASE_JD_ATOM_STATE_COMPLETED) {
/* The atom has already finished */
- resched |= jd_done_nolock(katom, NULL);
+ resched |= jd_done_nolock(katom, true);
}
if (resched)
@@ -295,6 +293,7 @@ static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const st
struct kbase_mem_phy_alloc *alloc;
#ifdef CONFIG_MALI_DMA_FENCE
bool exclusive;
+
exclusive = (res->ext_resource & BASE_EXT_RES_ACCESS_EXCLUSIVE)
? true : false;
#endif
@@ -704,17 +703,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
-/*
- * Perform the necessary handling of an atom that has finished running
- * on the GPU.
- *
- * Note that if this is a soft-job that has had kbase_prepare_soft_job called on it then the caller
- * is responsible for calling kbase_finish_soft_job *before* calling this function.
- *
- * The caller must hold the kbase_jd_context.lock.
- */
-bool jd_done_nolock(struct kbase_jd_atom *katom,
- struct list_head *completed_jobs_ctx)
+bool jd_done_nolock(struct kbase_jd_atom *katom, bool post_immediately)
{
struct kbase_context *kctx = katom->kctx;
struct list_head completed_jobs;
@@ -811,10 +800,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom,
* is in a disjoint state (ie. being reset).
*/
kbase_disjoint_event_potential(kctx->kbdev);
- if (completed_jobs_ctx)
- list_add_tail(&katom->jd_item, completed_jobs_ctx);
- else
+ if (post_immediately && list_empty(&kctx->completed_jobs))
kbase_event_post(kctx, katom);
+ else
+ list_add_tail(&katom->jd_item, &kctx->completed_jobs);
/* Decrement and check the TOTAL number of jobs. This includes
* those not tracked by the scheduler: 'not ready to run' and
@@ -978,9 +967,9 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
#endif
/* Don't do anything if there is a mess up with dependencies.
- This is done in a separate cycle to check both the dependencies at ones, otherwise
- it will be extra complexity to deal with 1st dependency ( just added to the list )
- if only the 2nd one has invalid config.
+ * This is done in a separate cycle to check both dependencies at once;
+ * otherwise it would add extra complexity to deal with the 1st dependency
+ * (just added to the list) if only the 2nd one has an invalid config.
*/
for (i = 0; i < 2; i++) {
int dep_atom_number = user_atom->pre_dep[i].atom_id;
@@ -1000,7 +989,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
* dependencies.
*/
jd_trace_atom_submit(kctx, katom, NULL);
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
}
}
@@ -1064,7 +1053,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (err >= 0)
kbase_finish_soft_job(katom);
}
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
katom->will_fail_event_code = katom->event_code;
@@ -1098,7 +1087,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with unsupported core_req 0x%x\n",
katom->core_req);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
#endif /* !MALI_INCREMENTAL_RENDERING */
@@ -1112,7 +1101,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
*/
dev_err(kctx->kbdev->dev, "Rejecting atom with jc = NULL\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
/* Reject atoms with an invalid device_nr */
@@ -1122,7 +1111,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with invalid device_nr %d\n",
katom->device_nr);
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
/* Reject atoms with invalid core requirements */
@@ -1132,7 +1121,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
"Rejecting atom with invalid core requirements\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
katom->core_req &= ~BASE_JD_REQ_EVENT_COALESCE;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
/* Reject soft-job atom of certain types from accessing external resources */
@@ -1143,7 +1132,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
dev_err(kctx->kbdev->dev,
"Rejecting soft-job atom accessing external resources\n");
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
if (katom->core_req & BASE_JD_REQ_EXTERNAL_RESOURCES) {
@@ -1151,7 +1140,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (kbase_jd_pre_external_resources(katom, user_atom) != 0) {
/* setup failed (no access, bad resource, unknown resource types, etc.) */
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
}
@@ -1162,7 +1151,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
* JIT IDs - atom is invalid.
*/
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
@@ -1176,13 +1165,13 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if ((katom->core_req & BASE_JD_REQ_SOFT_JOB) == 0) {
if (!kbase_js_is_atom_valid(kctx->kbdev, katom)) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
} else {
/* Soft-job */
if (kbase_prepare_soft_job(katom) != 0) {
katom->event_code = BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
}
@@ -1204,7 +1193,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
if (kbase_process_soft_job(katom) == 0) {
kbase_finish_soft_job(katom);
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
return false;
}
@@ -1234,7 +1223,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
}
/* This is a pure dependency. Resolve it immediately */
- return jd_done_nolock(katom, NULL);
+ return jd_done_nolock(katom, true);
}
int kbase_jd_submit(struct kbase_context *kctx,
@@ -1249,7 +1238,7 @@ int kbase_jd_submit(struct kbase_context *kctx,
u32 latest_flush;
bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) ||
- stride == offsetof(struct base_jd_atom_v2, renderpass_id));
+ stride == offsetof(struct base_jd_atom_v2, renderpass_id));
/*
* kbase_jd_submit isn't expected to fail and so all errors with the
@@ -1257,8 +1246,6 @@ int kbase_jd_submit(struct kbase_context *kctx,
*/
kbdev = kctx->kbdev;
- beenthere(kctx, "%s", "Enter");
-
if (kbase_ctx_flag(kctx, KCTX_SUBMIT_DISABLED)) {
dev_err(kbdev->dev, "Attempt to submit to a context that has SUBMIT_DISABLED set on it\n");
return -EINVAL;
@@ -1494,7 +1481,7 @@ void kbase_jd_done_worker(struct work_struct *data)
mutex_unlock(&js_kctx_info->ctx.jsctx_mutex);
mutex_unlock(&js_devdata->queue_mutex);
/* jd_done_nolock() requires the jsctx_mutex lock to be dropped */
- jd_done_nolock(katom, &kctx->completed_jobs);
+ jd_done_nolock(katom, false);
/* katom may have been freed now, do not use! */
@@ -1634,7 +1621,7 @@ static void jd_cancel_worker(struct work_struct *data)
mutex_lock(&jctx->lock);
- need_to_try_schedule_context = jd_done_nolock(katom, NULL);
+ need_to_try_schedule_context = jd_done_nolock(katom, true);
/* Because we're zapping, we're not adding any more jobs to this ctx, so no need to
* schedule the context. There's also no need for the jsctx_mutex to have been taken
* around this too.
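Across the file above, jd_done_nolock() drops its completed-jobs list parameter in favour of a post_immediately flag: a completion event is posted straight away only when the flag is set and nothing older is still queued on the context, otherwise the atom joins kctx->completed_jobs so ordering is preserved. A reduced restatement of that decision with hypothetical demo types:

    #include <linux/list.h>
    #include <linux/types.h>

    /* Hypothetical reduced context/atom pair, for illustration only. */
    struct demo_ctx  { struct list_head completed_jobs; };
    struct demo_atom { struct list_head item; };

    static void post_or_queue(struct demo_ctx *ctx, struct demo_atom *atom,
                              bool post_immediately,
                              void (*post_event)(struct demo_ctx *,
                                                 struct demo_atom *))
    {
        /* Post directly only if asked to and no earlier completion is still
         * queued; otherwise append, keeping completions in FIFO order.
         */
        if (post_immediately && list_empty(&ctx->completed_jobs))
            post_event(ctx, atom);
        else
            list_add_tail(&atom->item, &ctx->completed_jobs);
    }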
diff --git a/mali_kbase/mali_kbase_jd_debugfs.c b/mali_kbase/mali_kbase_jd_debugfs.c
index 6378931..7cc082d 100644
--- a/mali_kbase/mali_kbase_jd_debugfs.c
+++ b/mali_kbase/mali_kbase_jd_debugfs.c
@@ -117,7 +117,7 @@ static void kbasep_jd_debugfs_atom_deps(
int i;
for (i = 0; i < 2; i++) {
- deps[i].id = (unsigned)(atom->dep[i].atom ?
+ deps[i].id = (unsigned int)(atom->dep[i].atom ?
kbase_jd_atom_id(kctx, atom->dep[i].atom) : 0);
switch (atom->dep[i].dep_type) {
@@ -231,9 +231,9 @@ static const struct file_operations kbasep_jd_debugfs_atoms_fops = {
void kbasep_jd_debugfs_ctx_init(struct kbase_context *kctx)
{
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
- const mode_t mode = S_IRUGO;
+ const mode_t mode = 0444;
#else
- const mode_t mode = S_IRUSR;
+ const mode_t mode = 0400;
#endif
/* Caller already ensures this, but we keep the pattern for
diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c
index 898606b..6cbd6f1 100644
--- a/mali_kbase/mali_kbase_jm.c
+++ b/mali_kbase/mali_kbase_jm.c
@@ -138,10 +138,11 @@ struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
if (katom->event_code != BASE_JD_EVENT_STOPPED &&
katom->event_code != BASE_JD_EVENT_REMOVED_FROM_NEXT) {
return kbase_js_complete_atom(katom, NULL);
- } else {
- kbase_js_unpull(katom->kctx, katom);
- return NULL;
}
+
+ kbase_js_unpull(katom->kctx, katom);
+
+ return NULL;
}
struct kbase_jd_atom *kbase_jm_complete(struct kbase_device *kbdev,
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index 799c7e5..43662ae 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -2528,11 +2528,10 @@ bool kbase_js_dep_resolved_submit(struct kbase_context *kctx,
/* If slot will transition from unpullable to pullable then add to
* pullable list
*/
- if (jsctx_rb_none_to_pull(kctx, katom->slot_nr)) {
+ if (jsctx_rb_none_to_pull(kctx, katom->slot_nr))
enqueue_required = true;
- } else {
+ else
enqueue_required = false;
- }
if ((katom->atom_flags & KBASE_KATOM_FLAG_X_DEP_BLOCKED) ||
(katom->pre_dep && (katom->pre_dep->atom_flags &
@@ -2658,9 +2657,9 @@ static void kbase_js_evict_deps(struct kbase_context *kctx,
(void *)x_dep);
/* Fail if it had a data dependency. */
- if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER) {
+ if (x_dep->atom_flags & KBASE_KATOM_FLAG_FAIL_BLOCKER)
x_dep->will_fail_event_code = katom->event_code;
- }
+
if (x_dep->atom_flags & KBASE_KATOM_FLAG_JSCTX_IN_X_DEP_LIST)
kbase_js_move_to_tree(x_dep);
}
@@ -3926,7 +3925,7 @@ void kbase_js_zap_context(struct kbase_context *kctx)
kbasep_js_clear_submit_allowed(js_devdata, kctx);
- /* Retain and (later) release the context whilst it is is now
+ /* Retain and (later) release the context whilst it is now
* disallowed from submitting jobs - ensures that someone
* somewhere will be removing the context later on
*/
diff --git a/mali_kbase/mali_kbase_js_ctx_attr.c b/mali_kbase/mali_kbase_js_ctx_attr.c
index 7775648..04ea06b 100644
--- a/mali_kbase/mali_kbase_js_ctx_attr.c
+++ b/mali_kbase/mali_kbase_js_ctx_attr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2016, 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,8 +27,9 @@
*/
/**
- * Check whether a ctx has a certain attribute, and if so, retain that
- * attribute on the runpool.
+ * kbasep_js_ctx_attr_runpool_retain_attr - Check whether a ctx has a certain attribute
+ * and if so, retain that attribute on the runpool.
+ *
* @kbdev: Device pointer
* @kctx: KBase context
* @attribute: Attribute to check/retain
@@ -38,11 +39,11 @@
* - runpool_irq spinlock
* - ctx is scheduled on the runpool
*
- * @return true indicates a change in ctx attributes state of the runpool.
+ * Return: true indicates a change in ctx attributes state of the runpool.
* In this state, the scheduler might be able to submit more jobs than
* previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
* or similar is called sometime later.
- * @return false indicates no change in ctx attributes state of the runpool.
+ * false indicates no change in ctx attributes state of the runpool.
*/
static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
{
@@ -76,8 +77,9 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s
}
/**
- * Check whether a ctx has a certain attribute, and if so, release that
- * attribute on the runpool.
+ * kbasep_js_ctx_attr_runpool_release_attr - Check whether a ctx has a certain attribute,
+ * and if so, release that attribute on the runpool.
+ *
* @kbdev: Device pointer
* @kctx: KBase context
* @attribute: Attribute to release
@@ -87,11 +89,11 @@ static bool kbasep_js_ctx_attr_runpool_retain_attr(struct kbase_device *kbdev, s
* - runpool_irq spinlock
* - ctx is scheduled on the runpool
*
- * @return true indicates a change in ctx attributes state of the runpool.
+ * Return: true indicates a change in ctx attributes state of the runpool.
* In this state, the scheduler might be able to submit more jobs than
* previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
* or similar is called sometime later.
- * @return false indicates no change in ctx attributes state of the runpool.
+ * false indicates no change in ctx attributes state of the runpool.
*/
static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
{
@@ -124,8 +126,9 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev,
}
/**
- * Retain a certain attribute on a ctx, also retaining it on the runpool
- * if the context is scheduled.
+ * kbasep_js_ctx_attr_ctx_retain_attr - Retain a certain attribute on a ctx,
+ * also retaining it on the runpool if the context is scheduled.
+ *
* @kbdev: Device pointer
* @kctx: KBase context
* @attribute: Attribute to retain
@@ -134,9 +137,9 @@ static bool kbasep_js_ctx_attr_runpool_release_attr(struct kbase_device *kbdev,
* - jsctx mutex
* - If the context is scheduled, then runpool_irq spinlock must also be held
*
- * @return true indicates a change in ctx attributes state of the runpool.
+ * Return: true indicates a change in ctx attributes state of the runpool.
* This may allow the scheduler to submit more jobs than previously.
- * @return false indicates no change in ctx attributes state of the runpool.
+ * false indicates no change in ctx attributes state of the runpool.
*/
static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
{
@@ -164,8 +167,9 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc
}
/**
- * Release a certain attribute on a ctx, also releasing it from the runpool
- * if the context is scheduled.
+ * kbasep_js_ctx_attr_ctx_release_attr - Release a certain attribute on a ctx,
+ * also releasing it from the runpool if the context is scheduled.
+ *
* @kbdev: Device pointer
* @kctx: KBase context
* @attribute: Attribute to release
@@ -174,9 +178,9 @@ static bool kbasep_js_ctx_attr_ctx_retain_attr(struct kbase_device *kbdev, struc
* - jsctx mutex
* - If the context is scheduled, then runpool_irq spinlock must also be held
*
- * @return true indicates a change in ctx attributes state of the runpool.
+ * Return: true indicates a change in ctx attributes state of the runpool.
* This may allow the scheduler to submit more jobs than previously.
- * @return false indicates no change in ctx attributes state of the runpool.
+ * false indicates no change in ctx attributes state of the runpool.
*/
static bool kbasep_js_ctx_attr_ctx_release_attr(struct kbase_device *kbdev, struct kbase_context *kctx, enum kbasep_js_ctx_attr attribute)
{
diff --git a/mali_kbase/mali_kbase_js_ctx_attr.h b/mali_kbase/mali_kbase_js_ctx_attr.h
index 6f29241..2dc640d 100644
--- a/mali_kbase/mali_kbase_js_ctx_attr.h
+++ b/mali_kbase/mali_kbase_js_ctx_attr.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2012-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2015, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,8 @@
#define _KBASE_JS_CTX_ATTR_H_
/**
- * Retain all attributes of a context
+ * kbasep_js_ctx_attr_runpool_retain_ctx - Retain all attributes of a context
+ *
* @kbdev: KBase device
* @kctx: KBase context
*
@@ -42,7 +43,8 @@
void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
/**
- * Release all attributes of a context
+ * kbasep_js_ctx_attr_runpool_release_ctx - Release all attributes of a context
+ *
* @kbdev: KBase device
* @kctx: KBase context
*
@@ -54,16 +56,17 @@ void kbasep_js_ctx_attr_runpool_retain_ctx(struct kbase_device *kbdev, struct kb
* - runpool_irq spinlock
* - ctx->is_scheduled is true
*
- * @return true indicates a change in ctx attributes state of the runpool.
+ * Return: true indicates a change in ctx attributes state of the runpool.
* In this state, the scheduler might be able to submit more jobs than
* previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
* or similar is called sometime later.
- * @return false indicates no change in ctx attributes state of the runpool.
+ * false indicates no change in ctx attributes state of the runpool.
*/
bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
/**
- * Retain all attributes of an atom
+ * kbasep_js_ctx_attr_ctx_retain_atom - Retain all attributes of an atom
+ *
* @kbdev: KBase device
* @kctx: KBase context
* @katom: Atom
@@ -77,7 +80,9 @@ bool kbasep_js_ctx_attr_runpool_release_ctx(struct kbase_device *kbdev, struct k
void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbase_jd_atom *katom);
/**
- * Release all attributes of an atom, given its retained state.
+ * kbasep_js_ctx_attr_ctx_release_atom - Release all attributes of an atom,
+ * given its retained state.
+ *
* @kbdev: KBase device
* @kctx: KBase context
* @katom_retained_state: Retained state
@@ -90,11 +95,11 @@ void kbasep_js_ctx_attr_ctx_retain_atom(struct kbase_device *kbdev, struct kbase
*
* This is a no-op when \a katom_retained_state is invalid.
*
- * @return true indicates a change in ctx attributes state of the runpool.
+ * Return: true indicates a change in ctx attributes state of the runpool.
* In this state, the scheduler might be able to submit more jobs than
* previously, and so the caller should ensure kbasep_js_try_run_next_job_nolock()
* or similar is called sometime later.
- * @return false indicates no change in ctx attributes state of the runpool.
+ * false indicates no change in ctx attributes state of the runpool.
*/
bool kbasep_js_ctx_attr_ctx_release_atom(struct kbase_device *kbdev, struct kbase_context *kctx, struct kbasep_js_atom_retained_state *katom_retained_state);
diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c
index 1b23b41..84efbb3 100644
--- a/mali_kbase/mali_kbase_kinstr_jm.c
+++ b/mali_kbase/mali_kbase_kinstr_jm.c
@@ -47,9 +47,14 @@
#include <linux/version.h>
#include <linux/wait.h>
+/* Define static_assert().
+ *
+ * The macro was introduced in kernel 5.1. But older vendor kernels may define
+ * it too.
+ */
#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE
#include <linux/build_bug.h>
-#else
+#elif !defined(static_assert)
// Stringify the expression if no message is given.
#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e)
#define __static_assert(e, msg, ...) _Static_assert(e, msg)
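The hunk above only provides the static_assert() fallback when neither the kernel headers (5.1+) nor a vendor backport already define it. Usage is the same either way; a minimal sketch:

    #include <linux/build_bug.h>  /* or the fallback macro above on older kernels */
    #include <linux/types.h>

    /* Compile-time invariants: the build fails if either condition is false. */
    static_assert(sizeof(u64) == 8, "u64 must be 8 bytes wide");
    static_assert(8 * sizeof(unsigned long) >= 32);  /* message is optional */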
@@ -204,9 +209,8 @@ struct reader_changes {
*/
static inline bool reader_changes_is_valid_size(const size_t size)
{
- typedef struct reader_changes changes_t;
- const size_t elem_size = sizeof(*((changes_t *)0)->data);
- const size_t size_size = sizeof(((changes_t *)0)->size);
+ const size_t elem_size = sizeof(*((struct reader_changes *)0)->data);
+ const size_t size_size = sizeof(((struct reader_changes *)0)->size);
const size_t size_max = (1ull << (size_size * 8)) - 1;
return is_power_of_2(size) && /* Is a power of two */
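The rewritten helper above keeps the same checks while dropping the local typedef: the requested ring-buffer size must be a power of two and must fit in the structure's size field. A reduced sketch of that validation, assuming a u64 element type and a 32-bit size field (the real widths come from struct reader_changes):

    #include <linux/log2.h>   /* is_power_of_2() */
    #include <linux/types.h>

    static bool changes_size_is_valid(size_t size)
    {
        const size_t elem_size = sizeof(u64);      /* assumed element type */
        const size_t size_max  = 0xFFFFFFFFul;     /* assumed u32 size field */

        return is_power_of_2(size) &&  /* power of two */
               size >= elem_size &&    /* holds at least one element */
               size <= size_max;       /* representable by the size field */
    }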
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c
index 27ff3bb..e0c2c2c 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.c
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,7 +22,6 @@
#include "mali_kbase.h"
#include "mali_kbase_kinstr_prfcnt.h"
#include "mali_kbase_hwcnt_virtualizer.h"
-#include "mali_kbase_hwcnt_types.h"
#include "mali_kbase_hwcnt_gpu.h"
#include <uapi/gpu/arm/midgard/mali_kbase_ioctl.h>
#include "mali_malisw.h"
@@ -157,6 +156,9 @@ struct kbase_kinstr_prfcnt_async {
* @sample_arr: Array of dump buffers allocated by this client.
* @read_idx: Index of buffer read by userspace.
* @write_idx: Index of buffer being written by dump worker.
+ * @fetch_idx: Index of buffer being fetched by userspace, but
+ * pending a confirmation of being read (consumed) if it
+ * differs from the read_idx.
* @waitq: Client's notification queue.
* @sample_size: Size of the data required for one sample, in bytes.
* @sample_count: Number of samples the client is able to capture.
@@ -185,6 +187,7 @@ struct kbase_kinstr_prfcnt_client {
struct kbase_kinstr_prfcnt_sample_array sample_arr;
atomic_t read_idx;
atomic_t write_idx;
+ atomic_t fetch_idx;
wait_queue_head_t waitq;
size_t sample_size;
size_t sample_count;
@@ -248,7 +251,7 @@ kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
poll_wait(filp, &cli->waitq, wait);
- if (atomic_read(&cli->write_idx) != atomic_read(&cli->read_idx))
+ if (atomic_read(&cli->write_idx) != atomic_read(&cli->fetch_idx))
return POLLIN;
return 0;
@@ -398,17 +401,30 @@ kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type)
return block_type;
}
+static bool kbase_kinstr_is_block_type_reserved(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
+{
+ enum prfcnt_block_type block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
+ kbase_hwcnt_metadata_block_type(metadata, grp, blk));
+
+ return block_type == PRFCNT_BLOCK_TYPE_RESERVED;
+}
+
/**
* kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta
* item array.
+ * @enable_map: Non-NULL pointer to the map of enabled counters.
* @dst: Non-NULL pointer to the sample's dump buffer object.
* @block_meta_base: Non-NULL double pointer to the start of the block meta
* data items.
* @base_addr: Address of allocated pages for array of samples. Used
* to calculate offset of block values.
* @counter_set: The SET which blocks represent.
+ *
+ * Return: 0 on success, else error code.
*/
-int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst,
+int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_dump_buffer *dst,
struct prfcnt_metadata **block_meta_base,
u64 base_addr, u8 counter_set)
{
@@ -423,8 +439,10 @@ int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *ds
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
u64 *dst_blk;
- /* Skip unused blocks */
- if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst))
+ /* Skip unavailable or non-enabled blocks */
+ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) ||
+ !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) ||
+ !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
continue;
dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
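The loop above now skips a block instance for any of three reasons: its type maps to a reserved prfcnt block type, the metadata marks the instance unavailable, or the client's enable map leaves it disabled. A schematic restatement of that filter, with predicate callbacks standing in for the driver's metadata helpers:

    #include <stdbool.h>
    #include <stddef.h>

    struct blk_id { size_t grp, blk, blk_inst; };

    static bool block_should_be_emitted(const struct blk_id *id,
                                        bool (*is_reserved)(size_t grp, size_t blk),
                                        bool (*is_available)(const struct blk_id *id),
                                        bool (*is_enabled)(const struct blk_id *id))
    {
        if (is_reserved(id->grp, id->blk))
            return false;  /* block type not exposed to userspace */
        if (!is_available(id))
            return false;  /* instance not present on this GPU configuration */
        if (!is_enabled(id))
            return false;  /* client did not request counters from it */
        return true;
    }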
@@ -482,8 +500,9 @@ static void kbasep_kinstr_prfcnt_set_sample_metadata(
/* Dealing with counter blocks */
ptr_md++;
- if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(
- dump_buf, &ptr_md, cli->sample_arr.page_addr, cli->config.counter_set)))
+ if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(&cli->enable_map, dump_buf, &ptr_md,
+ cli->sample_arr.page_addr,
+ cli->config.counter_set)))
return;
/* Handle the last sentinel item */
@@ -676,6 +695,9 @@ kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
&cli->config.phys_em);
+ /* Enable all the available clk_enable_map. */
+ cli->enable_map.clk_enable_map = (1ull << cli->kinstr_ctx->metadata->clk_cnt) - 1;
+
mutex_lock(&cli->kinstr_ctx->lock);
/* Enable HWC from the configuration of the client creation */
ret = kbase_hwcnt_virtualizer_client_set_counters(
@@ -778,7 +800,7 @@ kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
mutex_unlock(&cli->kinstr_ctx->lock);
- return ret;
+ return 0;
}
static int
@@ -887,28 +909,35 @@ kbasep_kinstr_prfcnt_client_async_dump(struct kbase_kinstr_prfcnt_client *cli,
static int
kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli)
{
+ unsigned int write_idx;
+
WARN_ON(!cli);
lockdep_assert_held(&cli->cmd_sync_lock);
mutex_lock(&cli->kinstr_ctx->lock);
- /* Discard (Clear) all internally buffered samples */
- atomic_set(&cli->read_idx, atomic_read(&cli->write_idx));
+ write_idx = atomic_read(&cli->write_idx);
+
+ /* Discard (clear) all internally buffered samples. Note, if there
+ * is a fetched sample in flight, one should not touch the read index,
+ * leaving it alone for the put-sample operation to update it. The
+ * consistency between the read_idx and the fetch_idx is coordinated by
+ * holding the cli->cmd_sync_lock.
+ */
+ if (atomic_read(&cli->fetch_idx) != atomic_read(&cli->read_idx)) {
+ atomic_set(&cli->fetch_idx, write_idx);
+ } else {
+ atomic_set(&cli->fetch_idx, write_idx);
+ atomic_set(&cli->read_idx, write_idx);
+ }
mutex_unlock(&cli->kinstr_ctx->lock);
return 0;
}
-/**
- * kbasep_kinstr_prfcnt_cmd() - Execute command for a client session.
- * @cli: Non-NULL pointer to kinstr_prfcnt client.
- * @control_cmd: Control command to execute.
- *
- * Return: 0 on success, else error code.
- */
-static int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli,
- struct prfcnt_control_cmd *control_cmd)
+int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli,
+ struct prfcnt_control_cmd *control_cmd)
{
int ret = 0;
@@ -950,14 +979,36 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli,
{
unsigned int write_idx;
unsigned int read_idx;
+ unsigned int fetch_idx;
u64 sample_offset_bytes;
struct prfcnt_metadata *sample_meta;
+ int err = 0;
+ mutex_lock(&cli->cmd_sync_lock);
write_idx = atomic_read(&cli->write_idx);
read_idx = atomic_read(&cli->read_idx);
- if (write_idx == read_idx)
- return -EINVAL;
+ if (write_idx == read_idx) {
+ err = -EINVAL;
+ goto error_out;
+ }
+
+ /* If a sample has already been fetched through the client interface,
+ * indicated by the fetch index differing from read_idx (typically
+ * read_idx + 1 == fetch_idx), no further fetch is allowed until the
+ * previously fetched buffer is put back, which brings read_idx back
+ * to fetch_idx. By design, the "plus one" relation (the typical case)
+ * only breaks if a sample-discard operation happened after the sample
+ * in question was fetched, in which case fetch_idx can be more than 1
+ * ahead of read_idx.
+ */
+ fetch_idx = atomic_read(&cli->fetch_idx);
+ if (read_idx != fetch_idx) {
+ err = -EBUSY;
+ goto error_out;
+ }
read_idx %= cli->sample_arr.sample_count;
sample_offset_bytes =
@@ -972,19 +1023,21 @@ kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli,
* for instance if the client is trying to get an asynchronous
* sample which has not been dumped yet.
*/
- if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE)
- return -EINVAL;
- if (sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION)
- return -EINVAL;
+ if (sample_meta->hdr.item_type != PRFCNT_SAMPLE_META_TYPE_SAMPLE ||
+ sample_meta->hdr.item_version != PRFCNT_READER_API_VERSION) {
+ err = -EINVAL;
+ goto error_out;
+ }
sample_access->sequence = sample_meta->u.sample_md.seq;
sample_access->sample_offset_bytes = sample_offset_bytes;
- /* read_idx is not incremented here, because the interface allows
- * only one sample to be "in flight" between kernel space and user space.
- */
+ /* Marking a sample has been fetched by advancing the fetch index */
+ atomic_inc(&cli->fetch_idx);
- return 0;
+error_out:
+ mutex_unlock(&cli->cmd_sync_lock);
+ return err;
}
static int
@@ -993,28 +1046,39 @@ kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli,
{
unsigned int write_idx;
unsigned int read_idx;
+ unsigned int fetch_idx;
u64 sample_offset_bytes;
+ int err = 0;
+ mutex_lock(&cli->cmd_sync_lock);
write_idx = atomic_read(&cli->write_idx);
read_idx = atomic_read(&cli->read_idx);
- if (write_idx == read_idx)
- return -EINVAL;
-
- if (sample_access->sequence != read_idx)
- return -EINVAL;
+ if (write_idx == read_idx || sample_access->sequence != read_idx) {
+ err = -EINVAL;
+ goto error_out;
+ }
read_idx %= cli->sample_arr.sample_count;
sample_offset_bytes =
(u64)(uintptr_t)cli->sample_arr.samples[read_idx].sample_meta -
(u64)(uintptr_t)cli->sample_arr.page_addr;
- if (sample_access->sample_offset_bytes != sample_offset_bytes)
- return -EINVAL;
+ if (sample_access->sample_offset_bytes != sample_offset_bytes) {
+ err = -EINVAL;
+ goto error_out;
+ }
- atomic_inc(&cli->read_idx);
+ fetch_idx = atomic_read(&cli->fetch_idx);
+ WARN_ON(read_idx == fetch_idx);
+ /* Setting the read_idx matching the fetch_idx, signals no in-flight
+ * fetched sample.
+ */
+ atomic_set(&cli->read_idx, fetch_idx);
- return 0;
+error_out:
+ mutex_unlock(&cli->cmd_sync_lock);
+ return err;
}
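The get/put/discard paths above coordinate three indices under cli->cmd_sync_lock: write_idx advances as the dump worker produces samples, fetch_idx advances when a sample is handed to userspace, and read_idx only catches up to fetch_idx when that sample is put back. A compact model of the protocol with plain integers (the single-lock assumption replaces the driver's atomics; sequence and offset validation omitted):

    #include <errno.h>
    #include <stdbool.h>

    struct sample_ring {
        unsigned int write_idx;  /* advanced by the dump worker           */
        unsigned int fetch_idx;  /* advanced when userspace gets a sample */
        unsigned int read_idx;   /* advanced when userspace puts it back  */
    };

    static int ring_get_sample(struct sample_ring *r)
    {
        if (r->write_idx == r->read_idx)
            return -EINVAL;          /* nothing new to hand out */
        if (r->read_idx != r->fetch_idx)
            return -EBUSY;           /* a fetched sample is still in flight */
        r->fetch_idx++;              /* mark one sample as fetched */
        return 0;
    }

    static int ring_put_sample(struct sample_ring *r)
    {
        if (r->write_idx == r->read_idx || r->read_idx == r->fetch_idx)
            return -EINVAL;          /* nothing was fetched */
        r->read_idx = r->fetch_idx;  /* no in-flight sample any more */
        return 0;
    }

    static void ring_discard(struct sample_ring *r)
    {
        const bool in_flight = (r->fetch_idx != r->read_idx);

        r->fetch_idx = r->write_idx;
        if (!in_flight)
            r->read_idx = r->write_idx;  /* else put_sample catches up later */
    }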
/**
@@ -1137,12 +1201,7 @@ static void kbasep_kinstr_prfcnt_sample_array_free(
memset(sample_arr, 0, sizeof(*sample_arr));
}
-/**
- * kbasep_kinstr_prfcnt_client_destroy() - Destroy a kinstr_prfcnt client.
- * @cli: kinstr_prfcnt client. Must not be attached to a kinstr_prfcnt context.
- */
-static void
-kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli)
+void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli)
{
if (!cli)
return;
@@ -1191,7 +1250,8 @@ static const struct file_operations kinstr_prfcnt_client_fops = {
.release = kbasep_kinstr_prfcnt_hwcnt_reader_release,
};
-size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata)
+size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map)
{
size_t grp, blk, blk_inst;
size_t md_count = 0;
@@ -1200,8 +1260,10 @@ size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadat
return 0;
kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
- /* Skip unused blocks */
- if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst))
+ /* Skip unavailable, non-enabled or reserved blocks */
+ if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) ||
+ !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) ||
+ !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
continue;
md_count++;
@@ -1216,14 +1278,14 @@ size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadat
return md_count;
}
-static size_t kbasep_kinstr_prfcnt_get_sample_size(
- const struct kbase_hwcnt_metadata *metadata,
- struct kbase_hwcnt_dump_buffer *dump_buf)
+static size_t kbasep_kinstr_prfcnt_get_sample_size(struct kbase_kinstr_prfcnt_client *cli,
+ const struct kbase_hwcnt_metadata *metadata)
{
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
size_t sample_meta_bytes;
- size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata);
+ struct kbase_hwcnt_dump_buffer *dump_buf = &cli->tmp_buf;
+ size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map);
if (!metadata)
return 0;
@@ -1311,6 +1373,8 @@ static void kbasep_kinstr_prfcnt_async_dump_worker(struct work_struct *work)
* kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for
* execution as soon as possible.
* @timer: Timer structure.
+ *
+ * Return: HRTIMER_NORESTART always.
*/
static enum hrtimer_restart
kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer)
@@ -1373,7 +1437,7 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
if (WARN_ON(kinstr_ctx->client_count > 0)) {
struct kbase_kinstr_prfcnt_client *pos, *n;
- list_for_each_entry_safe(pos, n, &kinstr_ctx->clients, node) {
+ list_for_each_entry_safe (pos, n, &kinstr_ctx->clients, node) {
list_del(&pos->node);
kinstr_ctx->client_count--;
kbasep_kinstr_prfcnt_client_destroy(pos);
@@ -1431,7 +1495,7 @@ void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
struct kbase_kinstr_prfcnt_client *pos;
bool has_periodic_clients = false;
- list_for_each_entry(pos, &kinstr_ctx->clients, node) {
+ list_for_each_entry (pos, &kinstr_ctx->clients, node) {
if (pos->dump_interval_ns != 0) {
has_periodic_clients = true;
break;
@@ -1448,10 +1512,10 @@ void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
mutex_unlock(&kinstr_ctx->lock);
}
-static int kbasep_kinstr_prfcnt_sample_array_alloc(
- const struct kbase_hwcnt_metadata *metadata, size_t n,
- struct kbase_kinstr_prfcnt_sample_array *sample_arr)
+static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_client *cli,
+ const struct kbase_hwcnt_metadata *metadata)
{
+ struct kbase_kinstr_prfcnt_sample_array *sample_arr = &cli->sample_arr;
struct kbase_kinstr_prfcnt_sample *samples;
size_t sample_idx;
u64 addr;
@@ -1461,24 +1525,25 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(
size_t sample_meta_bytes;
size_t md_count;
size_t sample_size;
+ size_t buffer_count = cli->config.buffer_count;
if (!metadata || !sample_arr)
return -EINVAL;
- md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata);
+ md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map);
sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count;
dump_buf_bytes = metadata->dump_buf_bytes;
clk_cnt_buf_bytes =
sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt;
sample_size = sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes;
- samples = kmalloc_array(n, sizeof(*samples), GFP_KERNEL);
+ samples = kmalloc_array(buffer_count, sizeof(*samples), GFP_KERNEL);
if (!samples)
return -ENOMEM;
- order = get_order(sample_size * n);
- addr = (u64)(uintptr_t)kzalloc(sample_size * n, GFP_KERNEL);
+ order = get_order(sample_size * buffer_count);
+ addr = (u64)(uintptr_t)kzalloc(sample_size * buffer_count, GFP_KERNEL);
if (!addr) {
kfree((void *)samples);
@@ -1487,10 +1552,10 @@ static int kbasep_kinstr_prfcnt_sample_array_alloc(
sample_arr->page_addr = addr;
sample_arr->page_order = order;
- sample_arr->sample_count = n;
+ sample_arr->sample_count = buffer_count;
sample_arr->samples = samples;
- for (sample_idx = 0; sample_idx < n; sample_idx++) {
+ for (sample_idx = 0; sample_idx < buffer_count; sample_idx++) {
const size_t sample_meta_offset = sample_size * sample_idx;
const size_t dump_buf_offset =
sample_meta_offset + sample_meta_bytes;
@@ -1652,6 +1717,7 @@ static int kbasep_kinstr_prfcnt_parse_request_scope(
* @kinstr_ctx: Pointer to the kinstr_prfcnt context.
* @setup: Session setup information to parse.
* @config: Client object the session configuration should be written to.
+ * @req_arr: Pointer to array of request items for client session.
*
* This function parses the list of "request" items sent by the user space
* client, and writes the configuration for the new client to be created
@@ -1659,41 +1725,18 @@ static int kbasep_kinstr_prfcnt_parse_request_scope(
*
* Return: 0 on success, else error code.
*/
-static int kbasep_kinstr_prfcnt_parse_setup(
- struct kbase_kinstr_prfcnt_context *kinstr_ctx,
- union kbase_ioctl_kinstr_prfcnt_setup *setup,
- struct kbase_kinstr_prfcnt_client_config *config)
+static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ union kbase_ioctl_kinstr_prfcnt_setup *setup,
+ struct kbase_kinstr_prfcnt_client_config *config,
+ struct prfcnt_request_item *req_arr)
{
uint32_t i;
- struct prfcnt_request_item *req_arr;
unsigned int item_count = setup->in.request_item_count;
- unsigned long bytes;
int err = 0;
- /* Limiting the request items to 2x of the expected: acommodating
- * moderate duplications but rejecting excessive abuses.
- */
- if (!setup->in.requests_ptr || (item_count < 2) ||
- (setup->in.request_item_size == 0) ||
- item_count > 2 * kinstr_ctx->info_item_count) {
- return -EINVAL;
- }
-
- bytes = item_count * sizeof(*req_arr);
- req_arr = kmalloc(bytes, GFP_KERNEL);
- if (!req_arr)
- return -ENOMEM;
-
- if (copy_from_user(req_arr, u64_to_user_ptr(setup->in.requests_ptr),
- bytes)) {
- err = -EFAULT;
- goto free_buf;
- }
-
if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE ||
req_arr[item_count - 1].hdr.item_version != 0) {
- err = -EINVAL;
- goto free_buf;
+ return -EINVAL;
}
/* The session configuration can only feature one value for some
@@ -1770,9 +1813,6 @@ static int kbasep_kinstr_prfcnt_parse_setup(
break;
}
-free_buf:
- kfree(req_arr);
-
if (!err) {
/* Verify that properties (like capture mode and block counter
* set) have been defined by the user space client.
@@ -1787,28 +1827,17 @@ free_buf:
return err;
}
-/**
- * kbasep_kinstr_prfcnt_client_create() - Create a kinstr_prfcnt client.
- * Does not attach to the kinstr_prfcnt
- * context.
- * @kinstr_ctx: Non-NULL pointer to kinstr_prfcnt context.
- * @setup: Non-NULL pointer to hardware counter ioctl setup structure.
- * @out_vcli: Non-NULL pointer to where created client will be stored on
- * success.
- *
- * Return: 0 on success, else error code.
- */
-static int kbasep_kinstr_prfcnt_client_create(
- struct kbase_kinstr_prfcnt_context *kinstr_ctx,
- union kbase_ioctl_kinstr_prfcnt_setup *setup,
- struct kbase_kinstr_prfcnt_client **out_vcli)
+int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ union kbase_ioctl_kinstr_prfcnt_setup *setup,
+ struct kbase_kinstr_prfcnt_client **out_vcli,
+ struct prfcnt_request_item *req_arr)
{
int err;
struct kbase_kinstr_prfcnt_client *cli;
- struct kbase_hwcnt_physical_enable_map phys_em;
WARN_ON(!kinstr_ctx);
WARN_ON(!setup);
+ WARN_ON(!req_arr);
cli = kzalloc(sizeof(*cli), GFP_KERNEL);
@@ -1816,7 +1845,7 @@ static int kbasep_kinstr_prfcnt_client_create(
return -ENOMEM;
cli->kinstr_ctx = kinstr_ctx;
- err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config);
+ err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config, req_arr);
if (err < 0)
goto error;
@@ -1827,6 +1856,7 @@ static int kbasep_kinstr_prfcnt_client_create(
cli->active = false;
atomic_set(&cli->write_idx, 0);
atomic_set(&cli->read_idx, 0);
+ atomic_set(&cli->fetch_idx, 0);
err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata,
&cli->enable_map);
@@ -1834,17 +1864,11 @@ static int kbasep_kinstr_prfcnt_client_create(
if (err < 0)
goto error;
- phys_em.fe_bm = 0;
- phys_em.shader_bm = 0;
- phys_em.tiler_bm = 0;
- phys_em.mmu_l2_bm = 0;
-
- kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em);
+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &cli->config.phys_em);
cli->sample_count = cli->config.buffer_count;
atomic_set(&cli->sync_sample_count, cli->sample_count);
- cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(
- kinstr_ctx->metadata, &cli->tmp_buf);
+ cli->sample_size = kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata);
/* Use virtualizer's metadata to alloc tmp buffer which interacts with
* the HWC virtualizer.
@@ -1855,20 +1879,21 @@ static int kbasep_kinstr_prfcnt_client_create(
if (err < 0)
goto error;
- /* Enable all the available clk_enable_map. */
- cli->enable_map.clk_enable_map =
- (1ull << kinstr_ctx->metadata->clk_cnt) - 1;
+ /* Disable the clock map during setup; it is enabled when the client starts */
+ cli->enable_map.clk_enable_map = 0;
/* Use metadata from virtualizer to allocate dump buffers if
* kinstr_prfcnt doesn't have the truncated metadata.
*/
- err = kbasep_kinstr_prfcnt_sample_array_alloc(kinstr_ctx->metadata,
- cli->config.buffer_count,
- &cli->sample_arr);
+ err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata);
if (err < 0)
goto error;
+ /* Set the enable map to 0 so the virtualizer does not initialize and kick the backend into counting */
+ kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
+ &(struct kbase_hwcnt_physical_enable_map){ 0 });
+
err = kbase_hwcnt_virtualizer_client_create(
kinstr_ctx->hvirt, &cli->enable_map, &cli->hvcli);
@@ -1891,22 +1916,23 @@ error:
static size_t kbasep_kinstr_prfcnt_get_block_info_count(
const struct kbase_hwcnt_metadata *metadata)
{
- size_t grp;
+ size_t grp, blk;
size_t block_info_count = 0;
if (!metadata)
return 0;
for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) {
- block_info_count +=
- kbase_hwcnt_metadata_block_count(metadata, grp);
+ for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) {
+ if (!kbase_kinstr_is_block_type_reserved(metadata, grp, blk))
+ block_info_count++;
+ }
}
return block_info_count;
}
static void kbasep_kinstr_prfcnt_get_request_info_list(
- struct kbase_kinstr_prfcnt_context *kinstr_ctx,
struct prfcnt_enum_item *item_arr, size_t *arr_idx)
{
memcpy(&item_arr[*arr_idx], kinstr_prfcnt_supported_requests,
@@ -1914,6 +1940,24 @@ static void kbasep_kinstr_prfcnt_get_request_info_list(
*arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests);
}
+static void kbasep_kinstr_prfcnt_get_sample_info_item(const struct kbase_hwcnt_metadata *metadata,
+ struct prfcnt_enum_item *item_arr,
+ size_t *arr_idx)
+{
+ struct prfcnt_enum_item sample_info = {
+ .hdr = {
+ .item_type = PRFCNT_ENUM_TYPE_SAMPLE_INFO,
+ .item_version = PRFCNT_READER_API_VERSION,
+ },
+ .u.sample_info = {
+ .num_clock_domains = metadata->clk_cnt,
+ },
+ };
+
+ item_arr[*arr_idx] = sample_info;
+ *arr_idx += 1;
+}
+
int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata,
size_t block_set, struct prfcnt_enum_item *item_arr,
size_t *arr_idx)
@@ -1924,23 +1968,17 @@ int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *
return -EINVAL;
for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) {
- for (blk = 0;
- blk < kbase_hwcnt_metadata_block_count(metadata, grp);
- blk++, (*arr_idx)++) {
+ for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) {
size_t blk_inst;
size_t unused_blk_inst_count = 0;
size_t blk_inst_count =
kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk);
-
- item_arr[*arr_idx].hdr.item_type =
- PRFCNT_ENUM_TYPE_BLOCK;
- item_arr[*arr_idx].hdr.item_version =
- PRFCNT_READER_API_VERSION;
- item_arr[*arr_idx].u.block_counter.set = block_set;
- item_arr[*arr_idx].u.block_counter.block_type =
+ enum prfcnt_block_type block_type =
kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
- kbase_hwcnt_metadata_block_type(
- metadata, grp, blk));
+ kbase_hwcnt_metadata_block_type(metadata, grp, blk));
+
+ if (block_type == PRFCNT_BLOCK_TYPE_RESERVED)
+ continue;
/* Count the number of unused blocks to update the number of instances */
for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) {
@@ -1949,20 +1987,24 @@ int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *
unused_blk_inst_count++;
}
- item_arr[*arr_idx].u.block_counter.num_instances =
- blk_inst_count - unused_blk_inst_count;
- item_arr[*arr_idx].u.block_counter.num_values =
- kbase_hwcnt_metadata_block_values_count(
- metadata, grp, blk);
-
- /* The bitmask of available counters should be dynamic.
- * Temporarily, it is set to U64_MAX, waiting for the
- * required functionality to be available in the future.
- */
- item_arr[*arr_idx].u.block_counter.counter_mask[0] =
- U64_MAX;
- item_arr[*arr_idx].u.block_counter.counter_mask[1] =
- U64_MAX;
+ item_arr[(*arr_idx)++] = (struct prfcnt_enum_item){
+ .hdr = {
+ .item_type = PRFCNT_ENUM_TYPE_BLOCK,
+ .item_version = PRFCNT_READER_API_VERSION,
+ },
+ .u.block_counter = {
+ .set = block_set,
+ .block_type = block_type,
+ .num_instances = blk_inst_count - unused_blk_inst_count,
+ .num_values = kbase_hwcnt_metadata_block_values_count(
+ metadata, grp, blk),
+ /* The bitmask of available counters should be dynamic.
+ * Temporarily, it is set to U64_MAX, waiting for the
+ * required functionality to be available in the future.
+ */
+ .counter_mask = {U64_MAX, U64_MAX},
+ },
+ };
}
}
@@ -1980,6 +2022,11 @@ static int kbasep_kinstr_prfcnt_enum_info_count(
count = ARRAY_SIZE(kinstr_prfcnt_supported_requests);
metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt);
+
+ /* Add the sample_info (clock domain) descriptive item */
+ count++;
+
+ /* Other blocks based on the metadata */
block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata);
count += block_info_count;
@@ -2014,9 +2061,12 @@ static int kbasep_kinstr_prfcnt_enum_info_list(
if (!prfcnt_item_arr)
return -ENOMEM;
- kbasep_kinstr_prfcnt_get_request_info_list(kinstr_ctx, prfcnt_item_arr,
- &arr_idx);
+ kbasep_kinstr_prfcnt_get_request_info_list(prfcnt_item_arr, &arr_idx);
+
metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt);
+ /* Place the sample_info item */
+ kbasep_kinstr_prfcnt_get_sample_info_item(metadata, prfcnt_item_arr, &arr_idx);
+
block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata);
if (arr_idx + block_info_count >= enum_info->info_item_count)
@@ -2080,12 +2130,36 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
union kbase_ioctl_kinstr_prfcnt_setup *setup)
{
int err;
+ unsigned int item_count;
+ unsigned long bytes;
+ struct prfcnt_request_item *req_arr;
struct kbase_kinstr_prfcnt_client *cli = NULL;
if (!kinstr_ctx || !setup)
return -EINVAL;
- err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli);
+ item_count = setup->in.request_item_count;
+
+ * Limiting the request items to 2x of the expected: accommodating
+ * moderate duplications but rejecting excessive abuses.
+ */
+ if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) ||
+ item_count > 2 * kinstr_ctx->info_item_count) {
+ return -EINVAL;
+ }
+
+ bytes = item_count * sizeof(*req_arr);
+ req_arr = kmalloc(bytes, GFP_KERNEL);
+
+ if (!req_arr)
+ return -ENOMEM;
+
+ if (copy_from_user(req_arr, u64_to_user_ptr(setup->in.requests_ptr), bytes)) {
+ err = -EFAULT;
+ goto free_buf;
+ }
+
+ err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli, req_arr);
if (err < 0)
goto error;
@@ -2107,7 +2181,7 @@ int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
if (err < 0)
goto client_installed_error;
- return err;
+ goto free_buf;
client_installed_error:
mutex_lock(&kinstr_ctx->lock);
@@ -2116,5 +2190,7 @@ client_installed_error:
mutex_unlock(&kinstr_ctx->lock);
error:
kbasep_kinstr_prfcnt_client_destroy(cli);
+free_buf:
+ kfree(req_arr);
return err;
}
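
For context, the setup path above now rejects request arrays outside the window [2, 2 * info_item_count] (and a zero request_item_size) before copying them in. A minimal user-space sketch of the same bounds check and copy, with malloc/memcpy standing in for kmalloc/copy_from_user and a placeholder prfcnt_request_item layout:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct prfcnt_request_item { uint64_t raw[2]; }; /* placeholder layout, not the uapi struct */

/* Return a heap copy of the caller's request array, or NULL if the count
 * falls outside the accepted window [2, 2 * info_item_count].
 */
static struct prfcnt_request_item *copy_requests(const void *user_ptr, size_t item_size,
						 unsigned int item_count,
						 unsigned int info_item_count)
{
	struct prfcnt_request_item *req_arr;
	size_t bytes;

	if (!user_ptr || item_size == 0 || item_count < 2 ||
	    item_count > 2 * info_item_count)
		return NULL;

	bytes = (size_t)item_count * sizeof(*req_arr);
	req_arr = malloc(bytes);
	if (!req_arr)
		return NULL;

	memcpy(req_arr, user_ptr, bytes); /* kernel path uses copy_from_user() */
	return req_arr;
}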
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.h b/mali_kbase/mali_kbase_kinstr_prfcnt.h
index c42408b..ec42ce0 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.h
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.h
@@ -26,9 +26,11 @@
#ifndef _KBASE_KINSTR_PRFCNT_H_
#define _KBASE_KINSTR_PRFCNT_H_
+#include "mali_kbase_hwcnt_types.h"
#include <uapi/gpu/arm/midgard/mali_kbase_hwcnt_reader.h>
struct kbase_kinstr_prfcnt_context;
+struct kbase_kinstr_prfcnt_client;
struct kbase_hwcnt_virtualizer;
struct kbase_ioctl_hwcnt_reader_setup;
struct kbase_ioctl_kinstr_prfcnt_enum_info;
@@ -98,15 +100,18 @@ int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *
/**
* kbasep_kinstr_prfcnt_get_sample_md_count() - Get count of sample
* metadata items.
- * @metadata: Non-NULL pointer to the hardware counter metadata.
+ * @metadata: Non-NULL pointer to the hardware counter metadata.
+ * @enable_map: Non-NULL pointer to the map of enabled counters.
*
* Return: Number of metadata items for available blocks in each sample.
*/
-size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata);
+size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map);
/**
* kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta
* item array.
+ * @enable_map: Non-NULL pointer to the map of enabled counters.
* @dst: Non-NULL pointer to the sample's dump buffer object.
* @block_meta_base: Non-NULL double pointer to the start of the block meta
* data items.
@@ -116,9 +121,43 @@ size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadat
*
* Return: 0 on success, else error code.
*/
-int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_dump_buffer *dst,
+int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_dump_buffer *dst,
struct prfcnt_metadata **block_meta_base,
u64 base_addr, u8 counter_set);
+
+/**
+ * kbasep_kinstr_prfcnt_client_create() - Create a kinstr_prfcnt client.
+ * Does not attach to the kinstr_prfcnt
+ * context.
+ * @kinstr_ctx: Non-NULL pointer to kinstr_prfcnt context.
+ * @setup: Non-NULL pointer to hardware counter ioctl setup structure.
+ * @out_vcli: Non-NULL pointer to where the created client will be stored on
+ * success.
+ * @req_arr: Non-NULL pointer to array of request items for client session.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
+ union kbase_ioctl_kinstr_prfcnt_setup *setup,
+ struct kbase_kinstr_prfcnt_client **out_vcli,
+ struct prfcnt_request_item *req_arr);
+
+/**
+ * kbasep_kinstr_prfcnt_cmd() - Execute command for a client session.
+ * @cli: Non-NULL pointer to kinstr_prfcnt client.
+ * @control_cmd: Control command to execute.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli,
+ struct prfcnt_control_cmd *control_cmd);
+
+/**
+ * kbasep_kinstr_prfcnt_client_destroy() - Destroy a kinstr_prfcnt client.
+ * @cli: kinstr_prfcnt client. Must not be attached to a kinstr_prfcnt context.
+ */
+void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli);
#endif /* MALI_KERNEL_TEST_API */
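
The three kbasep_* declarations above are only exposed for kernel tests. One plausible test-only flow, sketched here under the assumption that struct prfcnt_control_cmd carries a cmd field accepting PRFCNT_CONTROL_CMD_START (field name and value not confirmed by this header), is create, command, destroy without ever attaching the client to the context. This is a non-buildable fragment, not driver code:

static int kinstr_prfcnt_test_session(struct kbase_kinstr_prfcnt_context *ctx,
				      union kbase_ioctl_kinstr_prfcnt_setup *setup,
				      struct prfcnt_request_item *req_arr)
{
	struct kbase_kinstr_prfcnt_client *cli = NULL;
	struct prfcnt_control_cmd cmd = { .cmd = PRFCNT_CONTROL_CMD_START }; /* assumed field/value */
	int err;

	err = kbasep_kinstr_prfcnt_client_create(ctx, setup, &cli, req_arr);
	if (err)
		return err;

	err = kbasep_kinstr_prfcnt_cmd(cli, &cmd);

	/* The client was never attached to the context, so destroy it directly. */
	kbasep_kinstr_prfcnt_client_destroy(cli);
	return err;
}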
/**
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index de854f3..625043c 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Base kernel memory APIs
+ * DOC: Base kernel memory APIs
*/
#include <linux/dma-buf.h>
#include <linux/kernel.h>
@@ -104,12 +104,24 @@ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
u64 gpu_pfn)
{
struct rb_root *rbtree = NULL;
- struct kbase_reg_zone *exec_va_zone =
- kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
- /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA
- * zone if this has been initialized.
- */
+ struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+
+#if MALI_USE_CSF
+ struct kbase_reg_zone *fixed_va_zone =
+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA);
+
+ struct kbase_reg_zone *exec_fixed_va_zone =
+ kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA);
+
+ if (gpu_pfn >= fixed_va_zone->base_pfn) {
+ rbtree = &kctx->reg_rbtree_fixed;
+ return rbtree;
+ } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) {
+ rbtree = &kctx->reg_rbtree_exec_fixed;
+ return rbtree;
+ }
+#endif
if (gpu_pfn >= exec_va_zone->base_pfn)
rbtree = &kctx->reg_rbtree_exec;
else {
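
The CSF branch above resolves the region tree by comparing the faulting PFN against zone bases from the top of the address space downwards (FIXED_VA, then EXEC_FIXED_VA, then EXEC_VA). A self-contained sketch of that cascade, with the zone bases passed in explicitly and assumed to be in descending order:

#include <stdint.h>

enum va_tree { TREE_FIXED, TREE_EXEC_FIXED, TREE_EXEC, TREE_OTHER };

/* Pick the tree whose zone base is the highest one at or below gpu_pfn. */
static enum va_tree pick_tree(uint64_t gpu_pfn, uint64_t fixed_base,
			      uint64_t exec_fixed_base, uint64_t exec_base)
{
	if (gpu_pfn >= fixed_base)
		return TREE_FIXED;
	if (gpu_pfn >= exec_fixed_base)
		return TREE_EXEC_FIXED;
	if (gpu_pfn >= exec_base)
		return TREE_EXEC;
	return TREE_OTHER; /* SAME_VA or CUSTOM_VA, resolved by the else branch */
}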
@@ -350,7 +362,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
}
/**
- * Remove a region object from the global list.
+ * kbase_remove_va_region - Remove a region object from the global list.
+ *
* @kbdev: The kbase device
* @reg: Region object to remove
*
@@ -478,6 +491,8 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
* @at_reg: The region to replace
* @start_pfn: The Page Frame Number to insert at
* @nr_pages: The number of pages of the region
+ *
+ * Return: 0 on success, error code otherwise.
*/
static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
@@ -551,6 +566,8 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
* @addr: the address to insert the region at
* @nr_pages: the number of pages in the region
* @align: the minimum alignment in pages
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_add_va_region(struct kbase_context *kctx,
struct kbase_va_region *reg, u64 addr,
@@ -567,12 +584,19 @@ int kbase_add_va_region(struct kbase_context *kctx,
lockdep_assert_held(&kctx->reg_lock);
- /* The executable allocation from the SAME_VA zone would already have an
+ /* The executable allocation from the SAME_VA zone should already have an
* appropriately aligned GPU VA chosen for it.
- * Also the executable allocation from EXEC_VA zone doesn't need the
- * special alignment.
+ * Also, executable allocations from EXEC_VA don't need the special
+ * alignment.
*/
+#if MALI_USE_CSF
+ /* The same is also true for the EXEC_FIXED_VA zone.
+ */
+#endif
if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
+#if MALI_USE_CSF
+ ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) &&
+#endif
((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
if (cpu_va_bits > gpu_pc_bits) {
align = max(align, (size_t)((1ULL << gpu_pc_bits)
@@ -604,15 +628,17 @@ KBASE_EXPORT_TEST_API(kbase_add_va_region);
/**
* kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
*
- * Insert a region into the rbtree that was specified when the region was
- * created. If addr is 0 a free area in the rbtree is used, otherwise the
- * specified address is used.
- *
* @kbdev: The kbase device
* @reg: The region to add
* @addr: The address to add the region at, or 0 to map at any available address
* @nr_pages: The size of the region in pages
* @align: The minimum alignment in pages
+ *
+ * Insert a region into the rbtree that was specified when the region was
+ * created. If addr is 0 a free area in the rbtree is used, otherwise the
+ * specified address is used.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
struct kbase_va_region *reg,
@@ -699,6 +725,59 @@ exit:
/*
* @brief Initialize the internal region tracker data structure.
*/
+#if MALI_USE_CSF
+static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
+ struct kbase_va_region *same_va_reg,
+ struct kbase_va_region *custom_va_reg,
+ struct kbase_va_region *exec_va_reg,
+ struct kbase_va_region *exec_fixed_va_reg,
+ struct kbase_va_region *fixed_va_reg)
+{
+ u64 last_zone_end_pfn;
+
+ kctx->reg_rbtree_same = RB_ROOT;
+ kbase_region_tracker_insert(same_va_reg);
+
+ last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages;
+
+ /* Although custom_va_reg doesn't always exist, initialize
+ * unconditionally because of the mem_view debugfs
+ * implementation which relies on it being empty.
+ */
+ kctx->reg_rbtree_custom = RB_ROOT;
+ kctx->reg_rbtree_exec = RB_ROOT;
+
+ if (custom_va_reg) {
+ WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(custom_va_reg);
+ last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages;
+ }
+
+ /* Initialize exec, fixed and exec_fixed. These are always
+ * initialized at this stage, if they will exist at all.
+ */
+ kctx->reg_rbtree_fixed = RB_ROOT;
+ kctx->reg_rbtree_exec_fixed = RB_ROOT;
+
+ if (exec_va_reg) {
+ WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(exec_va_reg);
+ last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages;
+ }
+
+ if (exec_fixed_va_reg) {
+ WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(exec_fixed_va_reg);
+ last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages;
+ }
+
+ if (fixed_va_reg) {
+ WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn);
+ kbase_region_tracker_insert(fixed_va_reg);
+ last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages;
+ }
+}
+#else
static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
struct kbase_va_region *same_va_reg,
struct kbase_va_region *custom_va_reg)
@@ -719,6 +798,7 @@ static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
if (custom_va_reg)
kbase_region_tracker_insert(custom_va_reg);
}
+#endif /* MALI_USE_CSF */
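
The CSF kbase_region_tracker_ds_init() above expects the optional zone regions to arrive in ascending VA order and advances last_zone_end_pfn as each present one is inserted, warning if a zone starts below the end of the previous one. A stripped-down, self-contained sketch of that monotonicity check (regions reduced to base/size pairs; absent zones passed as NULL):

#include <stdbool.h>
#include <stdint.h>

struct zone_reg { uint64_t start_pfn; uint64_t nr_pages; };

/* Returns false if any present zone starts below the end of the previous one. */
static bool zones_are_monotonic(const struct zone_reg *zones[], int n)
{
	uint64_t last_end = 0;

	for (int i = 0; i < n; i++) {
		if (!zones[i])
			continue; /* optional zones may be absent */
		if (zones[i]->start_pfn < last_end)
			return false;
		last_end = zones[i]->start_pfn + zones[i]->nr_pages;
	}
	return true;
}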
static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
{
@@ -753,6 +833,9 @@ void kbase_region_tracker_term(struct kbase_context *kctx)
kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
#if MALI_USE_CSF
WARN_ON(!list_empty(&kctx->csf.event_pages_head));
+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed);
+ kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed);
+
#endif
kbase_gpu_vm_unlock(kctx);
}
@@ -764,8 +847,8 @@ void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
{
- return min(kbase_get_num_cpu_va_bits(kctx),
- (size_t) kctx->kbdev->gpu_props.mmu.va_bits);
+ return min_t(size_t, kbase_get_num_cpu_va_bits(kctx),
+ kctx->kbdev->gpu_props.mmu.va_bits);
}
int kbase_region_tracker_init(struct kbase_context *kctx)
@@ -774,15 +857,38 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
struct kbase_va_region *custom_va_reg = NULL;
size_t same_va_bits = kbase_get_same_va_bits(kctx);
u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
- u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT;
+ u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
+ u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
u64 same_va_pages;
u64 same_va_base = 1u;
int err;
+#if MALI_USE_CSF
+ struct kbase_va_region *exec_va_reg;
+ struct kbase_va_region *exec_fixed_va_reg;
+ struct kbase_va_region *fixed_va_reg;
+
+ u64 exec_va_base;
+ u64 fixed_va_end;
+ u64 exec_fixed_va_base;
+ u64 fixed_va_base;
+ u64 fixed_va_pages;
+#endif
/* Take the lock as kbase_free_alloced_region requires it */
kbase_gpu_vm_lock(kctx);
same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;
+
+#if MALI_USE_CSF
+ if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
+ /* Depending on how the kernel is configured, it's possible (eg on aarch64) for
+ * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone
+ * doesn't cross into the exec_va zone.
+ */
+ same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base;
+ }
+#endif
+
/* all have SAME_VA */
same_va_reg =
kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
@@ -827,6 +933,69 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
custom_va_size = 0;
}
#endif
+
+#if MALI_USE_CSF
+ /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64;
+
+ /* Similarly the end of the FIXED_VA zone also depends on whether the client
+ * is 32 or 64-bits.
+ */
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
+
+#if IS_ENABLED(CONFIG_64BIT)
+ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
+ exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
+ }
+#endif
+
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
+ KBASE_REG_ZONE_EXEC_VA_SIZE);
+
+ exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_base,
+ KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
+
+ if (!exec_va_reg) {
+ err = -ENOMEM;
+ goto fail_free_custom_va;
+ }
+
+ exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE;
+
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base,
+ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
+
+ exec_fixed_va_reg =
+ kbase_alloc_free_region(&kctx->reg_rbtree_exec_fixed, exec_fixed_va_base,
+ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
+ KBASE_REG_ZONE_EXEC_FIXED_VA);
+
+ if (!exec_fixed_va_reg) {
+ err = -ENOMEM;
+ goto fail_free_exec_va;
+ }
+
+ fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
+ fixed_va_pages = fixed_va_end - fixed_va_base;
+
+ kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
+
+ fixed_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_fixed, fixed_va_base,
+ fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
+
+ kctx->gpu_va_end = fixed_va_end;
+
+ if (!fixed_va_reg) {
+ err = -ENOMEM;
+ goto fail_free_exec_fixed_va;
+ }
+
+ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg,
+ exec_fixed_va_reg, fixed_va_reg);
+
+ INIT_LIST_HEAD(&kctx->csf.event_pages_head);
+#else
/* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
* initially U64_MAX
*/
@@ -834,17 +1003,23 @@ int kbase_region_tracker_init(struct kbase_context *kctx)
/* Other zones are 0: kbase_create_context() uses vzalloc */
kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
-
kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size;
- kctx->jit_va = false;
-
-#if MALI_USE_CSF
- INIT_LIST_HEAD(&kctx->csf.event_pages_head);
#endif
+ kctx->jit_va = false;
kbase_gpu_vm_unlock(kctx);
return 0;
+#if MALI_USE_CSF
+fail_free_exec_fixed_va:
+ kbase_free_alloced_region(exec_fixed_va_reg);
+fail_free_exec_va:
+ kbase_free_alloced_region(exec_va_reg);
+fail_free_custom_va:
+ if (custom_va_reg)
+ kbase_free_alloced_region(custom_va_reg);
+#endif
+
fail_free_same_va:
kbase_free_alloced_region(same_va_reg);
fail_unlock:
@@ -874,7 +1049,9 @@ bool kbase_has_exec_va_zone(struct kbase_context *kctx)
}
/**
- * Determine if any allocations have been made on a context's region tracker
+ * kbase_region_tracker_has_allocs - Determine if any allocations have been made
+ * on a context's region tracker
+ *
* @kctx: KBase context
*
* Check the context to determine if any allocations have been made yet from
@@ -1081,6 +1258,7 @@ exit_unlock:
int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
{
+#if !MALI_USE_CSF
struct kbase_va_region *exec_va_reg;
struct kbase_reg_zone *exec_va_zone;
struct kbase_reg_zone *target_zone;
@@ -1089,6 +1267,7 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
unsigned long target_zone_bits;
u64 exec_va_start;
int err;
+#endif
/* The EXEC_VA zone shall be created by making space either:
* - for 64-bit clients, at the end of the process's address space
@@ -1102,6 +1281,12 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
return -EINVAL;
+#if MALI_USE_CSF
+ /* For CSF GPUs we now set up the EXEC_VA zone during initialization,
+ * so this request is a no-op.
+ */
+ return 0;
+#else
kbase_gpu_vm_lock(kctx);
/* Verify that we've not already created a EXEC_VA zone, and that the
@@ -1187,6 +1372,7 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
exit_unlock:
kbase_gpu_vm_unlock(kctx);
return err;
+#endif /* MALI_USE_CSF */
}
#if MALI_USE_CSF
@@ -1330,7 +1516,8 @@ void kbase_mem_term(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
- * Allocate a free region object.
+ * kbase_alloc_free_region - Allocate a free region object.
+ *
* @rbtree: Backlink to the red-black tree of memory regions.
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
@@ -1341,6 +1528,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
*
* zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
*
+ * Return: pointer to the allocated region object on success, NULL otherwise.
*/
struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
u64 start_pfn, size_t nr_pages, int zone)
@@ -1398,6 +1586,14 @@ static struct kbase_context *kbase_reg_flags_to_kctx(
kctx = container_of(rbtree, struct kbase_context,
reg_rbtree_exec);
break;
+#if MALI_USE_CSF
+ case KBASE_REG_ZONE_EXEC_FIXED_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
+ break;
+ case KBASE_REG_ZONE_FIXED_VA:
+ kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
+ break;
+#endif
default:
WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
break;
@@ -1407,7 +1603,8 @@ static struct kbase_context *kbase_reg_flags_to_kctx(
}
/**
- * Free a region object.
+ * kbase_free_alloced_region - Free a region object.
+ *
* @reg: Region
*
* The described region must be freed of any mapping.
@@ -1629,30 +1826,33 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
if (!reg->gpu_alloc)
return -EINVAL;
- /* Tear down down GPU page tables, depending on memory type. */
+ /* Tear down GPU page tables, depending on memory type. */
switch (reg->gpu_alloc->type) {
case KBASE_MEM_TYPE_ALIAS: {
- size_t i = 0;
- struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
-
- /* Due to the way the number of valid PTEs and ATEs are tracked
- * currently, only the GPU virtual range that is backed & mapped
- * should be passed to the kbase_mmu_teardown_pages() function,
- * hence individual aliased regions needs to be unmapped
- * separately.
- */
- for (i = 0; i < alloc->imported.alias.nents; i++) {
- if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_teardown_pages(
- kctx->kbdev, &kctx->mmu,
- reg->start_pfn +
- (i *
- alloc->imported.alias.stride),
- alloc->imported.alias.aliased[i].length,
- kctx->as_nr);
+ size_t i = 0;
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+
+ /* Due to the way the number of valid PTEs and ATEs is tracked
+ * currently, only the GPU virtual range that is backed & mapped
+ * should be passed to the kbase_mmu_teardown_pages() function,
+ * hence individual aliased regions need to be unmapped
+ * separately.
+ */
+ for (i = 0; i < alloc->imported.alias.nents; i++) {
+ if (alloc->imported.alias.aliased[i].alloc) {
+ int err_loop = kbase_mmu_teardown_pages(
+ kctx->kbdev, &kctx->mmu,
+ reg->start_pfn +
+ (i *
+ alloc->imported.alias.stride),
+ alloc->imported.alias.aliased[i].length,
+ kctx->as_nr);
+ if (WARN_ON_ONCE(err_loop))
+ err = err_loop;
+ }
}
}
- } break;
+ break;
case KBASE_MEM_TYPE_IMPORTED_UMM:
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
reg->start_pfn, reg->nr_pages, kctx->as_nr);
@@ -1682,7 +1882,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
/* The allocation could still have active mappings. */
if (user_buf->current_mapping_usage_count == 0) {
kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
- (reg->flags & KBASE_REG_GPU_WR));
+ (reg->flags & (KBASE_REG_CPU_WR |
+ KBASE_REG_GPU_WR)));
}
}
}
@@ -2016,22 +2217,35 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
goto out;
}
+#if MALI_USE_CSF
+ if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) ||
+ ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
+ if (reg->flags & KBASE_REG_FIXED_ADDRESS)
+ atomic64_dec(&kctx->num_fixed_allocs);
+ else
+ atomic64_dec(&kctx->num_fixable_allocs);
+ }
+#endif
+
/* This will also free the physical pages */
kbase_free_alloced_region(reg);
- out:
+out:
return err;
}
KBASE_EXPORT_TEST_API(kbase_mem_free_region);
/**
- * Free the region from the GPU and unregister it.
+ * kbase_mem_free - Free the region from the GPU and unregister it.
+ *
* @kctx: KBase context
* @gpu_addr: GPU address to free
*
* This function implements the free operation on a memory segment.
* It will loudly fail if called with outstanding mappings.
+ *
+ * Return: 0 on success.
*/
int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
{
@@ -2043,12 +2257,14 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
__func__, gpu_addr, (void *)kctx);
if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
- dev_warn(kctx->kbdev->dev, "kbase_mem_free: gpu_addr parameter is invalid");
+ dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__);
return -EINVAL;
}
if (gpu_addr == 0) {
- dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n");
+ dev_warn(kctx->kbdev->dev,
+ "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n",
+ __func__);
return -EINVAL;
}
kbase_gpu_vm_lock(kctx);
@@ -2074,8 +2290,8 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
/* Validate the region */
reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
if (kbase_is_region_invalid_or_free(reg)) {
- dev_warn(kctx->kbdev->dev, "kbase_mem_free called with nonexistent gpu_addr 0x%llX",
- gpu_addr);
+ dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX",
+ __func__, gpu_addr);
err = -EINVAL;
goto out_unlock;
}
@@ -2090,7 +2306,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
err = kbase_mem_free_region(kctx, reg);
}
- out_unlock:
+out_unlock:
kbase_gpu_vm_unlock(kctx);
return err;
}
@@ -2196,6 +2412,11 @@ int kbase_update_region_flags(struct kbase_context *kctx,
if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
+#if MALI_USE_CSF
+ if (flags & BASE_MEM_FIXED)
+ reg->flags |= KBASE_REG_FIXED_ADDRESS;
+#endif
+
return 0;
}
@@ -3047,6 +3268,14 @@ bool kbase_check_alloc_flags(unsigned long flags)
(BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
return false;
+#if MALI_USE_CSF
+ if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED)))
+ return false;
+
+ if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED))
+ return false;
+#endif
+
return true;
}
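
The CSF additions above make SAME_VA, FIXED and FIXABLE mutually constraining: SAME_VA excludes both of the new flags, and FIXED and FIXABLE exclude each other. The same predicate in isolation, using placeholder bit positions rather than the real BASE_MEM_* values:

#include <assert.h>
#include <stdbool.h>

/* Placeholder bit positions, not the real BASE_MEM_* values */
#define F_SAME_VA (1u << 0)
#define F_FIXABLE (1u << 1)
#define F_FIXED   (1u << 2)

static bool fixed_flags_ok(unsigned int flags)
{
	if ((flags & F_SAME_VA) && (flags & (F_FIXABLE | F_FIXED)))
		return false;
	if ((flags & F_FIXABLE) && (flags & F_FIXED))
		return false;
	return true;
}

int main(void)
{
	assert(fixed_flags_ok(F_FIXED));              /* FIXED alone is fine */
	assert(!fixed_flags_ok(F_SAME_VA | F_FIXED)); /* SAME_VA excludes FIXED */
	assert(!fixed_flags_ok(F_FIXABLE | F_FIXED)); /* FIXABLE excludes FIXED */
	return 0;
}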
@@ -3068,7 +3297,11 @@ bool kbase_check_import_flags(unsigned long flags)
if (flags & BASE_MEM_GROW_ON_GPF)
return false;
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+ /* Imported memory cannot be fixed */
+ if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)))
+ return false;
+#else
/* Imported memory cannot be aligned to the end of its initial commit */
if (flags & BASE_MEM_TILER_ALIGN_TOP)
return false;
@@ -3229,7 +3462,7 @@ KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_jit_debugfs_data {
- int (*func)(struct kbase_jit_debugfs_data *);
+ int (*func)(struct kbase_jit_debugfs_data *data);
struct mutex lock;
struct kbase_context *kctx;
u64 active_value;
@@ -3452,8 +3685,8 @@ KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops,
void kbase_jit_debugfs_init(struct kbase_context *kctx)
{
/* prevent unprivileged use of debug file system
- * in old kernel version
- */
+ * in old kernel version
+ */
#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
/* only for newer kernel version debug file system is safe */
const mode_t mode = 0444;
@@ -4236,9 +4469,8 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
mutex_unlock(&kctx->jit_evict_lock);
kbase_gpu_vm_unlock(kctx);
- reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
- info->extension, &flags, &gpu_addr,
- mmu_sync_info);
+ reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension,
+ &flags, &gpu_addr, mmu_sync_info);
if (!reg) {
/* Most likely not enough GPU virtual space left for
* the new JIT allocation.
@@ -4560,6 +4792,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
struct mm_struct *mm = alloc->imported.user_buf.mm;
long pinned_pages;
long i;
+ int write;
if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF))
return -EINVAL;
@@ -4574,41 +4807,28 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm))
return -EINVAL;
+ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
+
#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
+ pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages,
#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
- pages, NULL);
+ write ? FOLL_WRITE : 0, pages, NULL);
#else
- reg->flags & KBASE_REG_GPU_WR,
- 0, pages, NULL);
+ write, 0, pages, NULL);
#endif
#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages_remote(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR,
- 0, pages, NULL);
+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
+ write, 0, pages, NULL);
#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages_remote(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
- pages, NULL);
+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
+ write ? FOLL_WRITE : 0, pages, NULL);
#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages_remote(NULL, mm,
- address,
- alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
- pages, NULL, NULL);
+ pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
+ write ? FOLL_WRITE : 0, pages, NULL, NULL);
#else
- pinned_pages = pin_user_pages_remote(
- mm, address, alloc->imported.user_buf.nr_pages,
- reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL,
- NULL);
+ pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages,
+ write ? FOLL_WRITE : 0, pages, NULL, NULL);
#endif
if (pinned_pages <= 0)
@@ -4665,7 +4885,8 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
dma_addr = dma_map_page(dev, pages[i],
offset, min,
DMA_BIDIRECTIONAL);
- if (dma_mapping_error(dev, dma_addr))
+ err = dma_mapping_error(dev, dma_addr);
+ if (err)
goto unwind;
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
@@ -4842,7 +5063,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx,
kbase_reg_current_backed_size(reg),
kctx->as_nr);
- if (reg && ((reg->flags & KBASE_REG_GPU_WR) == 0))
+ if (reg && ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0))
writeable = false;
kbase_jd_user_buf_unmap(kctx, alloc, writeable);
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 9cb4088..4ac4feb 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -359,19 +359,26 @@ struct kbase_va_region {
/* inner & outer shareable coherency */
#define KBASE_REG_SHARE_BOTH (1ul << 10)
+#if MALI_USE_CSF
+/* Space for 8 different zones */
+#define KBASE_REG_ZONE_BITS 3
+#else
/* Space for 4 different zones */
-#define KBASE_REG_ZONE_MASK ((KBASE_REG_ZONE_MAX - 1ul) << 11)
-#define KBASE_REG_ZONE(x) (((x) & (KBASE_REG_ZONE_MAX - 1ul)) << 11)
+#define KBASE_REG_ZONE_BITS 2
+#endif
+
+#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11)
+#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11)
#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11)
-#if ((KBASE_REG_ZONE_MAX - 1) & 0x3) != (KBASE_REG_ZONE_MAX - 1)
-#error KBASE_REG_ZONE_MAX too large for allocation of KBASE_REG_<...> bits
+#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS)
+#error "Too many zones for the number of zone bits defined"
#endif
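
With KBASE_REG_ZONE_BITS raised to 3 for CSF, zones occupy bits 11..13 of the region flags, which is why KBASE_REG_GPU_RD and KBASE_REG_CPU_RD move up to bits 14 and 15 below. A self-contained check that the encode/decode macros round-trip all eight indices (mask logic reproduced locally for illustration):

#include <assert.h>

#define ZONE_BITS  3
#define ZONE_SHIFT 11
#define ZONE_MASK   (((1ul << ZONE_BITS) - 1ul) << ZONE_SHIFT)
#define ZONE(x)     (((x) & ((1ul << ZONE_BITS) - 1ul)) << ZONE_SHIFT)
#define ZONE_IDX(f) (((f) & ZONE_MASK) >> ZONE_SHIFT)

int main(void)
{
	for (unsigned long i = 0; i < (1ul << ZONE_BITS); i++)
		assert(ZONE_IDX(ZONE(i)) == i);
	return 0;
}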
/* GPU read access */
-#define KBASE_REG_GPU_RD (1ul<<13)
+#define KBASE_REG_GPU_RD (1ul << 14)
/* CPU read access */
-#define KBASE_REG_CPU_RD (1ul<<14)
+#define KBASE_REG_CPU_RD (1ul << 15)
/* Index of chosen MEMATTR for this region (0..7) */
#define KBASE_REG_MEMATTR_MASK (7ul << 16)
@@ -442,21 +449,39 @@ struct kbase_va_region {
/* Allocation is actively used for JIT memory */
#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28)
-#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
-
-/* only used with 32-bit clients */
-/*
- * On a 32bit platform, custom VA should be wired from 4GB
- * to the VA limit of the GPU. Unfortunately, the Linux mmap() interface
- * limits us to 2^32 pages (2^44 bytes, see mmap64 man page for reference).
- * So we put the default limit to the maximum possible on Linux and shrink
- * it down, if required by the GPU, during initialization.
+#if MALI_USE_CSF
+/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA
+ * memory zones, and it determines whether they were created with a fixed
+ * GPU VA address requested by the user.
*/
+#define KBASE_REG_FIXED_ADDRESS (1ul << 29)
+#else
+#define KBASE_REG_RESERVED_BIT_29 (1ul << 29)
+#endif
+
+#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1)
#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT)
-#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+
+#if MALI_USE_CSF
+/* only used with 32-bit clients */
+/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43).
+ */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \
+ (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+#else
+/* only used with 32-bit clients */
+/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the
+ * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44
+ * bytes, see mmap64 man page for reference). So we put the default limit to the
+ * maximum possible on Linux and shrink it down, if required by the GPU, during
+ * initialization.
+ */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \
+ (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
/* end 32-bit clients only */
+#endif
/* The starting address and size of the GPU-executable zone are dynamic
* and depend on the platform and the number of pages requested by the
@@ -470,6 +495,33 @@ struct kbase_va_region {
#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT)
#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \
KBASE_REG_ZONE_MCU_SHARED_BASE)
+
+/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit
+ * clients, and 2^43 for 32-bit clients.
+ */
+#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES
+
+/* Executable zone supporting FIXED/FIXABLE allocations.
+ * It is always 4GB in size.
+ */
+
+#define KBASE_REG_ZONE_EXEC_FIXED_VA KBASE_REG_ZONE(4)
+#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES
+
+/* Non-executable zone supporting FIXED/FIXABLE allocations.
+ * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from
+ * (2^43) up to (2^44)-1 for 32-bit userspace clients.
+ */
+#define KBASE_REG_ZONE_FIXED_VA KBASE_REG_ZONE(5)
+
+/* Again - 32-bit userspace cannot map addresses beyond 2^44, but 64-bit can - and so
+ * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1.
+ */
+#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT)
+
#endif
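
Putting the CSF definitions above together for a 64-bit client: EXEC_VA starts at 2^47, EXEC_FIXED_VA follows it, and FIXED_VA runs from there up to 2^48. A small arithmetic check of that layout, assuming 4 KiB pages and the documented 4 GB size for both executable zones:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12 /* assumes 4 KiB pages */
#define GB (1ull << 30)

int main(void)
{
	uint64_t exec_va_base = (1ull << 47) >> PAGE_SHIFT; /* KBASE_REG_ZONE_EXEC_VA_BASE_64 */
	uint64_t exec_va_pages = (4 * GB) >> PAGE_SHIFT;    /* both executable zones are 4 GB */
	uint64_t exec_fixed_va_base = exec_va_base + exec_va_pages;
	uint64_t fixed_va_base = exec_fixed_va_base + exec_va_pages;
	uint64_t fixed_va_end = (1ull << 48) >> PAGE_SHIFT;  /* KBASE_REG_ZONE_FIXED_VA_END_64 */

	/* FIXED_VA fills the remainder up to 2^48: 2^47 bytes minus the two 4 GB zones. */
	assert(fixed_va_end - fixed_va_base == (((1ull << 47) - 8 * GB) >> PAGE_SHIFT));
	return 0;
}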
unsigned long flags;
@@ -521,8 +573,10 @@ static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits)
{
WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
return (zone_bits == KBASE_REG_ZONE_SAME_VA ||
- zone_bits == KBASE_REG_ZONE_CUSTOM_VA ||
- zone_bits == KBASE_REG_ZONE_EXEC_VA);
+#if MALI_USE_CSF
+ zone_bits == KBASE_REG_ZONE_EXEC_FIXED_VA || zone_bits == KBASE_REG_ZONE_FIXED_VA ||
+#endif
+ zone_bits == KBASE_REG_ZONE_CUSTOM_VA || zone_bits == KBASE_REG_ZONE_EXEC_VA);
}
/* Special marker for failed JIT allocations that still must be marked as
@@ -1025,7 +1079,7 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
* Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
* become larger than the maximum size specified.
*
- * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
+ * Return: 0 on success, -ENOMEM if unable to allocate sufficient pages
*/
int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
@@ -1107,9 +1161,9 @@ void kbase_region_tracker_term(struct kbase_context *kctx);
/**
* kbase_region_tracker_term_rbtree - Free memory for a region tracker
*
- * This will free all the regions within the region tracker
- *
* @rbtree: Region tracker tree root
+ *
+ * This will free all the regions within the region tracker
*/
void kbase_region_tracker_term_rbtree(struct rb_root *rbtree);
@@ -1119,11 +1173,14 @@ struct kbase_va_region *kbase_find_region_enclosing_address(
struct rb_root *rbtree, u64 gpu_addr);
/**
- * Check that a pointer is actually a valid region.
+ * kbase_region_tracker_find_region_base_address - Check that a pointer is
+ * actually a valid region.
* @kctx: kbase context containing the region
* @gpu_addr: pointer to check
*
* Must be called with context lock held.
+ *
+ * Return: pointer to the valid region on success, NULL otherwise
*/
struct kbase_va_region *kbase_region_tracker_find_region_base_address(
struct kbase_context *kctx, u64 gpu_addr);
@@ -1182,7 +1239,8 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx);
int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size);
/**
- * Register region and map it on the GPU.
+ * kbase_gpu_mmap - Register region and map it on the GPU.
+ *
* @kctx: kbase context containing the region
* @reg: the region to add
* @addr: the address to insert the region at
@@ -1191,17 +1249,22 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size
* @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*
* Call kbase_add_va_region() and map the region on the GPU.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
u64 addr, size_t nr_pages, size_t align,
enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
- * Remove the region from the GPU and unregister it.
+ * kbase_gpu_munmap - Remove the region from the GPU and unregister it.
+ *
* @kctx: KBase context
* @reg: The region to remove
*
* Must be called with context lock held.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
@@ -1209,13 +1272,13 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg);
* kbase_mmu_update - Configure an address space on the GPU to the specified
* MMU tables
*
- * The caller has the following locking conditions:
- * - It must hold kbase_device->mmu_hw_mutex
- * - It must hold the hwaccess_lock
- *
* @kbdev: Kbase device structure
* @mmut: The set of MMU tables to be configured on the address space
* @as_nr: The address space to be configured
+ *
+ * The caller has the following locking conditions:
+ * - It must hold kbase_device->mmu_hw_mutex
+ * - It must hold the hwaccess_lock
*/
void kbase_mmu_update(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
int as_nr);
@@ -1251,6 +1314,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
/**
* kbase_mmu_dump() - Dump the MMU tables to a buffer.
*
+ * @kctx: The kbase context to dump
+ * @nr_pages: The number of pages to allocate for the buffer.
+ *
* This function allocates a buffer (of @c nr_pages pages) to hold a dump
* of the MMU tables and fills it. If the buffer is too small
* then the return value will be NULL.
@@ -1260,9 +1326,6 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
* The buffer returned should be freed with @ref vfree when it is no longer
* required.
*
- * @kctx: The kbase context to dump
- * @nr_pages: The number of pages to allocate for the buffer.
- *
* Return: The address of the buffer containing the MMU dump or NULL on error
* (including if the @c nr_pages is too small)
*/
@@ -1292,11 +1355,11 @@ void kbase_os_mem_map_unlock(struct kbase_context *kctx);
* kbasep_os_process_page_usage_update() - Update the memory allocation
* counters for the current process.
*
- * OS specific call to updates the current memory allocation counters
- * for the current process with the supplied delta.
- *
* @kctx: The kbase context
* @pages: The desired delta to apply to the memory usage counters.
+ *
+ * OS specific call to update the current memory allocation counters
+ * for the current process with the supplied delta.
*/
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
@@ -1305,11 +1368,11 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages);
* kbase_process_page_usage_inc() - Add to the memory allocation counters for
* the current process
*
- * OS specific call to add to the current memory allocation counters for
- * the current process by the supplied amount.
- *
* @kctx: The kernel base context used for the allocation.
* @pages: The desired delta to apply to the memory usage counters.
+ *
+ * OS specific call to add to the current memory allocation counters for
+ * the current process by the supplied amount.
*/
static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int pages)
@@ -1321,11 +1384,11 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, int
* kbase_process_page_usage_dec() - Subtract from the memory allocation
* counters for the current process.
*
- * OS specific call to subtract from the current memory allocation counters
- * for the current process by the supplied amount.
- *
* @kctx: The kernel base context used for the allocation.
* @pages: The desired delta to apply to the memory usage counters.
+ *
+ * OS specific call to subtract from the current memory allocation counters
+ * for the current process by the supplied amount.
*/
static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int pages)
@@ -1337,16 +1400,16 @@ static inline void kbase_process_page_usage_dec(struct kbase_context *kctx, int
* kbasep_find_enclosing_cpu_mapping_offset() - Find the offset of the CPU
* mapping of a memory allocation containing a given address range
*
- * Searches for a CPU mapping of any part of any region that fully encloses the
- * CPU virtual address range specified by @uaddr and @size. Returns a failure
- * indication if only part of the address range lies within a CPU mapping.
- *
* @kctx: The kernel base context used for the allocation.
* @uaddr: Start of the CPU virtual address range.
* @size: Size of the CPU virtual address range (in bytes).
* @offset: The offset from the start of the allocation to the specified CPU
* virtual address.
*
+ * Searches for a CPU mapping of any part of any region that fully encloses the
+ * CPU virtual address range specified by @uaddr and @size. Returns a failure
+ * indication if only part of the address range lies within a CPU mapping.
+ *
* Return: 0 if offset was obtained successfully. Error code otherwise.
*/
int kbasep_find_enclosing_cpu_mapping_offset(
@@ -1358,13 +1421,6 @@ int kbasep_find_enclosing_cpu_mapping_offset(
* the start of GPU virtual memory region which encloses @gpu_addr for the
* @size length in bytes
*
- * Searches for the memory region in GPU virtual memory space which contains
- * the region defined by the @gpu_addr and @size, where @gpu_addr is the
- * beginning and @size the length in bytes of the provided region. If found,
- * the location of the start address of the GPU virtual memory region is
- * passed in @start pointer and the location of the offset of the region into
- * the GPU virtual memory region is passed in @offset pointer.
- *
* @kctx: The kernel base context within which the memory is searched.
* @gpu_addr: GPU virtual address for which the region is sought; defines
* the beginning of the provided region.
@@ -1374,6 +1430,15 @@ int kbasep_find_enclosing_cpu_mapping_offset(
* the found GPU virtual memory region is.
* @offset: Pointer to the location where the offset of @gpu_addr into
* the found GPU virtual memory region is.
+ *
+ * Searches for the memory region in GPU virtual memory space which contains
+ * the region defined by the @gpu_addr and @size, where @gpu_addr is the
+ * beginning and @size the length in bytes of the provided region. If found,
+ * the location of the start address of the GPU virtual memory region is
+ * passed in @start pointer and the location of the offset of the region into
+ * the GPU virtual memory region is passed in @offset pointer.
+ *
+ * Return: 0 on success, error code otherwise.
*/
int kbasep_find_enclosing_gpu_mapping_start_and_offset(
struct kbase_context *kctx,
@@ -1452,11 +1517,11 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
/**
* kbase_free_phy_pages_helper() - Free physical pages.
*
- * Frees \a nr_pages and updates the alloc object.
- *
* @alloc: allocation object to free pages from
* @nr_pages_to_free: number of physical pages to free
*
+ * Frees @nr_pages_to_free pages and updates the alloc object.
+ *
* Return: 0 on success, otherwise a negative error code
*/
int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pages_to_free);
@@ -1553,7 +1618,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx);
* kbase_jit_init - Initialize the JIT memory pool management
* @kctx: kbase context
*
- * Returns zero on success or negative error number on failure.
+ * Return: zero on success or negative error number on failure.
*/
int kbase_jit_init(struct kbase_context *kctx);
@@ -1791,11 +1856,11 @@ static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx,
/**
* kbase_has_exec_va_zone - EXEC_VA zone predicate
*
+ * @kctx: kbase context
+ *
* Determine whether an EXEC_VA zone has been created for the GPU address space
* of the given kbase context.
*
- * @kctx: kbase context
- *
* Return: True if the kbase context has an EXEC_VA zone.
*/
bool kbase_has_exec_va_zone(struct kbase_context *kctx);
@@ -1846,7 +1911,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
* kbase_sticky_resource_init - Initialize sticky resource management.
* @kctx: kbase context
*
- * Returns zero on success or negative error number on failure.
+ * Return: zero on success or negative error number on failure.
*/
int kbase_sticky_resource_init(struct kbase_context *kctx);
@@ -1968,7 +2033,7 @@ static inline void kbase_unlink_event_mem_page(struct kbase_context *kctx,
* manage the shared interface segment of MCU firmware address space.
* @kbdev: Pointer to the kbase device
*
- * Returns zero on success or negative error number on failure.
+ * Return: zero on success or negative error number on failure.
*/
int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev);
@@ -1987,7 +2052,7 @@ void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev);
*
* Map a dma-buf on the GPU. The mappings are reference counted.
*
- * Returns 0 on success, or a negative error code.
+ * Return: 0 on success, or a negative error code.
*/
int kbase_mem_umm_map(struct kbase_context *kctx,
struct kbase_va_region *reg);
@@ -2007,7 +2072,7 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
* @alloc must be a valid physical allocation of type
* KBASE_MEM_TYPE_IMPORTED_UMM that was previously mapped by
* kbase_mem_umm_map(). The dma-buf attachment referenced by @alloc will
- * release it's mapping reference, and if the refcount reaches 0, also be be
+ * release its mapping reference, and if the refcount reaches 0, also be
* unmapped, regardless of the value of @reg.
*/
void kbase_mem_umm_unmap(struct kbase_context *kctx,
@@ -2133,6 +2198,8 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
* tracking page (which sets kctx->process_mm) or if the ioctl has been issued
* from the forked child process using the mali device file fd inherited from
* the parent process.
+ *
+ * Return: true if allocation is allowed.
*/
static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
{
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index d252373..ae8c4b0 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -291,10 +291,9 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
*/
}
-struct kbase_va_region *
-kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
- u64 extension, u64 *flags, u64 *gpu_va,
- enum kbase_caller_mmu_sync_info mmu_sync_info)
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
+ u64 extension, u64 *flags, u64 *gpu_va,
+ enum kbase_caller_mmu_sync_info mmu_sync_info)
{
int zone;
struct kbase_va_region *reg;
@@ -311,19 +310,21 @@ kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
va_pages, commit_pages, extension, *flags);
#if MALI_USE_CSF
- *gpu_va = 0; /* return 0 on failure */
+ if (!(*flags & BASE_MEM_FIXED))
+ *gpu_va = 0; /* return 0 on failure */
#else
if (!(*flags & BASE_MEM_FLAG_MAP_FIXED))
*gpu_va = 0; /* return 0 on failure */
+#endif
else
- dev_err(dev,
+ dev_dbg(dev,
"Keeping requested GPU VA of 0x%llx\n",
(unsigned long long)*gpu_va);
-#endif
if (!kbase_check_alloc_flags(*flags)) {
dev_warn(dev,
- "kbase_mem_alloc called with bad flags (%llx)",
+ "%s called with bad flags (%llx)",
+ __func__,
(unsigned long long)*flags);
goto bad_flags;
}
@@ -345,7 +346,8 @@ kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
}
if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
!kbase_device_is_cpu_coherent(kctx->kbdev)) {
- dev_warn(dev, "kbase_mem_alloc call required coherent mem when unavailable");
+ dev_warn(dev, "%s call required coherent mem when unavailable",
+ __func__);
goto bad_flags;
}
if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
@@ -368,7 +370,20 @@ kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
if (*flags & BASE_MEM_SAME_VA) {
rbtree = &kctx->reg_rbtree_same;
zone = KBASE_REG_ZONE_SAME_VA;
- } else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
+ }
+#if MALI_USE_CSF
+ /* fixed va_zone always exists */
+ else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) {
+ if (*flags & BASE_MEM_PROT_GPU_EX) {
+ rbtree = &kctx->reg_rbtree_exec_fixed;
+ zone = KBASE_REG_ZONE_EXEC_FIXED_VA;
+ } else {
+ rbtree = &kctx->reg_rbtree_fixed;
+ zone = KBASE_REG_ZONE_FIXED_VA;
+ }
+ }
+#endif
+ else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
rbtree = &kctx->reg_rbtree_exec;
zone = KBASE_REG_ZONE_EXEC_VA;
} else {
@@ -492,6 +507,14 @@ kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
kbase_gpu_vm_unlock(kctx);
+
+#if MALI_USE_CSF
+ if (*flags & BASE_MEM_FIXABLE)
+ atomic64_inc(&kctx->num_fixable_allocs);
+ else if (*flags & BASE_MEM_FIXED)
+ atomic64_inc(&kctx->num_fixed_allocs);
+#endif
+
return reg;
no_mmap:
@@ -602,6 +625,13 @@ int kbase_mem_query(struct kbase_context *kctx,
#if MALI_USE_CSF
if (KBASE_REG_CSF_EVENT & reg->flags)
*out |= BASE_MEM_CSF_EVENT;
+ if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) ||
+ ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
+ if (KBASE_REG_FIXED_ADDRESS & reg->flags)
+ *out |= BASE_MEM_FIXED;
+ else
+ *out |= BASE_MEM_FIXABLE;
+ }
#endif
if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
@@ -1651,8 +1681,17 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
write ? FOLL_WRITE : 0, pages, NULL);
#else
- faulted_pages = pin_user_pages(address, *va_pages,
- write ? FOLL_WRITE : 0, pages, NULL);
+ /* pin_user_pages() cannot be called with a NULL pages param;
+ * fall back to get_user_pages(), which is safe to call with pages == NULL
+ * as long as the FOLL_GET flag is not passed.
+ */
+ if (pages != NULL) {
+ faulted_pages =
+ pin_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL);
+ } else {
+ faulted_pages =
+ get_user_pages(address, *va_pages, write ? FOLL_WRITE : 0, pages, NULL);
+ }
#endif
up_read(kbase_mem_get_process_mmap_lock());
@@ -1745,7 +1784,8 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
if (!(*flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR))) {
dev_warn(kctx->kbdev->dev,
- "kbase_mem_alias called with bad flags (%llx)",
+ "%s called with bad flags (%llx)",
+ __func__,
(unsigned long long)*flags);
goto bad_flags;
}
@@ -1968,7 +2008,8 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
if (!kbase_check_import_flags(*flags)) {
dev_warn(kctx->kbdev->dev,
- "kbase_mem_import called with bad flags (%llx)",
+ "%s called with bad flags (%llx)",
+ __func__,
(unsigned long long)*flags);
goto bad_flags;
}
@@ -1981,7 +2022,8 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
if ((*flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED) != 0 &&
!kbase_device_is_cpu_coherent(kctx->kbdev)) {
dev_warn(kctx->kbdev->dev,
- "kbase_mem_import call required coherent mem when unavailable");
+ "%s call required coherent mem when unavailable",
+ __func__);
goto bad_flags;
}
if ((*flags & BASE_MEM_COHERENT_SYSTEM) != 0 &&
@@ -2286,7 +2328,7 @@ out_unlock:
}
int kbase_mem_shrink(struct kbase_context *const kctx,
- struct kbase_va_region *const reg, u64 const new_pages)
+ struct kbase_va_region *const reg, u64 new_pages)
{
u64 delta, old_pages;
int err;
@@ -2316,6 +2358,18 @@ int kbase_mem_shrink(struct kbase_context *const kctx,
kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
if (reg->cpu_alloc != reg->gpu_alloc)
kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
+#ifdef CONFIG_MALI_2MB_ALLOC
+ if (kbase_reg_current_backed_size(reg) > new_pages) {
+ old_pages = new_pages;
+ new_pages = kbase_reg_current_backed_size(reg);
+
+ /* Update GPU mapping. */
+ err = kbase_mem_grow_gpu_mapping(kctx, reg,
+ new_pages, old_pages, CALLER_MMU_ASYNC);
+ }
+#else
+ WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
+#endif
}
return err;
@@ -2599,7 +2653,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
size_t size;
int err = 0;
- dev_dbg(kctx->kbdev->dev, "in kbase_mmu_dump_mmap\n");
+ dev_dbg(kctx->kbdev->dev, "%s\n", __func__);
size = (vma->vm_end - vma->vm_start);
nr_pages = size >> PAGE_SHIFT;
@@ -2644,7 +2698,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
*kmap_addr = kaddr;
*reg = new_reg;
- dev_dbg(kctx->kbdev->dev, "kbase_mmu_dump_mmap done\n");
+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
return 0;
out_no_alloc:
@@ -2684,7 +2738,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
*aligned_offset = 0;
- dev_dbg(kctx->kbdev->dev, "in kbasep_reg_mmap\n");
+ dev_dbg(kctx->kbdev->dev, "%s\n", __func__);
/* SAME_VA stuff, fetch the right region */
reg = kctx->pending_regions[cookie];
@@ -2741,7 +2795,7 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
vma->vm_pgoff = reg->start_pfn - ((*aligned_offset)>>PAGE_SHIFT);
out:
*regm = reg;
- dev_dbg(kctx->kbdev->dev, "kbasep_reg_mmap done\n");
+ dev_dbg(kctx->kbdev->dev, "%s done\n", __func__);
return err;
}
@@ -2871,8 +2925,7 @@ int kbase_context_mmap(struct kbase_context *const kctx,
dev_warn(dev, "mmap aliased: invalid params!\n");
goto out_unlock;
}
- }
- else if (reg->cpu_alloc->nents <
+ } else if (reg->cpu_alloc->nents <
(vma->vm_pgoff - reg->start_pfn + nr_pages)) {
/* limit what we map to the amount currently backed */
if ((vma->vm_pgoff - reg->start_pfn) >= reg->cpu_alloc->nents)
@@ -3089,6 +3142,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
struct kbase_vmap_struct *map)
{
void *addr = (void *)((uintptr_t)map->addr & PAGE_MASK);
+
vunmap(addr);
if (map->sync_needed)
@@ -3287,10 +3341,11 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf)
(vma->vm_pgoff != queue->db_file_offset))
return VM_FAULT_SIGBUS;
- mutex_lock(&queue->kctx->csf.lock);
kbdev = queue->kctx->kbdev;
mgm_dev = kbdev->mgm_dev;
+ mutex_lock(&kbdev->csf.reg_lock);
+
/* Always map the doorbell page as uncached */
doorbell_pgprot = pgprot_device(vma->vm_page_prot);
@@ -3317,12 +3372,10 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf)
#else
if (vmf->address == doorbell_cpu_addr) {
#endif
- mutex_lock(&kbdev->csf.reg_lock);
doorbell_page_pfn = get_queue_doorbell_pfn(kbdev, queue);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_IO, vma, doorbell_cpu_addr,
doorbell_page_pfn, doorbell_pgprot);
- mutex_unlock(&kbdev->csf.reg_lock);
} else {
/* Map the Input page */
input_cpu_addr = doorbell_cpu_addr + PAGE_SIZE;
@@ -3342,7 +3395,7 @@ static vm_fault_t kbase_csf_user_io_pages_vm_fault(struct vm_fault *vmf)
}
exit:
- mutex_unlock(&queue->kctx->csf.lock);
+ mutex_unlock(&kbdev->csf.reg_lock);
return ret;
}
@@ -3452,6 +3505,7 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start);
vm_fault_t ret = VM_FAULT_SIGBUS;
+ unsigned long flags;
/* Few sanity checks up front */
if (WARN_ON(nr_pages != 1) ||
@@ -3460,8 +3514,8 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE)))
return VM_FAULT_SIGBUS;
- mutex_lock(&kbdev->pm.lock);
-
+ mutex_lock(&kbdev->csf.reg_lock);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Don't map in the actual register page if GPU is powered down.
* Always map in the dummy page in no mali builds.
*/
@@ -3471,13 +3525,13 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
if (!kbdev->pm.backend.gpu_powered)
pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
#endif
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
KBASE_MEM_GROUP_CSF_FW, vma,
vma->vm_start, pfn,
vma->vm_page_prot);
-
- mutex_unlock(&kbdev->pm.lock);
+ mutex_unlock(&kbdev->csf.reg_lock);
return ret;
}
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index f123d17..1f6877a 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010, 2012-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010, 2012-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Base kernel memory APIs, Linux implementation.
+ * DOC: Base kernel memory APIs, Linux implementation.
*/
#ifndef _KBASE_MEM_LINUX_H_
@@ -44,15 +44,15 @@ struct kbase_hwc_dma_mapping {
* @flags: bitmask of BASE_MEM_* flags to convey special requirements &
* properties for the new allocation.
* @gpu_va: Start address of the memory region which was allocated from GPU
- * virtual address space.
+ * virtual address space. If the BASE_MEM_FLAG_MAP_FIXED flag is set,
+ * then this parameter shall be provided by the caller.
* @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
*
* Return: 0 on success or error code
*/
-struct kbase_va_region *
-kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
- u64 extension, u64 *flags, u64 *gpu_va,
- enum kbase_caller_mmu_sync_info mmu_sync_info);
+struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages, u64 commit_pages,
+ u64 extension, u64 *flags, u64 *gpu_va,
+ enum kbase_caller_mmu_sync_info mmu_sync_info);
/**
* kbase_mem_query - Query properties of a GPU memory region
@@ -258,7 +258,7 @@ struct kbase_vmap_struct {
* The checks are also there to help catch access errors on memory where
* security is not a concern: imported memory that is always RW, and memory
* that was allocated and owned by the process attached to @kctx. In this case,
- * it helps to identify memory that was was mapped with the wrong access type.
+ * it helps to identify memory that was mapped with the wrong access type.
*
* Note: KBASE_REG_GPU_{RD,WR} flags are currently supported for legacy cases
* where either the security of memory is solely dependent on those flags, or
@@ -426,12 +426,12 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
/**
* kbase_get_cache_line_alignment - Return cache line alignment
*
+ * @kbdev: Device pointer.
+ *
* Helper function to return the maximum cache line alignment considering
* both CPU and GPU cache sizes.
*
* Return: CPU and GPU cache line alignment, in bytes.
- *
- * @kbdev: Device pointer.
*/
u32 kbase_get_cache_line_alignment(struct kbase_device *kbdev);
diff --git a/mali_kbase/mali_kbase_mem_lowlevel.h b/mali_kbase/mali_kbase_mem_lowlevel.h
index 3f260bf..5a1bb16 100644
--- a/mali_kbase/mali_kbase_mem_lowlevel.h
+++ b/mali_kbase/mali_kbase_mem_lowlevel.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2014, 2016-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -48,6 +48,8 @@ struct tagged_addr { phys_addr_t tagged_addr; };
#define HUGE_HEAD (1u << 1)
#define FROM_PARTIAL (1u << 2)
+#define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M / SZ_4K)
+
/*
* Note: if macro for converting physical address to page is not defined
* in the kernel itself, it is defined hereby. This is to avoid build errors
@@ -158,4 +160,20 @@ static inline bool is_partial(struct tagged_addr t)
return t.tagged_addr & FROM_PARTIAL;
}
+/**
+ * index_in_large_page() - Get index of a 4KB page within a 2MB page which
+ * wasn't split to be used partially.
+ *
+ * @t: Tagged physical address of the physical 4KB page that lies within
+ * the large (or 2 MB) physical page.
+ *
+ * Return: Index of the 4KB page within a 2MB page
+ */
+static inline unsigned int index_in_large_page(struct tagged_addr t)
+{
+ WARN_ON(!is_huge(t));
+
+ return (PFN_DOWN(as_phys_addr_t(t)) & (NUM_4K_PAGES_IN_2MB_PAGE - 1));
+}
+
#endif /* _KBASE_LOWLEVEL_H */
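For reference, a minimal standalone sketch of the arithmetic behind NUM_4K_PAGES_IN_2MB_PAGE and index_in_large_page() added above; the example address is purely illustrative:

/* A 2MB page holds SZ_2M / SZ_4K == 512 small pages, so the 4KB sub-page
 * index is simply the low 9 bits of the page frame number.
 */
#include <stdio.h>

#define SZ_4K                    0x1000UL
#define SZ_2M                    0x200000UL
#define NUM_4K_PAGES_IN_2MB_PAGE (SZ_2M / SZ_4K) /* == 512 */

int main(void)
{
	unsigned long phys = 0x80234000UL; /* hypothetical 4KB page inside a 2MB page */
	unsigned long pfn = phys / SZ_4K;  /* PFN_DOWN() equivalent */
	unsigned int idx = pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1);

	printf("index within 2MB page: %u\n", idx); /* prints 52 (0x34) */
	return 0;
}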
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index a11da82..4103bd1 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -126,6 +126,7 @@ static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
struct page *p)
{
struct device *dev = pool->kbdev->dev;
+
dma_sync_single_for_device(dev, kbase_dma_addr(p),
(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
}
@@ -548,6 +549,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
while (nr_from_pool--) {
int j;
+
p = kbase_mem_pool_remove_locked(pool);
if (pool->order) {
pages[i++] = as_tagged_tag(page_to_phys(p),
diff --git a/mali_kbase/mali_kbase_mem_pool_group.h b/mali_kbase/mali_kbase_mem_pool_group.h
index 38fd4ca..c50ffdb 100644
--- a/mali_kbase/mali_kbase_mem_pool_group.h
+++ b/mali_kbase/mali_kbase_mem_pool_group.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,16 +25,40 @@
#include <mali_kbase_defs.h>
/**
+ * kbase_mem_pool_group_select() - Select the memory pool to use.
+ *
+ * @kbdev: Device pointer.
+ * @mem_group_id: Physical memory group ID to use.
+ * @is_small_page: Flag used to select between the small and
+ * large memory pool.
+ *
+ * Return: A pointer to the selected memory pool.
+ */
+static inline struct kbase_mem_pool *kbase_mem_pool_group_select(
+ struct kbase_device *kbdev, u32 mem_group_id, bool is_small_page)
+{
+ if (WARN_ON(unlikely(kbdev == NULL)))
+ return NULL;
+
+	WARN_ON(mem_group_id >= BASE_MEM_GROUP_COUNT);
+
+ if (is_small_page)
+ return &kbdev->mem_pools.small[mem_group_id];
+
+ return &kbdev->mem_pools.large[mem_group_id];
+}
+
+/**
* kbase_mem_pool_group_config_init - Set the initial configuration for a
* set of memory pools
*
+ * @configs: Initial configuration for the set of memory pools
+ * @max_size: Maximum number of free 4 KiB pages each pool can hold
+ *
* This function sets the initial configuration for every memory pool so that
* the maximum amount of free memory that each pool can hold is identical.
* The equivalent number of 2 MiB pages is calculated automatically for the
* purpose of configuring the large page pools.
- *
- * @configs: Initial configuration for the set of memory pools
- * @max_size: Maximum number of free 4 KiB pages each pool can hold
*/
void kbase_mem_pool_group_config_set_max_size(
struct kbase_mem_pool_group_config *configs, size_t max_size);
@@ -42,6 +66,12 @@ void kbase_mem_pool_group_config_set_max_size(
/**
* kbase_mem_pool_group_init - Initialize a set of memory pools
*
+ * @mem_pools: Set of memory pools to initialize
+ * @kbdev: Kbase device where memory is used
+ * @configs: Initial configuration for the set of memory pools
+ * @next_pools: Set of memory pools from which to allocate memory if there
+ * is no free memory in one of the @mem_pools
+ *
* Initializes a complete set of physical memory pools. Memory pools are used to
* allow efficient reallocation of previously-freed physical pages. A pair of
* memory pools is initialized for each physical memory group: one for 4 KiB
@@ -54,12 +84,6 @@ void kbase_mem_pool_group_config_set_max_size(
* is full in @mem_pools. Pages are zeroed before they spill over to another
* pool, to prevent leaking information between applications.
*
- * @mem_pools: Set of memory pools to initialize
- * @kbdev: Kbase device where memory is used
- * @configs: Initial configuration for the set of memory pools
- * @next_pools: Set of memory pools from which to allocate memory if there
- * is no free memory in one of the @mem_pools
- *
* Return: 0 on success, otherwise a negative error code
*/
int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools,
@@ -70,21 +94,21 @@ int kbase_mem_pool_group_init(struct kbase_mem_pool_group *mem_pools,
/**
* kbase_mem_pool_group_term - Mark a set of memory pools as dying
*
+ * @mem_pools: Set of memory pools to mark
+ *
* Marks a complete set of physical memory pools previously initialized by
* @kbase_mem_pool_group_init as dying. This will cause any ongoing allocation
* operations (eg growing on page fault) to be terminated.
- *
- * @mem_pools: Set of memory pools to mark
*/
void kbase_mem_pool_group_mark_dying(struct kbase_mem_pool_group *mem_pools);
/**
* kbase_mem_pool_group_term - Terminate a set of memory pools
*
+ * @mem_pools: Set of memory pools to terminate
+ *
* Terminates a complete set of physical memory pools previously initialized by
* @kbase_mem_pool_group_init.
- *
- * @mem_pools: Set of memory pools to terminate
*/
void kbase_mem_pool_group_term(struct kbase_mem_pool_group *mem_pools);
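A hedged usage sketch of the new kbase_mem_pool_group_select() helper declared above; the group id and the small/large heuristic below are assumptions for illustration only (the helper simply routes the caller to kbdev->mem_pools.small[] or .large[]):

/* Illustrative only: BASE_MEM_GROUP_DEFAULT and the threshold below are
 * assumptions for this sketch, not driver policy.
 */
static struct kbase_mem_pool *pick_pool_for_alloc(struct kbase_device *kbdev,
						  size_t nr_4k_pages)
{
	/* Prefer the large (2MB) pool only when at least one full 2MB page is needed */
	bool use_large = (nr_4k_pages >= NUM_4K_PAGES_IN_2MB_PAGE);

	return kbase_mem_pool_group_select(kbdev, BASE_MEM_GROUP_DEFAULT,
					   /* is_small_page = */ !use_large);
}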
diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs.c b/mali_kbase/mali_kbase_mem_profile_debugfs.c
index 7e77963..92ab1b8 100644
--- a/mali_kbase/mali_kbase_mem_profile_debugfs.c
+++ b/mali_kbase/mali_kbase_mem_profile_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2017, 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,14 +24,14 @@
#if IS_ENABLED(CONFIG_DEBUG_FS)
/**
- * Show callback for the @c mem_profile debugfs file.
- *
- * This function is called to get the contents of the @c mem_profile debugfs
- * file. This is a report of current memory usage and distribution in userspace.
+ * kbasep_mem_profile_seq_show - Show callback for the @c mem_profile debugfs file.
*
* @sfile: The debugfs entry
* @data: Data associated with the entry
*
+ * This function is called to get the contents of the @c mem_profile debugfs
+ * file. This is a report of current memory usage and distribution in userspace.
+ *
* Return: 0 if it successfully prints data in debugfs entry file, non-zero
* otherwise
*/
diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs.h b/mali_kbase/mali_kbase_mem_profile_debugfs.h
index 093a65e..c30fca6 100644
--- a/mali_kbase/mali_kbase_mem_profile_debugfs.h
+++ b/mali_kbase/mali_kbase_mem_profile_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2012-2016, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Header file for mem profiles entries in debugfs
+ * DOC: Header file for mem profiles entries in debugfs
*
*/
@@ -31,13 +31,16 @@
#include <linux/seq_file.h>
/**
- * Remove entry from Mali memory profile debugfs
+ * kbasep_mem_profile_debugfs_remove - Remove entry from Mali memory profile debugfs
+ *
* @kctx: The context whose debugfs file @p data should be removed from
*/
void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
/**
- * Insert @p data to the debugfs file so it can be read by userspace
+ * kbasep_mem_profile_debugfs_insert - Insert @p data into the debugfs file
+ * so it can be read by userspace
+ *
* @kctx: The context whose debugfs file @p data should be inserted to
* @data: A NULL-terminated string to be inserted to the debugfs file,
* without the trailing new line character
@@ -49,8 +52,8 @@ void kbasep_mem_profile_debugfs_remove(struct kbase_context *kctx);
* If the debugfs entry corresponding to the @p kctx doesn't exist,
* an attempt will be made to create it.
*
- * @return 0 if @p data inserted correctly
- * -EAGAIN in case of error
+ * Return: 0 if @p data inserted correctly, -EAGAIN in case of error
+ *
* @post @ref mem_profile_initialized will be set to @c true
* the first time this function succeeds.
*/
diff --git a/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h b/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h
index 1210ed5..c2fb3f5 100644
--- a/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h
+++ b/mali_kbase/mali_kbase_mem_profile_debugfs_buf_size.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,15 +20,15 @@
*/
/**
- * Header file for the size of the buffer to accumulate the histogram report text in
+ * DOC: Header file for the size of the buffer to accumulate the histogram report text in
*/
#ifndef _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
#define _KBASE_MEM_PROFILE_DEBUGFS_BUF_SIZE_H_
/**
- * The size of the buffer to accumulate the histogram report text in
- * @see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT
+ * KBASE_MEM_PROFILE_MAX_BUF_SIZE - The size of the buffer to accumulate the histogram report
+ * text in; see @ref CCTXP_HIST_BUF_SIZE_MAX_LENGTH_REPORT.
*/
#define KBASE_MEM_PROFILE_MAX_BUF_SIZE ((size_t)(64 + ((80 + (56 * 64)) * 54) + 56))
diff --git a/mali_kbase/mali_kbase_mipe_gen_header.h b/mali_kbase/mali_kbase_mipe_gen_header.h
index edd44bf..951079d 100644
--- a/mali_kbase/mali_kbase_mipe_gen_header.h
+++ b/mali_kbase/mali_kbase_mipe_gen_header.h
@@ -98,7 +98,7 @@
*
* Where the first argument is tracepoints name, the second
* argument is a short tracepoint description, the third argument
- * argument types (see MIPE documentation), and the fourth argument
+ * is the argument types (see MIPE documentation), and the fourth argument
* is comma separated argument names.
*/
#if !defined(MIPE_HEADER_TRACEPOINT_LIST)
diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c
index 3e58a7b..90406b2 100644
--- a/mali_kbase/mali_kbase_pbha.c
+++ b/mali_kbase/mali_kbase_pbha.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -102,17 +102,28 @@ static bool write_setting_valid(unsigned int id, unsigned int write_setting)
return false;
}
-static bool settings_valid(unsigned int id, unsigned int read_setting,
- unsigned int write_setting)
+/* Private structure to be returned as setting validity status */
+struct settings_status {
+ /* specifies whether id and either one of settings is valid */
+ bool overall;
+ /* specifies whether read setting is valid */
+ bool read;
+	/* specifies whether write setting is valid */
+ bool write;
+};
+
+static struct settings_status settings_valid(unsigned int id, unsigned int read_setting,
+ unsigned int write_setting)
{
- bool settings_valid = false;
+ struct settings_status valid = { .overall = (id < SYSC_ALLOC_COUNT * sizeof(u32)) };
- if (id < SYSC_ALLOC_COUNT * sizeof(u32)) {
- settings_valid = read_setting_valid(id, read_setting) &&
- write_setting_valid(id, write_setting);
+ if (valid.overall) {
+ valid.read = read_setting_valid(id, read_setting);
+ valid.write = write_setting_valid(id, write_setting);
+ valid.overall = valid.read || valid.write;
}
- return settings_valid;
+ return valid;
}
bool kbasep_pbha_supported(struct kbase_device *kbdev)
@@ -127,11 +138,12 @@ int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime,
unsigned int id, unsigned int read_setting,
unsigned int write_setting)
{
- bool const valid = settings_valid(id, read_setting, write_setting);
+ struct settings_status const valid = settings_valid(id, read_setting, write_setting);
- if (valid) {
+ if (valid.overall) {
unsigned int const sysc_alloc_num = id / sizeof(u32);
u32 modified_reg;
+
if (runtime) {
int i;
@@ -147,41 +159,50 @@ int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime,
switch (id % sizeof(u32)) {
case 0:
- modified_reg = SYSC_ALLOC_R_SYSC_ALLOC0_SET(
- modified_reg, read_setting);
- modified_reg = SYSC_ALLOC_W_SYSC_ALLOC0_SET(
- modified_reg, write_setting);
+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC0_SET(modified_reg,
+ read_setting) :
+ modified_reg;
+ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC0_SET(modified_reg,
+ write_setting) :
+ modified_reg;
break;
case 1:
- modified_reg = SYSC_ALLOC_R_SYSC_ALLOC1_SET(
- modified_reg, read_setting);
- modified_reg = SYSC_ALLOC_W_SYSC_ALLOC1_SET(
- modified_reg, write_setting);
+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC1_SET(modified_reg,
+ read_setting) :
+ modified_reg;
+ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC1_SET(modified_reg,
+ write_setting) :
+ modified_reg;
break;
case 2:
- modified_reg = SYSC_ALLOC_R_SYSC_ALLOC2_SET(
- modified_reg, read_setting);
- modified_reg = SYSC_ALLOC_W_SYSC_ALLOC2_SET(
- modified_reg, write_setting);
+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC2_SET(modified_reg,
+ read_setting) :
+ modified_reg;
+ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC2_SET(modified_reg,
+ write_setting) :
+ modified_reg;
break;
case 3:
- modified_reg = SYSC_ALLOC_R_SYSC_ALLOC3_SET(
- modified_reg, read_setting);
- modified_reg = SYSC_ALLOC_W_SYSC_ALLOC3_SET(
- modified_reg, write_setting);
+ modified_reg = valid.read ? SYSC_ALLOC_R_SYSC_ALLOC3_SET(modified_reg,
+ read_setting) :
+ modified_reg;
+ modified_reg = valid.write ? SYSC_ALLOC_W_SYSC_ALLOC3_SET(modified_reg,
+ write_setting) :
+ modified_reg;
break;
}
kbdev->sysc_alloc[sysc_alloc_num] = modified_reg;
}
- return valid ? 0 : -EINVAL;
+ return valid.overall ? 0 : -EINVAL;
}
void kbase_pbha_write_settings(struct kbase_device *kbdev)
{
if (kbasep_pbha_supported(kbdev)) {
int i;
+
for (i = 0; i < SYSC_ALLOC_COUNT; ++i)
kbase_reg_write(kbdev, GPU_CONTROL_REG(SYSC_ALLOC(i)),
kbdev->sysc_alloc[i]);
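To make the relaxed validity checking above concrete, here is a standalone model (not driver code) of the new settings_status behaviour: a request with only one valid side now succeeds, and only the valid field is applied. The stub validators are assumptions for the sketch:

#include <stdbool.h>
#include <stdio.h>

struct settings_status { bool overall, read, write; };

static bool read_setting_valid_stub(unsigned int s)  { return s <= 3; } /* assumed range */
static bool write_setting_valid_stub(unsigned int s) { return s <= 3; } /* assumed range */

static struct settings_status settings_valid_model(unsigned int read_s, unsigned int write_s)
{
	struct settings_status v = { .overall = true };

	v.read = read_setting_valid_stub(read_s);
	v.write = write_setting_valid_stub(write_s);
	v.overall = v.read || v.write; /* one valid side is now enough */
	return v;
}

int main(void)
{
	struct settings_status v = settings_valid_model(2, 99);

	/* The read setting would be applied; the invalid write setting is ignored. */
	printf("overall=%d read=%d write=%d\n", v.overall, v.read, v.write);
	return 0;
}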
diff --git a/mali_kbase/mali_kbase_pbha.h b/mali_kbase/mali_kbase_pbha.h
index 6861773..7963219 100644
--- a/mali_kbase/mali_kbase_pbha.h
+++ b/mali_kbase/mali_kbase_pbha.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,11 +28,11 @@
* kbasep_pbha_supported - check whether PBHA registers are
* available
*
+ * @kbdev: Device pointer
+ *
* Should only be used in mali_kbase_pbha* files - thus the
* kbase[p] prefix.
*
- * @kbdev: Device pointer
- *
* Return: True if pbha is supported, false otherwise
*/
bool kbasep_pbha_supported(struct kbase_device *kbdev);
@@ -57,10 +57,10 @@ int kbase_pbha_record_settings(struct kbase_device *kbdev, bool runtime,
* kbase_pbha_write_settings - write recorded PBHA settings to GPU
* registers
*
+ * @kbdev: Device pointer
+ *
* Only valid to call this function when L2 is powered down, otherwise
* this will not affect PBHA settings.
- *
- * @kbdev: Device pointer
*/
void kbase_pbha_write_settings(struct kbase_device *kbdev);
diff --git a/mali_kbase/mali_kbase_pm.h b/mali_kbase/mali_kbase_pm.h
index 730feea..4bb90a4 100644
--- a/mali_kbase/mali_kbase_pm.h
+++ b/mali_kbase/mali_kbase_pm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Power management API definitions
+ * DOC: Power management API definitions
*/
#ifndef _KBASE_PM_H_
@@ -39,29 +39,32 @@
#define PM_NO_RESET 0x04
#endif
-/** Initialize the power management framework.
- *
- * Must be called before any other power management function
+/**
+ * kbase_pm_init - Initialize the power management framework.
*
- * @param kbdev The kbase device structure for the device
+ * @kbdev: The kbase device structure for the device
* (must be a valid pointer)
*
- * @return 0 if the power management framework was successfully initialized.
+ * Must be called before any other power management function
+ *
+ * Return: 0 if the power management framework was successfully initialized.
*/
int kbase_pm_init(struct kbase_device *kbdev);
-/** Power up GPU after all modules have been initialized and interrupt handlers installed.
- *
- * @param kbdev The kbase device structure for the device (must be a valid pointer)
+/**
+ * kbase_pm_powerup - Power up GPU after all modules have been initialized
+ * and interrupt handlers installed.
*
- * @param flags Flags to pass on to kbase_pm_init_hw
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @flags: Flags to pass on to kbase_pm_init_hw
*
- * @return 0 if powerup was successful.
+ * Return: 0 if powerup was successful.
*/
int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
/**
- * Halt the power management framework.
+ * kbase_pm_halt - Halt the power management framework.
+ *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* Should ensure that no new interrupts are generated,
@@ -71,16 +74,20 @@ int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags);
*/
void kbase_pm_halt(struct kbase_device *kbdev);
-/** Terminate the power management framework.
+/**
+ * kbase_pm_term - Terminate the power management framework.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* No power management functions may be called after this
* (except @ref kbase_pm_init)
- *
- * @param kbdev The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_term(struct kbase_device *kbdev);
-/** Increment the count of active contexts.
+/**
+ * kbase_pm_context_active - Increment the count of active contexts.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function should be called when a context is about to submit a job.
* It informs the active power policy that the GPU is going to be in use shortly
@@ -94,8 +101,6 @@ void kbase_pm_term(struct kbase_device *kbdev);
* @note a Suspend is only visible to Kernel threads; user-space threads in a
* syscall cannot witness a suspend, because they are frozen before the suspend
* begins.
- *
- * @param kbdev The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_context_active(struct kbase_device *kbdev);
@@ -123,7 +128,11 @@ enum kbase_pm_suspend_handler {
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
};
-/** Suspend 'safe' variant of kbase_pm_context_active()
+/**
+ * kbase_pm_context_active_handle_suspend - Suspend 'safe' variant of kbase_pm_context_active()
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @suspend_handler: The handler code for how to handle a suspend that might occur
*
* If a suspend is in progress, this allows for various different ways of
* handling the suspend. Refer to @ref enum kbase_pm_suspend_handler for details.
@@ -133,20 +142,18 @@ enum kbase_pm_suspend_handler {
* indicates a failure, the caller must abort whatever operation it was
* attempting, and potentially queue it up for after the OS has resumed.
*
- * @param kbdev The kbase device structure for the device (must be a valid pointer)
- * @param suspend_handler The handler code for how to handle a suspend that might occur
- * @return zero Indicates success
- * @return non-zero Indicates failure due to the system being suspending/suspended.
+ * Return: 0 on success, non-zero otherwise.
*/
int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, enum kbase_pm_suspend_handler suspend_handler);
-/** Decrement the reference count of active contexts.
+/**
+ * kbase_pm_context_idle - Decrement the reference count of active contexts.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This function should be called when a context becomes idle.
* After this call the GPU may be turned off by the power policy so the calling
* code should ensure that it does not access the GPU's registers.
- *
- * @param kbdev The kbase device structure for the device (must be a valid pointer)
*/
void kbase_pm_context_idle(struct kbase_device *kbdev);
@@ -155,8 +162,9 @@ void kbase_pm_context_idle(struct kbase_device *kbdev);
*/
/**
- * Suspend the GPU and prevent any further register accesses to it from Kernel
- * threads.
+ * kbase_pm_suspend - Suspend the GPU and prevent any further register accesses
+ * to it from Kernel threads.
+ *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This is called in response to an OS suspend event, and calls into the various
@@ -171,8 +179,9 @@ void kbase_pm_context_idle(struct kbase_device *kbdev);
int kbase_pm_suspend(struct kbase_device *kbdev);
/**
- * Resume the GPU, allow register accesses to it, and resume running atoms on
- * the GPU.
+ * kbase_pm_resume - Resume the GPU, allow register accesses to it,
+ * and resume running atoms on the GPU.
+ *
* @kbdev: The kbase device structure for the device (must be a valid pointer)
*
* This is called in response to an OS resume event, and calls into the various
diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.c b/mali_kbase/mali_kbase_regs_history_debugfs.c
index 1e807d7..f8dec6b 100644
--- a/mali_kbase/mali_kbase_regs_history_debugfs.c
+++ b/mali_kbase/mali_kbase_regs_history_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014, 2016, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,7 @@
* If resizing fails for any reason (e.g., could not allocate memory, invalid
* buffer size) then the original buffer will be kept intact.
*
- * @return 0 if the buffer was resized, failure otherwise
+ * Return: 0 if the buffer was resized, failure otherwise
*/
static int kbase_io_history_resize(struct kbase_io_history *h, u16 new_size)
{
@@ -164,7 +164,6 @@ DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
regs_history_size_set,
"%llu\n");
-
/**
* regs_history_show - show callback for the register access history file.
*
@@ -173,8 +172,7 @@ DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
*
* This function is called to dump all recent accesses to the GPU registers.
*
- * @return 0 if successfully prints data in debugfs entry file, failure
- * otherwise
+ * Return: 0 if successfully prints data in debugfs entry file, failure otherwise
*/
static int regs_history_show(struct seq_file *sfile, void *data)
{
@@ -214,7 +212,7 @@ out:
* @in: &struct inode pointer
* @file: &struct file pointer
*
- * @return file descriptor
+ * Return: file descriptor
*/
static int regs_history_open(struct inode *in, struct file *file)
{
@@ -231,13 +229,13 @@ static const struct file_operations regs_history_fops = {
void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev)
{
- debugfs_create_bool("regs_history_enabled", S_IRUGO | S_IWUSR,
+ debugfs_create_bool("regs_history_enabled", 0644,
kbdev->mali_debugfs_directory,
&kbdev->io_history.enabled);
- debugfs_create_file("regs_history_size", S_IRUGO | S_IWUSR,
+ debugfs_create_file("regs_history_size", 0644,
kbdev->mali_debugfs_directory,
&kbdev->io_history, &regs_history_size_fops);
- debugfs_create_file("regs_history", S_IRUGO,
+ debugfs_create_file("regs_history", 0444,
kbdev->mali_debugfs_directory, &kbdev->io_history,
&regs_history_fops);
}
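The permission changes in this hunk are purely notational: the octal literals spell out the same mode bits as the symbolic constants they replace. A small userspace sanity check, assuming the standard <sys/stat.h> values:

#include <sys/stat.h>

_Static_assert((S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR) == 0644, "S_IRUGO | S_IWUSR");
_Static_assert((S_IRUSR | S_IRGRP | S_IROTH) == 0444, "S_IRUGO");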
diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.h b/mali_kbase/mali_kbase_regs_history_debugfs.h
index 1b4196d..044a4d3 100644
--- a/mali_kbase/mali_kbase_regs_history_debugfs.h
+++ b/mali_kbase/mali_kbase_regs_history_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2016, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Header file for register access history support via debugfs
+ * DOC: Header file for register access history support via debugfs
*
* This interface is made available via /sys/kernel/debug/mali#/regs_history*.
*
@@ -44,7 +44,7 @@ struct kbase_device;
* @h: The register history to initialize
* @n: The number of register accesses that the buffer could hold
*
- * @return 0 if successfully initialized, failure otherwise
+ * Return: 0 if successfully initialized, failure otherwise
*/
int kbase_io_history_init(struct kbase_io_history *h, u16 n);
@@ -71,7 +71,7 @@ void kbasep_regs_history_debugfs_init(struct kbase_device *kbdev);
#else /* !defined(CONFIG_DEBUG_FS) || IS_ENABLED(CONFIG_MALI_NO_MALI) */
-#define kbase_io_history_init(...) ((int)0)
+#define kbase_io_history_init(...) (0)
#define kbase_io_history_term CSTD_NOP
diff --git a/mali_kbase/mali_kbase_reset_gpu.h b/mali_kbase/mali_kbase_reset_gpu.h
index 7502fe8..ff631e9 100644
--- a/mali_kbase/mali_kbase_reset_gpu.h
+++ b/mali_kbase/mali_kbase_reset_gpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -144,9 +144,10 @@ void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev);
void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev);
/**
- * Flags for kbase_prepare_to_reset_gpu
+ * RESET_FLAGS_NONE - Flags for kbase_prepare_to_reset_gpu
*/
-#define RESET_FLAGS_NONE ((unsigned int)0)
+#define RESET_FLAGS_NONE (0U)
+
/* This reset should be treated as an unrecoverable error by HW counter logic */
#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0))
diff --git a/mali_kbase/mali_kbase_smc.h b/mali_kbase/mali_kbase_smc.h
index d0086db..91eb9ee 100644
--- a/mali_kbase/mali_kbase_smc.h
+++ b/mali_kbase/mali_kbase_smc.h
@@ -36,31 +36,31 @@
/**
- * kbase_invoke_smc_fid - Perform a secure monitor call
- * @fid: The SMC function to call, see SMC Calling convention.
- * @arg0: First argument to the SMC.
- * @arg1: Second argument to the SMC.
- * @arg2: Third argument to the SMC.
- *
- * See SMC Calling Convention for details.
- *
- * Return: the return value from the SMC.
- */
+ * kbase_invoke_smc_fid - Perform a secure monitor call
+ * @fid: The SMC function to call, see SMC Calling convention.
+ * @arg0: First argument to the SMC.
+ * @arg1: Second argument to the SMC.
+ * @arg2: Third argument to the SMC.
+ *
+ * See SMC Calling Convention for details.
+ *
+ * Return: the return value from the SMC.
+ */
u64 kbase_invoke_smc_fid(u32 fid, u64 arg0, u64 arg1, u64 arg2);
/**
- * kbase_invoke_smc_fid - Perform a secure monitor call
- * @oen: Owning Entity number (SIP, STD etc).
- * @function_number: The function number within the OEN.
- * @smc64: use SMC64 calling convention instead of SMC32.
- * @arg0: First argument to the SMC.
- * @arg1: Second argument to the SMC.
- * @arg2: Third argument to the SMC.
- *
- * See SMC Calling Convention for details.
- *
- * Return: the return value from the SMC call.
- */
+ * kbase_invoke_smc - Perform a secure monitor call
+ * @oen: Owning Entity number (SIP, STD etc).
+ * @function_number: The function number within the OEN.
+ * @smc64: use SMC64 calling convention instead of SMC32.
+ * @arg0: First argument to the SMC.
+ * @arg1: Second argument to the SMC.
+ * @arg2: Third argument to the SMC.
+ *
+ * See SMC Calling Convention for details.
+ *
+ * Return: the return value from the SMC call.
+ */
u64 kbase_invoke_smc(u32 oen, u16 function_number, bool smc64,
u64 arg0, u64 arg1, u64 arg2);
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index df34854..e7d8e7a 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -213,7 +213,7 @@ void kbase_soft_event_wait_callback(struct kbase_jd_atom *katom)
mutex_lock(&kctx->jctx.lock);
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
- if (jd_done_nolock(katom, NULL))
+ if (jd_done_nolock(katom, true))
kbase_js_sched_all(kctx->kbdev);
mutex_unlock(&kctx->jctx.lock);
}
@@ -227,7 +227,7 @@ static void kbasep_soft_event_complete_job(struct work_struct *work)
int resched;
mutex_lock(&kctx->jctx.lock);
- resched = jd_done_nolock(katom, NULL);
+ resched = jd_done_nolock(katom, true);
mutex_unlock(&kctx->jctx.lock);
if (resched)
@@ -305,7 +305,7 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
info.fence,
info.name,
kbase_sync_status_string(info.status));
- }
+ }
}
kbase_fence_debug_check_atom(dep);
@@ -498,7 +498,7 @@ out:
static void kbasep_soft_event_cancel_job(struct kbase_jd_atom *katom)
{
katom->event_code = BASE_JD_EVENT_JOB_CANCELLED;
- if (jd_done_nolock(katom, NULL))
+ if (jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
@@ -1355,7 +1355,7 @@ static void kbasep_jit_finish_worker(struct work_struct *work)
mutex_lock(&kctx->jctx.lock);
kbase_finish_soft_job(katom);
- resched = jd_done_nolock(katom, NULL);
+ resched = jd_done_nolock(katom, true);
mutex_unlock(&kctx->jctx.lock);
if (resched)
@@ -1395,9 +1395,8 @@ static void kbase_jit_free_finish(struct kbase_jd_atom *katom)
lockdep_assert_held(&kctx->jctx.lock);
ids = kbase_jit_free_get_ids(katom);
- if (WARN_ON(ids == NULL)) {
+ if (WARN_ON(ids == NULL))
return;
- }
/* Remove this atom from the jit_atoms_head list */
list_del(&katom->jit_node);
@@ -1787,7 +1786,7 @@ void kbase_resume_suspended_soft_jobs(struct kbase_device *kbdev)
if (kbase_process_soft_job(katom_iter) == 0) {
kbase_finish_soft_job(katom_iter);
- resched |= jd_done_nolock(katom_iter, NULL);
+ resched |= jd_done_nolock(katom_iter, true);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
atomic_dec(&kbdev->pm.gpu_users_waiting);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
diff --git a/mali_kbase/mali_kbase_sync.h b/mali_kbase/mali_kbase_sync.h
index 11cb8b9..e93e5c7 100644
--- a/mali_kbase/mali_kbase_sync.h
+++ b/mali_kbase/mali_kbase_sync.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2012-2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2016, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -66,41 +66,43 @@ struct kbase_sync_fence_info {
* - dup to add a ref
* - close to remove a ref
*
- * return: 0 on success, < 0 on error
+ * Return: 0 on success, < 0 on error
*/
int kbase_sync_fence_stream_create(const char *name, int *const out_fd);
#if !MALI_USE_CSF
/**
- * kbase_sync_fence_out_create Create an explicit output fence to specified atom
+ * kbase_sync_fence_out_create - Create an explicit output fence for the specified atom
+ *
* @katom: Atom to assign the new explicit fence to
* @stream_fd: File descriptor for stream object to create fence on
*
- * return: Valid file descriptor to fence or < 0 on error
+ * Return: Valid file descriptor to fence or < 0 on error
*/
int kbase_sync_fence_out_create(struct kbase_jd_atom *katom, int stream_fd);
/**
- * kbase_sync_fence_in_from_fd() Assigns an existing fence to specified atom
+ * kbase_sync_fence_in_from_fd() - Assigns an existing fence to the specified atom
* @katom: Atom to assign the existing explicit fence to
* @fd: File descriptor to an existing fence
*
* Assigns an explicit input fence to atom.
* This can later be waited for by calling @kbase_sync_fence_in_wait
*
- * return: 0 on success, < 0 on error
+ * Return: 0 on success, < 0 on error
*/
int kbase_sync_fence_in_from_fd(struct kbase_jd_atom *katom, int fd);
#endif /* !MALI_USE_CSF */
/**
* kbase_sync_fence_validate() - Validate a fd to be a valid fence
+ *
* @fd: File descriptor to check
*
* This function is only usable to catch unintentional user errors early,
* it does not stop malicious code changing the fd after this function returns.
*
- * return 0: if fd is for a valid fence, < 0 if invalid
+ * Return: 0 if fd is for a valid fence, < 0 if invalid
*/
int kbase_sync_fence_validate(int fd);
@@ -112,7 +114,7 @@ int kbase_sync_fence_validate(int fd);
*
* Signal output fence attached on katom and remove the fence from the atom.
*
- * return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE
+ * Return: The "next" event code for atom, typically JOB_CANCELLED or EVENT_DONE
*/
enum base_jd_event_code
kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result);
@@ -127,7 +129,7 @@ kbase_sync_fence_out_trigger(struct kbase_jd_atom *katom, int result);
* If the fence isn't already signaled, then this kbase_sync framework will
* take responsibility to continue the processing once the fence is signaled.
*
- * return: 0 if already signaled, otherwise 1
+ * Return: 0 if already signaled, otherwise 1
*/
int kbase_sync_fence_in_wait(struct kbase_jd_atom *katom);
@@ -163,7 +165,7 @@ void kbase_sync_fence_out_remove(struct kbase_jd_atom *katom);
* @katom: Atom to get fence information from
* @info: Struct to be filled with fence information
*
- * return: 0 on success, < 0 on error
+ * Return: 0 on success, < 0 on error
*/
int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
struct kbase_sync_fence_info *info);
@@ -173,7 +175,7 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
* @katom: Atom to get fence information from
* @info: Struct to be filled with fence information
*
- * return: 0 on success, < 0 on error
+ * Return: 0 on success, < 0 on error
*/
int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
struct kbase_sync_fence_info *info);
@@ -193,7 +195,7 @@ void kbase_sync_fence_info_get(struct dma_fence *fence,
* kbase_sync_status_string() - Get string matching @status
* @status: Value of fence status.
*
- * return: Pointer to string describing @status.
+ * Return: Pointer to string describing @status.
*/
const char *kbase_sync_status_string(int status);
@@ -206,7 +208,8 @@ void kbase_sync_fence_wait_worker(struct work_struct *data);
#ifdef CONFIG_MALI_FENCE_DEBUG
/**
- * kbase_sync_fence_in_dump() Trigger a debug dump of atoms input fence state
+ * kbase_sync_fence_in_dump() - Trigger a debug dump of atoms input fence state
+ *
* @katom: Atom to trigger fence debug dump for
*/
void kbase_sync_fence_in_dump(struct kbase_jd_atom *katom);
diff --git a/mali_kbase/mali_kbase_sync_android.c b/mali_kbase/mali_kbase_sync_android.c
index 8af2584..8d8d561 100644
--- a/mali_kbase/mali_kbase_sync_android.c
+++ b/mali_kbase/mali_kbase_sync_android.c
@@ -445,7 +445,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
- if (jd_done_nolock(katom, NULL))
+ if (jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
@@ -468,12 +468,19 @@ void kbase_sync_fence_in_remove(struct kbase_jd_atom *katom)
int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
struct kbase_sync_fence_info *info)
{
+ u32 string_len;
+
if (!katom->fence)
return -ENOENT;
info->fence = katom->fence;
info->status = kbase_fence_get_status(katom->fence);
- strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+ string_len = strscpy(info->name, katom->fence->name, sizeof(info->name));
+ string_len += sizeof(char);
+	/* Make sure that the source string fits into the buffer. */
+ KBASE_DEBUG_ASSERT(string_len <= sizeof(info->name));
+ CSTD_UNUSED(string_len);
return 0;
}
@@ -481,12 +488,19 @@ int kbase_sync_fence_in_info_get(struct kbase_jd_atom *katom,
int kbase_sync_fence_out_info_get(struct kbase_jd_atom *katom,
struct kbase_sync_fence_info *info)
{
+ u32 string_len;
+
if (!katom->fence)
return -ENOENT;
info->fence = katom->fence;
info->status = kbase_fence_get_status(katom->fence);
- strlcpy(info->name, katom->fence->name, sizeof(info->name));
+
+ string_len = strscpy(info->name, katom->fence->name, sizeof(info->name));
+ string_len += sizeof(char);
+	/* Make sure that the source string fits into the buffer. */
+ KBASE_DEBUG_ASSERT(string_len <= sizeof(info->name));
+ CSTD_UNUSED(string_len);
return 0;
}
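The strlcpy() to strscpy() switch above also changes the return convention: strscpy() returns the number of characters copied excluding the NUL terminator, or -E2BIG if the source had to be truncated. A hedged sketch of what the new assertion checks (the helper name and sizes below are assumptions, not driver fields):

#include <linux/kernel.h>
#include <linux/string.h>

static void copy_fence_name(char *dst, size_t dst_size, const char *src)
{
	ssize_t copied = strscpy(dst, src, dst_size);

	/* copied >= 0      : src plus its NUL fitted, copied == strlen(src)
	 * copied == -E2BIG : src was truncated to fit dst_size
	 *
	 * The driver stores the result in an unsigned string_len, adds one for
	 * the NUL and asserts it is within the buffer, so the debug assert
	 * fires exactly when truncation occurred (the negative return wraps to
	 * a huge unsigned value).
	 */
	WARN_ON(copied < 0);
}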
diff --git a/mali_kbase/mali_kbase_sync_file.c b/mali_kbase/mali_kbase_sync_file.c
index 25670c4..7c14c09 100644
--- a/mali_kbase/mali_kbase_sync_file.c
+++ b/mali_kbase/mali_kbase_sync_file.c
@@ -262,7 +262,7 @@ void kbase_sync_fence_in_cancel_wait(struct kbase_jd_atom *katom)
kbasep_remove_waiting_soft_job(katom);
kbase_finish_soft_job(katom);
- if (jd_done_nolock(katom, NULL))
+ if (jd_done_nolock(katom, true))
kbase_js_sched_all(katom->kctx->kbdev);
}
diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.c b/mali_kbase/mali_kbase_trace_gpu_mem.c
index 3088c41..7df7d79 100644
--- a/mali_kbase/mali_kbase_trace_gpu_mem.c
+++ b/mali_kbase/mali_kbase_trace_gpu_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,6 +51,8 @@ struct kbase_dma_buf {
* rb_tree is maintained at kbase_device level and kbase_process level
* by passing the root of kbase_device or kbase_process we can remove
* the node from the tree.
+ *
+ * Return: true on success.
*/
static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx,
struct dma_buf *dma_buf,
@@ -100,6 +102,8 @@ static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx,
* of all unique dma_buf's mapped to gpu memory. So when attach any
* dma_buf add it the rb_tree's. To add the unique mapping we need
* check if the mapping is not a duplicate and then add them.
+ *
+ * Return: true on success
*/
static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx,
struct dma_buf *dma_buf,
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index 6a1e782..d7a6c98 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -357,6 +357,8 @@ static void kbasep_vinstr_dump_worker(struct work_struct *work)
* kbasep_vinstr_dump_timer() - Dump timer that schedules the dump worker for
* execution as soon as possible.
* @timer: Timer structure.
+ *
+ * Return: HRTIMER_NORESTART always.
*/
static enum hrtimer_restart kbasep_vinstr_dump_timer(struct hrtimer *timer)
{
@@ -920,7 +922,7 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver(
* @arg: Command's argument.
* @size: Size of arg.
*
- * @return 0 on success, else error code.
+ * Return: 0 on success, else error code.
*/
static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version(
struct kbase_vinstr_client *cli, unsigned long arg, size_t size)
diff --git a/mali_kbase/mali_malisw.h b/mali_kbase/mali_malisw.h
index 92c8d31..fc8dcbc 100644
--- a/mali_kbase/mali_malisw.h
+++ b/mali_kbase/mali_malisw.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2015, 2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -53,7 +53,8 @@
#define MAX(x, y) ((x) < (y) ? (y) : (x))
/**
- * Function-like macro for suppressing unused variable warnings.
+ * CSTD_UNUSED - Function-like macro for suppressing unused variable warnings.
+ *
* @x: unused variable
*
* Where possible such variables should be removed; this macro is present for
@@ -62,7 +63,7 @@
#define CSTD_UNUSED(x) ((void)(x))
/**
- * Function-like macro for use where "no behavior" is desired.
+ * CSTD_NOP - Function-like macro for use where "no behavior" is desired.
* @...: no-op
*
* This is useful when compile time macros turn a function-like macro in to a
@@ -71,7 +72,7 @@
#define CSTD_NOP(...) ((void)#__VA_ARGS__)
/**
- * Function-like macro for stringizing a single level macro.
+ * CSTD_STR1 - Function-like macro for stringizing a single level macro.
* @x: macro's value
*
* @code
@@ -83,7 +84,7 @@
#define CSTD_STR1(x) #x
/**
- * Function-like macro for stringizing a macro's value.
+ * CSTD_STR2 - Function-like macro for stringizing a macro's value.
* @x: macro's value
*
* This should not be used if the macro is defined in a way which may have no
diff --git a/mali_kbase/mali_power_gpu_frequency_trace.h b/mali_kbase/mali_power_gpu_frequency_trace.h
index ea18fcd..f156650 100644
--- a/mali_kbase/mali_power_gpu_frequency_trace.h
+++ b/mali_kbase/mali_power_gpu_frequency_trace.h
@@ -42,8 +42,8 @@ DECLARE_EVENT_CLASS(gpu,
TP_ARGS(state, gpu_id),
TP_STRUCT__entry(
- __field( u32, state )
- __field( u32, gpu_id )
+ __field(u32, state)
+ __field(u32, gpu_id)
),
TP_fast_assign(
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index b050be8..fad5554 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -241,13 +241,13 @@ static void kbase_mmu_interrupt_process(struct kbase_device *kbdev,
* hw counters dumping in progress, signal the
* other thread that it failed
*/
- spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
+ spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if ((kbdev->hwcnt.kctx == kctx) &&
(kbdev->hwcnt.backend.state ==
KBASE_INSTR_STATE_DUMPING))
- kbdev->hwcnt.backend.state =
- KBASE_INSTR_STATE_FAULT;
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_FAULT;
+
+ spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
/*
* Stop the kctx from submitting more jobs and cause it
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index a450d38..5814b46 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -47,6 +47,47 @@
#endif
#include <mali_kbase_trace_gpu_mem.h>
+#include <backend/gpu/mali_kbase_pm_internal.h>
+
+static void mmu_hw_operation_begin(struct kbase_device *kbdev)
+{
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if MALI_USE_CSF
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) {
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON_ONCE(kbdev->mmu_hw_operation_in_progress);
+ kbdev->mmu_hw_operation_in_progress = true;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+#endif /* MALI_USE_CSF */
+#endif /* !CONFIG_MALI_NO_MALI */
+}
+
+static void mmu_hw_operation_end(struct kbase_device *kbdev)
+{
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if MALI_USE_CSF
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) {
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ WARN_ON_ONCE(!kbdev->mmu_hw_operation_in_progress);
+ kbdev->mmu_hw_operation_in_progress = false;
+ /* Invoke the PM state machine, the L2 power off may have been
+ * skipped due to the MMU command.
+ */
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+#endif /* MALI_USE_CSF */
+#endif /* !CONFIG_MALI_NO_MALI */
+}
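For clarity, the call pattern used by the hunks further down is a simple bracket around the MMU command; a minimal sketch, with op_param setup and error handling omitted and kbdev->mmu_hw_mutex assumed to be held (matching the lockdep assertions in the helpers above):

mmu_hw_operation_begin(kbdev);          /* mark an MMU command in flight */
err = kbase_mmu_hw_do_operation(kbdev, as, &op_param);
mmu_hw_operation_end(kbdev);            /* clear the flag and kick the PM state
                                         * machine so a deferred L2 power-off
                                         * can proceed */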
/**
* mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done
@@ -326,9 +367,15 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
.mmu_sync_info = mmu_sync_info,
};
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as, &op_param);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
} else {
+ mmu_hw_operation_begin(kbdev);
kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
+ mmu_hw_operation_end(kbdev);
}
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -337,12 +384,32 @@ static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
KBASE_MMU_FAULT_TYPE_PAGE);
}
+static void set_gwt_element_page_addr_and_size(
+ struct kbasep_gwt_list_element *element,
+ u64 fault_page_addr, struct tagged_addr fault_phys)
+{
+ u64 fault_pfn = fault_page_addr >> PAGE_SHIFT;
+ unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1);
+
+ /* If the fault address lies within a 2MB page, then consider
+ * the whole 2MB page for dumping to avoid incomplete dumps.
+ */
+ if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) {
+ element->page_addr = fault_page_addr & ~(SZ_2M - 1);
+ element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE;
+ } else {
+ element->page_addr = fault_page_addr;
+ element->num_pages = 1;
+ }
+}
+
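A small worked example of the rounding performed by set_gwt_element_page_addr_and_size() above; the fault address is hypothetical:

/* Suppose a write fault at GPU VA 0x10234000 lands on a 4KB sub-page of an
 * unsplit 2MB physical page, with the VA offset matching the sub-page's
 * index inside the large page.  Then:
 *
 *   fault_page_addr    = 0x10234000
 *   element->page_addr = 0x10234000 & ~(SZ_2M - 1) = 0x10200000
 *   element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE  = 512
 *
 * so the whole 2MB region is recorded for job dumping.  If the sub-page came
 * from a split (partial) large page, the element stays a single 4KB page.
 */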
static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
struct kbase_as *faulting_as)
{
struct kbasep_gwt_list_element *pos;
struct kbase_va_region *region;
struct kbase_device *kbdev;
+ struct tagged_addr *fault_phys_addr;
struct kbase_fault *fault;
u64 fault_pfn, pfn_offset;
int ret;
@@ -374,15 +441,18 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
return;
}
+ pfn_offset = fault_pfn - region->start_pfn;
+ fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset];
+
/* Capture addresses of faulting write location
* for job dumping if write tracking is enabled.
*/
if (kctx->gwt_enabled) {
- u64 page_addr = fault->addr & PAGE_MASK;
+ u64 fault_page_addr = fault->addr & PAGE_MASK;
bool found = false;
/* Check if this write was already handled. */
list_for_each_entry(pos, &kctx->gwt_current_list, link) {
- if (page_addr == pos->page_addr) {
+ if (fault_page_addr == pos->page_addr) {
found = true;
break;
}
@@ -392,8 +462,8 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
pos = kmalloc(sizeof(*pos), GFP_KERNEL);
if (pos) {
pos->region = region;
- pos->page_addr = page_addr;
- pos->num_pages = 1;
+ set_gwt_element_page_addr_and_size(pos,
+ fault_page_addr, *fault_phys_addr);
list_add(&pos->link, &kctx->gwt_current_list);
} else {
dev_warn(kbdev->dev, "kmalloc failure");
@@ -401,10 +471,9 @@ static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
}
}
- pfn_offset = fault_pfn - region->start_pfn;
/* Now make this faulting page writable to GPU. */
ret = kbase_mmu_update_pages_no_flush(kctx, fault_pfn,
- &kbase_get_gpu_phy_pages(region)[pfn_offset],
+ fault_phys_addr,
1, region->flags, region->gpu_alloc->group_id);
kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
@@ -457,7 +526,7 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
* pool, then if required will try to allocate the remaining pages from the
* device pool.
*
- * This function will not allocate any new memory beyond that that is already
+ * This function will not allocate any new memory beyond what is already
* present in the context or device pools. This is because it is intended to be
* called with the vm_lock held, which could cause recursive locking if the
* allocation caused the out-of-memory killer to run.
@@ -876,7 +945,9 @@ page_fault_retry:
.kctx_id = kctx->id,
.mmu_sync_info = mmu_sync_info,
};
+ mmu_hw_operation_begin(kbdev);
kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
+ mmu_hw_operation_end(kbdev);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -911,7 +982,9 @@ page_fault_retry:
.kctx_id = kctx->id,
.mmu_sync_info = mmu_sync_info,
};
+ mmu_hw_operation_begin(kbdev);
kbase_mmu_hw_do_operation(kbdev, faulting_as, &op_param);
+ mmu_hw_operation_end(kbdev);
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -1020,11 +1093,23 @@ page_fault_retry:
.mmu_sync_info = mmu_sync_info,
};
if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
- mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as,
+ unsigned long irq_flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, faulting_as,
&op_param);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
} else {
- kbase_mmu_hw_do_operation(kbdev, faulting_as,
+ mmu_hw_operation_begin(kbdev);
+ err = kbase_mmu_hw_do_operation(kbdev, faulting_as,
&op_param);
+ mmu_hw_operation_end(kbdev);
+ }
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Flush for GPU page table update did not complete on handling page fault @ 0x%llx",
+ fault->addr);
}
mutex_unlock(&kbdev->mmu_hw_mutex);
@@ -1295,9 +1380,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
unsigned int left = to_vpfn - vpfn;
int level;
u64 *page;
- register unsigned int num_of_valid_entries;
phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
+ register unsigned int num_of_valid_entries;
+
if (count > left)
count = left;
@@ -1739,6 +1825,9 @@ static void kbase_mmu_flush_invalidate_noretain(struct kbase_context *kctx,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+ lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
+
/* Early out if there is nothing to do */
if (nr == 0)
return;
@@ -1826,7 +1915,9 @@ kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
err = mmu_flush_invalidate_on_gpu_ctrl(kbdev, as, &op_param);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
} else {
+ mmu_hw_operation_begin(kbdev);
err = kbase_mmu_hw_do_operation(kbdev, as, &op_param);
+ mmu_hw_operation_end(kbdev);
}
if (err) {
@@ -2154,22 +2245,28 @@ out:
KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
/**
- * kbase_mmu_update_pages_no_flush() - Update page table entries on the GPU
+ * kbase_mmu_update_pages_no_flush() - Update attributes data in GPU page table entries
*
* @kctx: Kbase context
* @vpfn: Virtual PFN (Page Frame Number) of the first page to update
- * @phys: Tagged physical addresses of the physical pages to replace the
- * current mappings
+ * @phys: Pointer to the array of tagged physical addresses of the physical
+ * pages that are pointed to by the page table entries (that need to
+ * be updated). The pointer should be within the reg->gpu_alloc->pages
+ * array.
* @nr: Number of pages to update
* @flags: Flags
* @group_id: The physical memory group in which the page was allocated.
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
*
* This will update page table entries that already exist on the GPU based on
- * the new flags that are passed. It is used as a response to the changes of
- * the memory attributes
+ * the new flags that are passed (the physical pages pointed to by the page
+ * table entries remain unchanged). It is used as a response to the changes of
+ * the memory attributes.
+ *
+ * The caller is responsible for validating the memory attributes.
*
- * The caller is responsible for validating the memory attributes
+ * Return: 0 if the attributes data in page table entries were updated
+ * successfully, otherwise an error code.
*/
static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
@@ -2199,39 +2296,22 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
size_t count = KBASE_MMU_PAGE_ENTRIES - index;
struct page *p;
register unsigned int num_of_valid_entries;
+ int cur_level = MIDGARD_MMU_BOTTOMLEVEL;
if (count > nr)
count = nr;
- do {
- err = mmu_get_bottom_pgd(kbdev, &kctx->mmu,
- vpfn, &pgd);
- if (err != -ENOMEM)
- break;
- /* Fill the memory pool with enough pages for
- * the page walk to succeed
- */
- mutex_unlock(&kctx->mmu.mmu_lock);
- err = kbase_mem_pool_grow(
-#ifdef CONFIG_MALI_2MB_ALLOC
- &kbdev->mem_pools.large[
-#else
- &kbdev->mem_pools.small[
-#endif
- kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
- mutex_lock(&kctx->mmu.mmu_lock);
- } while (!err);
- if (err) {
- dev_warn(kbdev->dev,
- "mmu_get_bottom_pgd failure\n");
+ if (is_huge(*phys) && (index == index_in_large_page(*phys)))
+ cur_level = MIDGARD_MMU_LEVEL(2);
+
+ err = mmu_get_pgd_at_level(kbdev, &kctx->mmu, vpfn, cur_level, &pgd);
+ if (WARN_ON(err))
goto fail_unlock;
- }
p = pfn_to_page(PFN_DOWN(pgd));
pgd_page = kmap(p);
if (!pgd_page) {
- dev_warn(kbdev->dev, "kmap failure\n");
+ dev_warn(kbdev->dev, "kmap failure on update_pages");
err = -ENOMEM;
goto fail_unlock;
}
@@ -2239,15 +2319,35 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
num_of_valid_entries =
kbdev->mmu_mode->get_num_valid_entries(pgd_page);
- for (i = 0; i < count; i++) {
+ if (cur_level == MIDGARD_MMU_LEVEL(2)) {
+ int level_index = (vpfn >> 9) & 0x1FF;
+ struct tagged_addr *target_phys =
+ phys - index_in_large_page(*phys);
+
#ifdef CONFIG_MALI_DEBUG
WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
- pgd_page[index + i],
- MIDGARD_MMU_BOTTOMLEVEL));
+ pgd_page[level_index], MIDGARD_MMU_LEVEL(2)));
#endif
- pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
- phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
- group_id);
+ pgd_page[level_index] = kbase_mmu_create_ate(kbdev,
+ *target_phys, flags, MIDGARD_MMU_LEVEL(2),
+ group_id);
+ kbase_mmu_sync_pgd(kbdev,
+ kbase_dma_addr(p) + (level_index * sizeof(u64)),
+ sizeof(u64));
+ } else {
+ for (i = 0; i < count; i++) {
+#ifdef CONFIG_MALI_DEBUG
+ WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
+ pgd_page[index + i],
+ MIDGARD_MMU_BOTTOMLEVEL));
+#endif
+ pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
+ phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
+ group_id);
+ }
+ kbase_mmu_sync_pgd(kbdev,
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ count * sizeof(u64));
}
kbdev->mmu_mode->set_num_valid_entries(pgd_page,
@@ -2257,11 +2357,7 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
vpfn += count;
nr -= count;
- kbase_mmu_sync_pgd(kbdev,
- kbase_dma_addr(p) + (index * sizeof(u64)),
- count * sizeof(u64));
-
- kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ kunmap(p);
}
mutex_unlock(&kctx->mmu.mmu_lock);
@@ -2291,12 +2387,13 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
static void mmu_teardown_level(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, phys_addr_t pgd,
- int level, u64 *pgd_page_buffer)
+ int level)
{
phys_addr_t target_pgd;
u64 *pgd_page;
int i;
struct kbase_mmu_mode const *mmu_mode;
+ u64 *pgd_page_buffer;
lockdep_assert_held(&mmut->mmu_lock);
@@ -2313,6 +2410,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev,
/* Copy the page to our preallocated buffer so that we can minimize
* kmap_atomic usage
*/
+ pgd_page_buffer = mmut->mmu_teardown_pages[level];
memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
kunmap_atomic(pgd_page);
pgd_page = pgd_page_buffer;
@@ -2326,9 +2424,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev,
if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
mmu_teardown_level(kbdev, mmut,
target_pgd,
- level + 1,
- pgd_page_buffer +
- (PAGE_SIZE / sizeof(u64)));
+ level + 1);
}
}
}
@@ -2340,6 +2436,8 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
int const group_id)
{
+ int level;
+
if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
WARN_ON(group_id < 0))
return -EINVAL;
@@ -2347,14 +2445,20 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
mmut->group_id = group_id;
mutex_init(&mmut->mmu_lock);
mmut->kctx = kctx;
+ mmut->pgd = 0;
- /* Preallocate MMU depth of four pages for mmu_teardown_level to use */
- mmut->mmu_teardown_pages = kmalloc(PAGE_SIZE * 4, GFP_KERNEL);
+ /* Preallocate MMU depth of 3 pages for mmu_teardown_level to use */
+ for (level = MIDGARD_MMU_TOPLEVEL;
+ level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
+ mmut->mmu_teardown_pages[level] =
+ kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (mmut->mmu_teardown_pages == NULL)
- return -ENOMEM;
+ if (!mmut->mmu_teardown_pages[level]) {
+ kbase_mmu_term(kbdev, mmut);
+ return -ENOMEM;
+ }
+ }
- mmut->pgd = 0;
/* We allocate pages into the kbdev memory pool, then
* kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
* avoid allocations from the kernel happening with the lock held.
@@ -2380,17 +2484,24 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
{
+ int level;
+
if (mmut->pgd) {
mutex_lock(&mmut->mmu_lock);
- mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL,
- mmut->mmu_teardown_pages);
+ mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL);
mutex_unlock(&mmut->mmu_lock);
if (mmut->kctx)
KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
}
- kfree(mmut->mmu_teardown_pages);
+ for (level = MIDGARD_MMU_TOPLEVEL;
+ level < MIDGARD_MMU_BOTTOMLEVEL; level++) {
+ if (!mmut->mmu_teardown_pages[level])
+ break;
+ kfree(mmut->mmu_teardown_pages[level]);
+ }
+
mutex_destroy(&mmut->mmu_lock);
}
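Editor's note: the hunks above replace the single 4-page teardown buffer with one preallocated scratch page per page-table level, freed again in kbase_mmu_term(). A minimal userspace sketch of that preallocate-per-level pattern follows; the names and sizes (NUM_LEVELS, SCRATCH_PAGE_SIZE, table_ctx) are illustrative, not the kbase identifiers.

    #include <stdlib.h>
    #include <string.h>

    #define NUM_LEVELS 3          /* stand-in for the number of walked levels */
    #define SCRATCH_PAGE_SIZE 4096

    struct table_ctx {
        void *teardown_pages[NUM_LEVELS];
    };

    static void table_term(struct table_ctx *ctx)
    {
        int level;

        /* Free in order and stop at the first empty slot: a failed init
         * leaves every later slot unallocated.
         */
        for (level = 0; level < NUM_LEVELS; level++) {
            if (!ctx->teardown_pages[level])
                break;
            free(ctx->teardown_pages[level]);
            ctx->teardown_pages[level] = NULL;
        }
    }

    static int table_init(struct table_ctx *ctx)
    {
        int level;

        memset(ctx, 0, sizeof(*ctx));

        /* One scratch page per page-table level, so teardown never has
         * to allocate while walking the tables.
         */
        for (level = 0; level < NUM_LEVELS; level++) {
            ctx->teardown_pages[level] = malloc(SCRATCH_PAGE_SIZE);
            if (!ctx->teardown_pages[level]) {
                table_term(ctx);
                return -1;
            }
        }
        return 0;
    }

Stopping the free loop at the first NULL slot mirrors how the patch lets kbase_mmu_term() clean up after a partially failed kbase_mmu_init().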
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index fe721fc..49665fb 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,25 +36,27 @@ struct kbase_mmu_table;
* nature of the call flow, with respect to MMU operations. ie - does the call flow relate to
* existing GPU work does it come from requests (like ioctl) from user-space, power management,
* etc.
+ *
+ * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice
+ * of a 'valid' value
+ * @CALLER_MMU_SYNC: Arbitrary value for 'synchronous' that isn't easy to choose by accident
+ * @CALLER_MMU_ASYNC: Also hard to choose by accident
*/
enum kbase_caller_mmu_sync_info {
- /* default value must be invalid to avoid accidental choice ov a 'valid' value. */
CALLER_MMU_UNSET_SYNCHRONICITY,
- /* Arbitrary value for 'synchronous that isn't easy to choose by accident. */
CALLER_MMU_SYNC = 0x02,
- /* Also hard to choose by accident */
CALLER_MMU_ASYNC
};
/**
* kbase_mmu_as_init() - Initialising GPU address space object.
*
- * This is called from device probe to initialise an address space object
- * of the device.
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer).
* @i: Array index of address space object.
*
+ * This is called from device probe to initialise an address space object
+ * of the device.
+ *
* Return: 0 on success and non-zero value on failure.
*/
int kbase_mmu_as_init(struct kbase_device *kbdev, int i);
@@ -62,19 +64,17 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, int i);
/**
* kbase_mmu_as_term() - Terminate address space object.
*
- * This is called upon device termination to destroy
- * the address space object of the device.
- *
* @kbdev: The kbase device structure for the device (must be a valid pointer).
* @i: Array index of address space object.
+ *
+ * This is called upon device termination to destroy
+ * the address space object of the device.
*/
void kbase_mmu_as_term(struct kbase_device *kbdev, int i);
/**
* kbase_mmu_init - Initialise an object representing GPU page tables
*
- * The structure should be terminated using kbase_mmu_term()
- *
* @kbdev: Instance of GPU platform device, allocated from the probe method.
* @mmut: GPU page tables to be initialized.
* @kctx: Optional kbase context, may be NULL if this set of MMU tables
@@ -82,6 +82,8 @@ void kbase_mmu_as_term(struct kbase_device *kbdev, int i);
* @group_id: The physical group ID from which to allocate GPU page tables.
* Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
*
+ * The structure should be terminated using kbase_mmu_term()
+ *
* Return: 0 if successful, otherwise a negative error code.
*/
int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
@@ -90,20 +92,20 @@ int kbase_mmu_init(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
/**
* kbase_mmu_interrupt - Process an MMU interrupt.
*
- * Process the MMU interrupt that was reported by the &kbase_device.
- *
* @kbdev: Pointer to the kbase device for which the interrupt happened.
* @irq_stat: Value of the MMU_IRQ_STATUS register.
+ *
+ * Process the MMU interrupt that was reported by the &kbase_device.
*/
void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat);
/**
* kbase_mmu_term - Terminate an object representing GPU page tables
*
- * This will free any page tables that have been allocated
- *
* @kbdev: Instance of GPU platform device, allocated from the probe method.
* @mmut: GPU page tables to be destroyed.
+ *
+ * This will free any page tables that have been allocated
*/
void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut);
@@ -152,13 +154,13 @@ int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
/**
* kbase_mmu_bus_fault_interrupt - Process a bus fault interrupt.
*
- * Process the bus fault interrupt that was reported for a particular GPU
- * address space.
- *
* @kbdev: Pointer to the kbase device for which bus fault was reported.
* @status: Value of the GPU_FAULTSTATUS register.
* @as_nr: GPU address space for which the bus fault occurred.
*
+ * Process the bus fault interrupt that was reported for a particular GPU
+ * address space.
+ *
* Return: zero if the operation was successful, non-zero otherwise.
*/
int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status,
@@ -166,6 +168,7 @@ int kbase_mmu_bus_fault_interrupt(struct kbase_device *kbdev, u32 status,
/**
* kbase_mmu_gpu_fault_interrupt() - Report a GPU fault.
+ *
* @kbdev: Kbase device pointer
* @status: GPU fault status
* @as_nr: Faulty address space
@@ -182,10 +185,10 @@ void kbase_mmu_gpu_fault_interrupt(struct kbase_device *kbdev, u32 status,
* kbase_context_mmu_group_id_get - Decode a memory group ID from
* base_context_create_flags
*
- * Memory allocated for GPU page tables will come from the returned group.
- *
* @flags: Bitmask of flags to pass to base_context_init.
*
+ * Memory allocated for GPU page tables will come from the returned group.
+ *
* Return: Physical memory group ID. Valid range is 0..(BASE_MEM_GROUP_COUNT-1).
*/
static inline int
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index 7cdf426..31658e0 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2015, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -55,7 +55,7 @@ enum kbase_mmu_fault_type {
};
/**
- * enum kbase_mmu_cache_flush_type - enum for MMU operations
+ * enum kbase_mmu_op_type - enum for MMU operations
* @KBASE_MMU_OP_NONE: To help catch uninitialized struct
* @KBASE_MMU_OP_FIRST: The lower boundary of enum
* @KBASE_MMU_OP_LOCK: Lock memory region
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index 0ebc1bc..cdf9a84 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -29,6 +29,7 @@
/**
* lock_region() - Generate lockaddr to lock memory region in MMU
+ * @gpu_props: GPU properties for finding the MMU lock region size
* @pfn: Starting page frame number of the region to lock
* @num_pages: Number of pages to lock. It must be greater than 0.
* @lockaddr: Address and size of memory region to lock
@@ -62,7 +63,8 @@
*
* Return: 0 if success, or an error code on failure.
*/
-static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr)
+static int lock_region(struct kbase_gpu_props const *gpu_props, u64 pfn, u32 num_pages,
+ u64 *lockaddr)
{
const u64 lockaddr_base = pfn << PAGE_SHIFT;
const u64 lockaddr_end = ((pfn + num_pages) << PAGE_SHIFT) - 1;
@@ -106,7 +108,7 @@ static int lock_region(u64 pfn, u32 num_pages, u64 *lockaddr)
return -EINVAL;
lockaddr_size_log2 =
- MAX(lockaddr_size_log2, KBASE_LOCK_REGION_MIN_SIZE_LOG2);
+ MAX(lockaddr_size_log2, kbase_get_lock_region_min_size_log2(gpu_props));
/* Represent the result in a way that is compatible with HW spec.
*
@@ -136,8 +138,10 @@ static int wait_ready(struct kbase_device *kbdev,
;
}
- if (max_loops == 0) {
- dev_err(kbdev->dev, "AS_ACTIVE bit stuck, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n");
+ if (WARN_ON_ONCE(max_loops == 0)) {
+ dev_err(kbdev->dev,
+ "AS_ACTIVE bit stuck for as %u, might be caused by slow/unstable GPU clock or possible faulty FPGA connector",
+ as_nr);
return -1;
}
@@ -152,6 +156,11 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
status = wait_ready(kbdev, as_nr);
if (status == 0)
kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
+ else {
+ dev_err(kbdev->dev,
+ "Wait for AS_ACTIVE bit failed for as %u, before sending MMU command %u",
+ as_nr, cmd);
+ }
return status;
}
@@ -161,6 +170,9 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
struct kbase_mmu_setup *current_setup = &as->current_setup;
u64 transcfg = 0;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kbdev->mmu_hw_mutex);
+
transcfg = current_setup->transcfg;
/* Set flag AS_TRANSCFG_PTW_MEMATTR_WRITE_BACK
@@ -204,6 +216,10 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
transcfg);
write_cmd(kbdev, as->number, AS_COMMAND_UPDATE);
+#if MALI_USE_CSF
+ /* Wait for UPDATE command to complete */
+ wait_ready(kbdev, as->number);
+#endif
}
int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
@@ -235,7 +251,7 @@ int kbase_mmu_hw_do_operation(struct kbase_device *kbdev, struct kbase_as *as,
}
} else if (op_param->op >= KBASE_MMU_OP_FIRST &&
op_param->op < KBASE_MMU_OP_COUNT) {
- ret = lock_region(op_param->vpfn, op_param->nr, &lock_addr);
+ ret = lock_region(&kbdev->gpu_props, op_param->vpfn, op_param->nr, &lock_addr);
if (!ret) {
/* Lock the region that needs to be updated */
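Editor's note: lock_region() above derives a lock address whose size is the smallest naturally aligned power-of-two region covering the requested pages, clamped to a minimum that now comes from the GPU properties. A rough userspace sketch of that size computation, assuming a 4 KiB page shift and an example 32 KiB minimum, and leaving out the final hardware encoding:

    #include <stdint.h>
    #include <stdio.h>

    #define SKETCH_PAGE_SHIFT 12

    /* Smallest naturally aligned power-of-two region (as a log2 size) that
     * covers num_pages pages starting at pfn, clamped to min_size_log2.
     */
    static unsigned int region_size_log2(uint64_t pfn, uint32_t num_pages,
                                         unsigned int min_size_log2)
    {
        const uint64_t base = pfn << SKETCH_PAGE_SHIFT;
        const uint64_t end = ((pfn + num_pages) << SKETCH_PAGE_SHIFT) - 1;
        unsigned int log2 = SKETCH_PAGE_SHIFT;

        /* Grow the region until one aligned 2^log2 block holds both the
         * first and the last byte.
         */
        while (log2 < 64 && (base >> log2) != (end >> log2))
            log2++;

        return log2 < min_size_log2 ? min_size_log2 : log2;
    }

    int main(void)
    {
        /* Three pages at PFN 5 fit in an aligned 16 KiB block (2^14), then
         * get clamped up to the assumed 32 KiB (2^15) minimum.
         */
        printf("size log2 = %u\n", region_size_log2(5, 3, 15));
        return 0;
    }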
diff --git a/mali_kbase/mmu/mali_kbase_mmu_internal.h b/mali_kbase/mmu/mali_kbase_mmu_internal.h
index b8cd55f..9d7ce48 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_internal.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -50,7 +50,7 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
* Used to switch to incremental rendering if we have nearly run out of
* virtual address space in a growable memory region.
*
- * Return 0 if successful, otherwise a negative error code.
+ * Return: 0 if successful, otherwise a negative error code.
*/
int kbase_mmu_switch_to_ir(struct kbase_context *kctx,
struct kbase_va_region *reg);
diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
index 6ef4c9d..c061099 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -204,7 +204,7 @@ static void entry_invalidate(u64 *entry)
page_table_entry_set(entry, ENTRY_IS_INVAL);
}
-static struct kbase_mmu_mode const aarch64_mode = {
+static const struct kbase_mmu_mode aarch64_mode = {
.update = mmu_update,
.get_as_setup = kbase_mmu_get_as_setup,
.disable_as = mmu_disable_as,
diff --git a/mali_kbase/platform/devicetree/mali_kbase_config_platform.c b/mali_kbase/platform/devicetree/mali_kbase_config_platform.c
index 63aa33f..2eebed0 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_config_platform.c
+++ b/mali_kbase/platform/devicetree/mali_kbase_config_platform.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
index 927d5c2..743885f 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Power management configuration
+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration
*
* Attached value: pointer to @ref kbase_pm_callback_conf
* Default value: See @ref kbase_pm_callback_conf
@@ -28,7 +28,7 @@
#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
/**
- * Platform specific configuration functions
+ * PLATFORM_FUNCS - Platform specific configuration functions
*
* Attached value: pointer to @ref kbase_platform_funcs_conf
* Default value: See @ref kbase_platform_funcs_conf
@@ -41,7 +41,7 @@ extern struct kbase_pm_callback_conf pm_callbacks;
extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops;
extern struct kbase_platform_funcs_conf platform_funcs;
/**
- * Autosuspend delay
+ * AUTO_SUSPEND_DELAY - Autosuspend delay
*
* The delay time (in milliseconds) to be used for autosuspend
*/
diff --git a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
index 9ae2c02..3881d28 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
@@ -21,6 +21,7 @@
#include <mali_kbase.h>
#include <mali_kbase_defs.h>
+#include <device/mali_kbase_device.h>
#include <linux/pm_runtime.h>
#include <linux/clk.h>
#include <linux/clk-provider.h>
@@ -49,6 +50,7 @@ static void enable_gpu_power_control(struct kbase_device *kbdev)
}
}
+
static void disable_gpu_power_control(struct kbase_device *kbdev)
{
unsigned int i;
@@ -71,6 +73,7 @@ static void disable_gpu_power_control(struct kbase_device *kbdev)
WARN_ON(regulator_disable(kbdev->regulators[i]));
}
#endif
+
}
static int pm_callback_power_on(struct kbase_device *kbdev)
@@ -96,8 +99,8 @@ static int pm_callback_power_on(struct kbase_device *kbdev)
#else
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- enable_gpu_power_control(kbdev);
error = pm_runtime_get_sync(kbdev->dev);
+ enable_gpu_power_control(kbdev);
if (error == 1) {
/*
@@ -200,7 +203,7 @@ static int kbase_device_runtime_init(struct kbase_device *kbdev)
{
int ret = 0;
- dev_dbg(kbdev->dev, "kbase_device_runtime_init\n");
+ dev_dbg(kbdev->dev, "%s\n", __func__);
pm_runtime_set_autosuspend_delay(kbdev->dev, AUTO_SUSPEND_DELAY);
pm_runtime_use_autosuspend(kbdev->dev);
@@ -223,7 +226,7 @@ static int kbase_device_runtime_init(struct kbase_device *kbdev)
static void kbase_device_runtime_disable(struct kbase_device *kbdev)
{
- dev_dbg(kbdev->dev, "kbase_device_runtime_disable\n");
+ dev_dbg(kbdev->dev, "%s\n", __func__);
if (atomic_read(&kbdev->dev->power.usage_count))
dev_warn(kbdev->dev,
@@ -236,7 +239,7 @@ static void kbase_device_runtime_disable(struct kbase_device *kbdev)
static int pm_callback_runtime_on(struct kbase_device *kbdev)
{
- dev_dbg(kbdev->dev, "pm_callback_runtime_on\n");
+ dev_dbg(kbdev->dev, "%s\n", __func__);
enable_gpu_power_control(kbdev);
return 0;
@@ -244,7 +247,7 @@ static int pm_callback_runtime_on(struct kbase_device *kbdev)
static void pm_callback_runtime_off(struct kbase_device *kbdev)
{
- dev_dbg(kbdev->dev, "pm_callback_runtime_off\n");
+ dev_dbg(kbdev->dev, "%s\n", __func__);
disable_gpu_power_control(kbdev);
}
@@ -261,6 +264,7 @@ static void pm_callback_suspend(struct kbase_device *kbdev)
pm_callback_runtime_off(kbdev);
}
+
struct kbase_pm_callback_conf pm_callbacks = {
.power_on_callback = pm_callback_power_on,
.power_off_callback = pm_callback_power_off,
diff --git a/mali_kbase/platform/vexpress/mali_kbase_config_platform.h b/mali_kbase/platform/vexpress/mali_kbase_config_platform.h
index d8682db..28f4531 100644
--- a/mali_kbase/platform/vexpress/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/vexpress/mali_kbase_config_platform.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Power management configuration
+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration
*
* Attached value: pointer to @ref kbase_pm_callback_conf
* Default value: See @ref kbase_pm_callback_conf
@@ -28,7 +28,7 @@
#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
/**
- * Platform specific configuration functions
+ * PLATFORM_FUNCS - Platform specific configuration functions
*
* Attached value: pointer to @ref kbase_platform_funcs_conf
* Default value: See @ref kbase_platform_funcs_conf
diff --git a/mali_kbase/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h b/mali_kbase/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
index d8682db..28f4531 100644
--- a/mali_kbase/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/vexpress_1xv7_a57/mali_kbase_config_platform.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Power management configuration
+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration
*
* Attached value: pointer to @ref kbase_pm_callback_conf
* Default value: See @ref kbase_pm_callback_conf
@@ -28,7 +28,7 @@
#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
/**
- * Platform specific configuration functions
+ * PLATFORM_FUNCS - Platform specific configuration functions
*
* Attached value: pointer to @ref kbase_platform_funcs_conf
* Default value: See @ref kbase_platform_funcs_conf
diff --git a/mali_kbase/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h b/mali_kbase/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
index d8682db..28f4531 100644
--- a/mali_kbase/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/vexpress_6xvirtex7_10mhz/mali_kbase_config_platform.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Power management configuration
+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration
*
* Attached value: pointer to @ref kbase_pm_callback_conf
* Default value: See @ref kbase_pm_callback_conf
@@ -28,7 +28,7 @@
#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
/**
- * Platform specific configuration functions
+ * PLATFORM_FUNCS - Platform specific configuration functions
*
* Attached value: pointer to @ref kbase_platform_funcs_conf
* Default value: See @ref kbase_platform_funcs_conf
diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h
index 79b1eac..c4c713c 100644
--- a/mali_kbase/tests/include/kutf/kutf_helpers.h
+++ b/mali_kbase/tests/include/kutf/kutf_helpers.h
@@ -81,17 +81,4 @@ int kutf_helper_input_enqueue(struct kutf_context *context,
*/
void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context);
-/* kutf_helper_external_reset_gpu() - Mimic power-on-reset using external reset
- *
- * Reset GPU using FPGA SYSCTL register.
- *
- * Note that
- * - It must be called on the platform that has FPGA SYSCTL
- * register available such as Juno board.
- * - It won't reinitialize GPU related settings such as interrupt for kbase.
- *
- * Return: 0 on success, negative value otherwise.
- */
-int kutf_helper_external_reset_gpu(void);
-
#endif /* _KERNEL_UTF_HELPERS_H_ */
diff --git a/mali_kbase/tests/include/kutf/kutf_suite.h b/mali_kbase/tests/include/kutf/kutf_suite.h
index b9c333b..9e459c5 100644
--- a/mali_kbase/tests/include/kutf/kutf_suite.h
+++ b/mali_kbase/tests/include/kutf/kutf_suite.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,36 +43,36 @@
#define KUTF_MAX_LINE_LENGTH (1024u)
/**
- * Pseudo-flag indicating an absence of any specified test class. Note that
- * tests should not be annotated with this constant as it is simply a zero
+ * KUTF_F_TEST_NONE - Pseudo-flag indicating an absence of any specified test class.
+ * Note that tests should not be annotated with this constant as it is simply a zero
* value; tests without a more specific class must be marked with the flag
* KUTF_F_TEST_GENERIC.
*/
#define KUTF_F_TEST_NONE ((unsigned int)(0))
/**
- * Class indicating this test is a smoke test.
+ * KUTF_F_TEST_SMOKETEST - Class indicating this test is a smoke test.
* A given set of smoke tests should be quick to run, enabling rapid turn-around
* of "regress-on-commit" test runs.
*/
#define KUTF_F_TEST_SMOKETEST ((unsigned int)(1 << 1))
/**
- * Class indicating this test is a performance test.
+ * KUTF_F_TEST_PERFORMANCE - Class indicating this test is a performance test.
* These tests typically produce a performance metric, such as "time to run" or
* "frames per second",
*/
#define KUTF_F_TEST_PERFORMANCE ((unsigned int)(1 << 2))
/**
- * Class indicating that this test is a deprecated test.
+ * KUTF_F_TEST_DEPRECATED - Class indicating that this test is a deprecated test.
* These tests have typically been replaced by an alternative test which is
* more efficient, or has better coverage.
*/
#define KUTF_F_TEST_DEPRECATED ((unsigned int)(1 << 3))
/**
- * Class indicating that this test is a known failure.
+ * KUTF_F_TEST_EXPECTED_FAILURE - Class indicating that this test is a known failure.
* These tests have typically been run and failed, but marking them as a known
* failure means it is easier to triage results.
*
@@ -83,68 +83,69 @@
#define KUTF_F_TEST_EXPECTED_FAILURE ((unsigned int)(1 << 4))
/**
- * Class indicating that this test is a generic test, which is not a member of
- * a more specific test class. Tests which are not created with a specific set
+ * KUTF_F_TEST_GENERIC - Class indicating that this test is a generic test,
+ * which is not a member of a more specific test class.
+ * Tests which are not created with a specific set
* of filter flags by the user are assigned this test class by default.
*/
#define KUTF_F_TEST_GENERIC ((unsigned int)(1 << 5))
/**
- * Class indicating this test is a resource allocation failure test.
+ * KUTF_F_TEST_RESFAIL - Class indicating this test is a resource allocation failure test.
* A resource allocation failure test will test that an error code is
* correctly propagated when an allocation fails.
*/
#define KUTF_F_TEST_RESFAIL ((unsigned int)(1 << 6))
/**
- * Additional flag indicating that this test is an expected failure when
- * run in resource failure mode. These tests are never run when running
- * the low resource mode.
+ * KUTF_F_TEST_EXPECTED_FAILURE_RF - Additional flag indicating that this test
+ * is an expected failure when run in resource failure mode.
+ * These tests are never run when running the low resource mode.
*/
#define KUTF_F_TEST_EXPECTED_FAILURE_RF ((unsigned int)(1 << 7))
/**
- * Flag reserved for user-defined filter zero.
+ * KUTF_F_TEST_USER_0 - Flag reserved for user-defined filter zero.
*/
#define KUTF_F_TEST_USER_0 ((unsigned int)(1 << 24))
/**
- * Flag reserved for user-defined filter one.
+ * KUTF_F_TEST_USER_1 - Flag reserved for user-defined filter one.
*/
#define KUTF_F_TEST_USER_1 ((unsigned int)(1 << 25))
/**
- * Flag reserved for user-defined filter two.
+ * KUTF_F_TEST_USER_2 - Flag reserved for user-defined filter two.
*/
#define KUTF_F_TEST_USER_2 ((unsigned int)(1 << 26))
/**
- * Flag reserved for user-defined filter three.
+ * KUTF_F_TEST_USER_3 - Flag reserved for user-defined filter three.
*/
#define KUTF_F_TEST_USER_3 ((unsigned int)(1 << 27))
/**
- * Flag reserved for user-defined filter four.
+ * KUTF_F_TEST_USER_4 - Flag reserved for user-defined filter four.
*/
#define KUTF_F_TEST_USER_4 ((unsigned int)(1 << 28))
/**
- * Flag reserved for user-defined filter five.
+ * KUTF_F_TEST_USER_5 - Flag reserved for user-defined filter five.
*/
#define KUTF_F_TEST_USER_5 ((unsigned int)(1 << 29))
/**
- * Flag reserved for user-defined filter six.
+ * KUTF_F_TEST_USER_6 - Flag reserved for user-defined filter six.
*/
#define KUTF_F_TEST_USER_6 ((unsigned int)(1 << 30))
/**
- * Flag reserved for user-defined filter seven.
+ * KUTF_F_TEST_USER_7 - Flag reserved for user-defined filter seven.
*/
#define KUTF_F_TEST_USER_7 ((unsigned int)(1 << 31))
/**
- * Pseudo-flag indicating that all test classes should be executed.
+ * KUTF_F_TEST_ALL - Pseudo-flag indicating that all test classes should be executed.
*/
#define KUTF_F_TEST_ALL ((unsigned int)(0xFFFFFFFFU))
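Editor's note: these class and user flags are plain bitmask filters. The short sketch below shows combining them into a filter and testing a flag set against it; the SKETCH_ macros and the "any matching class" rule are assumptions for illustration, not the KUTF implementation.

    #include <stdbool.h>
    #include <stdio.h>

    #define SKETCH_F_TEST_SMOKETEST   ((unsigned int)(1 << 1))
    #define SKETCH_F_TEST_PERFORMANCE ((unsigned int)(1 << 2))
    #define SKETCH_F_TEST_USER_0      ((unsigned int)(1 << 24))

    /* A test is selected when it carries at least one requested class. */
    static bool matches_filter(unsigned int test_flags, unsigned int filter)
    {
        return (test_flags & filter) != 0;
    }

    int main(void)
    {
        const unsigned int filter =
            SKETCH_F_TEST_SMOKETEST | SKETCH_F_TEST_USER_0;

        printf("%d\n", matches_filter(SKETCH_F_TEST_SMOKETEST, filter));   /* 1 */
        printf("%d\n", matches_filter(SKETCH_F_TEST_PERFORMANCE, filter)); /* 0 */
        return 0;
    }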
diff --git a/mali_kbase/tests/include/kutf/kutf_utils.h b/mali_kbase/tests/include/kutf/kutf_utils.h
index 5f6d769..f6e758b 100644
--- a/mali_kbase/tests/include/kutf/kutf_utils.h
+++ b/mali_kbase/tests/include/kutf/kutf_utils.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014, 2017, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -33,8 +33,8 @@
#include <kutf/kutf_mem.h>
/**
- * Maximum size of the message strings within kernel UTF, messages longer then
- * this will be truncated.
+ * KUTF_MAX_DSPRINTF_LEN - Maximum size of the message strings within
+ * kernel UTF, messages longer than this will be truncated.
*/
#define KUTF_MAX_DSPRINTF_LEN 1024
diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c
index d76cebe..d207d1c 100644
--- a/mali_kbase/tests/kutf/kutf_helpers.c
+++ b/mali_kbase/tests/kutf/kutf_helpers.c
@@ -28,10 +28,6 @@
#include <linux/wait.h>
#include <linux/uaccess.h>
#include <linux/export.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include "gpu/mali_kbase_gpu_regmap.h"
-#include <device/mali_kbase_device.h>
static DEFINE_SPINLOCK(kutf_input_lock);
@@ -131,44 +127,3 @@ void kutf_helper_input_enqueue_end_of_data(struct kutf_context *context)
{
kutf_helper_input_enqueue(context, NULL, 0);
}
-
-/* Values are taken from juno-fpga.dtsi */
-#define FPGA_SYSCTL_START_ADDR ((resource_size_t)0x6f020000)
-#define FPGA_SYSCTL_SIZE ((size_t)0xCC)
-
-/* Offset of FPGA_SYSCTL_GPU_RESET_REG register */
-#define FPGA_SYSCTL_GPU_RESET_REG 0x64
-#define GPU_RESET_HIGH 0x1
-#define GPU_RESET_LOW 0x0
-
-int kutf_helper_external_reset_gpu(void)
-{
- void __iomem *regs = NULL;
- void __iomem *gpu_reset_reg = NULL;
- int error = -ENXIO;
- int repeat = 100;
-
- regs = ioremap(FPGA_SYSCTL_START_ADDR, FPGA_SYSCTL_SIZE);
- if (!regs)
- return -ENOMEM;
-
- /* Reset GPU via SYSCTL_GPU_RESET by rising & falling the reset signal */
- gpu_reset_reg = regs + FPGA_SYSCTL_GPU_RESET_REG;
- while (error && repeat--) {
- writel(GPU_RESET_HIGH, gpu_reset_reg);
- if (readl(gpu_reset_reg) == GPU_RESET_HIGH) {
- mdelay(100);
- writel(GPU_RESET_LOW, gpu_reset_reg);
- mdelay(100);
-
- /* Succeed in resetting GPU */
- if (readl(gpu_reset_reg) == GPU_RESET_LOW)
- error = 0;
- }
- }
-
- iounmap(regs);
-
- return error;
-}
-EXPORT_SYMBOL(kutf_helper_external_reset_gpu);
diff --git a/mali_kbase/tests/kutf/kutf_helpers_user.c b/mali_kbase/tests/kutf/kutf_helpers_user.c
index a8b59f7..f88e138 100644
--- a/mali_kbase/tests/kutf/kutf_helpers_user.c
+++ b/mali_kbase/tests/kutf/kutf_helpers_user.c
@@ -368,7 +368,7 @@ int kutf_helper_receive_named_val(
named_val->u.val_str = strval;
break;
default:
- pr_err("Unreachable, fix kutf_helper_receive_named_val\n");
+ pr_err("Unreachable, fix %s\n", __func__);
/* Coding error, report as though 'run' file failed */
return -EINVAL;
}
diff --git a/mali_kbase/tests/kutf/kutf_resultset.c b/mali_kbase/tests/kutf/kutf_resultset.c
index c7572bd..3a7ade2 100644
--- a/mali_kbase/tests/kutf/kutf_resultset.c
+++ b/mali_kbase/tests/kutf/kutf_resultset.c
@@ -89,7 +89,7 @@ int kutf_add_result(struct kutf_context *context,
void kutf_destroy_result_set(struct kutf_result_set *set)
{
if (!list_empty(&set->results))
- pr_err("kutf_destroy_result_set: Unread results from test\n");
+ pr_err("%s: Unread results from test\n", __func__);
kfree(set);
}
diff --git a/mali_kbase/tests/kutf/kutf_suite.c b/mali_kbase/tests/kutf/kutf_suite.c
index d45d9df..91065b5 100644
--- a/mali_kbase/tests/kutf/kutf_suite.c
+++ b/mali_kbase/tests/kutf/kutf_suite.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014, 2017-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2017-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -319,7 +319,8 @@ static void kutf_run_test(struct work_struct *data)
}
/**
- * kutf_debugfs_run_open() Debugfs open callback for the "run" entry.
+ * kutf_debugfs_run_open() - Debugfs open callback for the "run" entry.
+ *
* @inode: inode of the opened file
* @file: Opened file to read from
*
@@ -493,7 +494,7 @@ exit:
}
/**
- * kutf_debugfs_run_write() Debugfs write callback for the "run" entry.
+ * kutf_debugfs_run_write() - Debugfs write callback for the "run" entry.
* @file: Opened file to write to
* @buf: User buffer to read the data from
* @len: Amount of data to write
@@ -589,7 +590,7 @@ static int create_fixture_variant(struct kutf_test_function *test_func,
goto fail_dir;
}
- tmp = debugfs_create_file("type", S_IROTH, test_fix->dir, "fixture\n",
+ tmp = debugfs_create_file("type", 0004, test_fix->dir, "fixture\n",
&kutf_debugfs_const_string_ops);
if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when adding fixture\n");
@@ -671,7 +672,7 @@ void kutf_add_test_with_filters_and_data(
goto fail_dir;
}
- tmp = debugfs_create_file("type", S_IROTH, test_func->dir, "test\n",
+ tmp = debugfs_create_file("type", 0004, test_func->dir, "test\n",
&kutf_debugfs_const_string_ops);
if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
@@ -680,10 +681,10 @@ void kutf_add_test_with_filters_and_data(
test_func->filters = filters;
#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
- tmp = debugfs_create_file_unsafe("filters", S_IROTH, test_func->dir,
+ tmp = debugfs_create_file_unsafe("filters", 0004, test_func->dir,
&test_func->filters, &kutfp_fops_x32_ro);
#else
- tmp = debugfs_create_x32("filters", S_IROTH, test_func->dir,
+ tmp = debugfs_create_x32("filters", 0004, test_func->dir,
&test_func->filters);
#endif
if (IS_ERR_OR_NULL(tmp)) {
@@ -693,10 +694,10 @@ void kutf_add_test_with_filters_and_data(
test_func->test_id = id;
#if KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE
- debugfs_create_u32("test_id", S_IROTH, test_func->dir,
- &test_func->test_id);
+ debugfs_create_u32("test_id", 0004, test_func->dir,
+ &test_func->test_id);
#else
- tmp = debugfs_create_u32("test_id", S_IROTH, test_func->dir,
+ tmp = debugfs_create_u32("test_id", 0004, test_func->dir,
&test_func->test_id);
if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"test_id\" when adding test %s\n", name);
@@ -766,7 +767,7 @@ void kutf_add_test(struct kutf_suite *suite,
EXPORT_SYMBOL(kutf_add_test);
/**
- * kutf_remove_test(): Remove a previously added test function.
+ * kutf_remove_test() - Remove a previously added test function.
* @test_func: Test function
*/
static void kutf_remove_test(struct kutf_test_function *test_func)
@@ -810,7 +811,7 @@ struct kutf_suite *kutf_create_suite_with_filters_and_data(
goto fail_debugfs;
}
- tmp = debugfs_create_file("type", S_IROTH, suite->dir, "suite\n",
+ tmp = debugfs_create_file("type", 0004, suite->dir, "suite\n",
&kutf_debugfs_const_string_ops);
if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when adding test %s\n", name);
@@ -918,7 +919,7 @@ struct kutf_application *kutf_create_application(const char *name)
goto fail_debugfs;
}
- tmp = debugfs_create_file("type", S_IROTH, app->dir, "application\n",
+ tmp = debugfs_create_file("type", 0004, app->dir, "application\n",
&kutf_debugfs_const_string_ops);
if (IS_ERR_OR_NULL(tmp)) {
pr_err("Failed to create debugfs file \"type\" when creating application %s\n", name);
@@ -1162,8 +1163,9 @@ EXPORT_SYMBOL(kutf_test_abort);
/**
* init_kutf_core() - Module entry point.
- *
* Create the base entry point in debugfs.
+ *
+ * Return: 0 on success, error code otherwise.
*/
static int __init init_kutf_core(void)
{
@@ -1197,9 +1199,10 @@ static void __exit exit_kutf_core(void)
#else /* CONFIG_DEBUG_FS */
/**
- * init_kutf_core() - Module entry point.
+ * init_kutf_core - Module entry point
+ * Stub for when built against a kernel without debugfs support.
*
- * Stub for when build against a kernel without debugfs support
+ * Return: -ENODEV
*/
static int __init init_kutf_core(void)
{
diff --git a/mali_kbase/tests/kutf/kutf_utils.c b/mali_kbase/tests/kutf/kutf_utils.c
index c0fb3ba..2ae1510 100644
--- a/mali_kbase/tests/kutf/kutf_utils.c
+++ b/mali_kbase/tests/kutf/kutf_utils.c
@@ -47,12 +47,12 @@ const char *kutf_dsprintf(struct kutf_mempool *pool,
va_end(args);
if (len < 0) {
- pr_err("kutf_dsprintf: Bad format dsprintf format %s\n", fmt);
+ pr_err("%s: Bad format dsprintf format %s\n", __func__, fmt);
goto fail_format;
}
if (len >= sizeof(tmp_buffer)) {
- pr_warn("kutf_dsprintf: Truncated dsprintf message %s\n", fmt);
+ pr_warn("%s: Truncated dsprintf message %s\n", __func__, fmt);
size = sizeof(tmp_buffer);
} else {
size = len + 1;
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index 87bcb31..935f8ca 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -195,7 +195,7 @@ static void kutf_set_pm_ctx_idle(struct kutf_context *context)
kbase_pm_context_idle(data->kbdev);
}
-static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context,
+static const char *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context,
struct clk_trace_portal_input *cmd)
{
struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
@@ -232,7 +232,7 @@ static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context,
return errmsg;
}
-static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context,
+static const char *kutf_clk_trace_do_get_rate(struct kutf_context *context,
struct clk_trace_portal_input *cmd)
{
struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
@@ -293,8 +293,10 @@ static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context,
* current snapshot record, and the start of the next one. The response
* message contains the current snapshot record, with each clock's
* data sequentially placed inside (array marker) [ ].
+ *
+ * Return: generated string
*/
-static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context,
+static const char *kutf_clk_trace_do_get_snapshot(struct kutf_context *context,
struct clk_trace_portal_input *cmd)
{
struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
@@ -351,8 +353,10 @@ static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context,
*
* Invokes frequency change notification callbacks with a fake
* GPU frequency 42 kHz for the top clock domain.
+ *
+ * Return: generated string
*/
-static char const *kutf_clk_trace_do_invoke_notify_42k(
+static const char *kutf_clk_trace_do_invoke_notify_42k(
struct kutf_context *context,
struct clk_trace_portal_input *cmd)
{
@@ -392,7 +396,7 @@ static char const *kutf_clk_trace_do_invoke_notify_42k(
return errmsg;
}
-static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context,
+static const char *kutf_clk_trace_do_close_portal(struct kutf_context *context,
struct clk_trace_portal_input *cmd)
{
struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
@@ -426,7 +430,7 @@ static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context,
*
* Return: A string to indicate the platform (PV/PTM/GPU/UNKNOWN)
*/
-static char const *kutf_clk_trace_do_get_platform(
+static const char *kutf_clk_trace_do_get_platform(
struct kutf_context *context,
struct clk_trace_portal_input *cmd)
{
@@ -570,6 +574,8 @@ static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context,
*
* This function deal with an erroneous input request, and respond with
* a proper 'NACK' message.
+ *
+ * Return: 0 on success, non-zero on failure
*/
static int kutf_clk_trace_do_nack_response(struct kutf_context *context,
struct clk_trace_portal_input *cmd)
@@ -871,8 +877,8 @@ static void *mali_kutf_clk_rate_trace_create_fixture(
}
/**
- * Destroy fixture data previously created by
- * mali_kutf_clk_rate_trace_create_fixture.
+ * mali_kutf_clk_rate_trace_remove_fixture - Destroy fixture data previously created by
+ * mali_kutf_clk_rate_trace_create_fixture.
*
* @context: KUTF context.
*/
@@ -896,6 +902,8 @@ static void mali_kutf_clk_rate_trace_remove_fixture(
/**
* mali_kutf_clk_rate_trace_test_module_init() - Entry point for test mdoule.
+ *
+ * Return: 0 on success, error code otherwise
*/
static int __init mali_kutf_clk_rate_trace_test_module_init(void)
{
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
index f37efa8..a716b9f 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -88,8 +88,10 @@ enum kbasep_clk_rate_trace_req {
};
/**
- * Portal service request command names. The portal request consists of a kutf
- * named u64-value. For those above enumerated PORTAL_CMD, the names defined
+ * DOC: Portal service request command names.
+ *
+ * The portal request consists of a kutf named u64-value.
+ * For those above enumerated PORTAL_CMD, the names defined
* here are used to mark the name and then followed with a sequence number
* value. Example (manual script here for illustration):
* exec 5<>run # open the portal kutf run as fd-5
@@ -134,9 +136,10 @@ enum kbasep_clk_rate_trace_req {
#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ"
/**
- * Portal service response tag names. The response consists of a kutf
- * named string-value. In case of a 'NACK' (negative acknowledgement), it
- * can be one of the two formats:
+ * DOC: Portal service response tag names.
+ *
+ * The response consists of a kutf named string-value.
+ * In case of a 'NACK' (negative acknowledgment), it can be one of two formats:
* 1. NACK="{SEQ:2, MSG:xyzed}" # NACK on command with sequence tag-2.
* Note, the portal has received a valid name and valid sequence number
* but can't carry-out the request, reason in the MSG field.
diff --git a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
index 7799a68..5824a4c 100644
--- a/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
+++ b/mali_kbase/tests/mali_kutf_irq_test/mali_kutf_irq_test_main.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2016-2018, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2016-2018, 2020-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,7 +43,7 @@
struct kutf_application *irq_app;
/**
- * struct kutf_irq_fixture data - test fixture used by the test functions.
+ * struct kutf_irq_fixture_data - test fixture used by the test functions.
* @kbdev: kbase device for the GPU.
*
*/
@@ -234,7 +234,9 @@ static void mali_kutf_irq_latency(struct kutf_context *context)
}
/**
- * Module entry point for this test.
+ * mali_kutf_irq_test_main_init - Module entry point for this test.
+ *
+ * Return: 0 on success, error code otherwise
*/
static int __init mali_kutf_irq_test_main_init(void)
{
@@ -263,7 +265,7 @@ static int __init mali_kutf_irq_test_main_init(void)
}
/**
- * Module exit point for this test.
+ * mali_kutf_irq_test_main_exit - Module exit point for this test.
*/
static void __exit mali_kutf_irq_test_main_exit(void)
{
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index de1199a..34d2223 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -127,7 +127,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
*
* Return: address of the found gap end (high limit) if area is found;
* -ENOMEM if search is unsuccessful
-*/
+ */
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
*info, bool is_shader_code, bool is_same_4gb_page)
@@ -301,45 +301,45 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
#endif /* CONFIG_64BIT */
if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
- int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
- struct kbase_va_region *reg;
-
- /* Need to hold gpu vm lock when using reg */
- kbase_gpu_vm_lock(kctx);
- reg = kctx->pending_regions[cookie];
- if (!reg) {
- kbase_gpu_vm_unlock(kctx);
- return -EINVAL;
+ int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
+ struct kbase_va_region *reg;
+
+ /* Need to hold gpu vm lock when using reg */
+ kbase_gpu_vm_lock(kctx);
+ reg = kctx->pending_regions[cookie];
+ if (!reg) {
+ kbase_gpu_vm_unlock(kctx);
+ return -EINVAL;
+ }
+ if (!(reg->flags & KBASE_REG_GPU_NX)) {
+ if (cpu_va_bits > gpu_pc_bits) {
+ align_offset = 1ULL << gpu_pc_bits;
+ align_mask = align_offset - 1;
+ is_shader_code = true;
}
- if (!(reg->flags & KBASE_REG_GPU_NX)) {
- if (cpu_va_bits > gpu_pc_bits) {
- align_offset = 1ULL << gpu_pc_bits;
- align_mask = align_offset - 1;
- is_shader_code = true;
- }
#if !MALI_USE_CSF
- } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
- unsigned long extension_bytes =
- (unsigned long)(reg->extension
- << PAGE_SHIFT);
- /* kbase_check_alloc_sizes() already satisfies
- * these checks, but they're here to avoid
- * maintenance hazards due to the assumptions
- * involved
- */
- WARN_ON(reg->extension >
- (ULONG_MAX >> PAGE_SHIFT));
- WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
- WARN_ON(!is_power_of_2(extension_bytes));
- align_mask = extension_bytes - 1;
- align_offset =
- extension_bytes -
- (reg->initial_commit << PAGE_SHIFT);
+ } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
+ unsigned long extension_bytes =
+ (unsigned long)(reg->extension
+ << PAGE_SHIFT);
+ /* kbase_check_alloc_sizes() already satisfies
+ * these checks, but they're here to avoid
+ * maintenance hazards due to the assumptions
+ * involved
+ */
+ WARN_ON(reg->extension >
+ (ULONG_MAX >> PAGE_SHIFT));
+ WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
+ WARN_ON(!is_power_of_2(extension_bytes));
+ align_mask = extension_bytes - 1;
+ align_offset =
+ extension_bytes -
+ (reg->initial_commit << PAGE_SHIFT);
#endif /* !MALI_USE_CSF */
- } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
- is_same_4gb_page = true;
- }
- kbase_gpu_vm_unlock(kctx);
+ } else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
+ is_same_4gb_page = true;
+ }
+ kbase_gpu_vm_unlock(kctx);
#ifndef CONFIG_64BIT
} else {
return current->mm->get_unmapped_area(
diff --git a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
index 567c5f1..a6062f1 100644
--- a/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
+++ b/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
@@ -164,7 +164,7 @@ void kbase_create_timeline_objects(struct kbase_device *kbdev)
if (kcpu_queue)
__kbase_tlstream_tl_kbase_new_kcpuqueue(
- body, kcpu_queue, kcpu_queue->kctx->id,
+ body, kcpu_queue, kcpu_queue->id, kcpu_queue->kctx->id,
kcpu_queue->num_pending_cmds);
}
diff --git a/mali_kbase/tl/mali_kbase_timeline.c b/mali_kbase/tl/mali_kbase_timeline.c
index af10cf5..d656c03 100644
--- a/mali_kbase/tl/mali_kbase_timeline.c
+++ b/mali_kbase/tl/mali_kbase_timeline.c
@@ -197,8 +197,7 @@ int kbase_timeline_io_acquire(struct kbase_device *kbdev, u32 flags)
if (flags & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) {
ret = kbase_csf_tl_reader_start(
&timeline->csf_tl_reader, kbdev);
- if (ret)
- {
+ if (ret) {
atomic_set(timeline->timeline_flags, 0);
return ret;
}
diff --git a/mali_kbase/tl/mali_kbase_timeline.h b/mali_kbase/tl/mali_kbase_timeline.h
index 63926eb..96a4b18 100644
--- a/mali_kbase/tl/mali_kbase_timeline.h
+++ b/mali_kbase/tl/mali_kbase_timeline.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -76,9 +76,9 @@ int kbase_timeline_streams_flush(struct kbase_timeline *timeline);
/**
* kbase_timeline_streams_body_reset - reset timeline body streams.
+ * @timeline: Timeline instance
*
* Function will discard pending data in all timeline body streams.
- * @timeline: Timeline instance
*/
void kbase_timeline_streams_body_reset(struct kbase_timeline *timeline);
diff --git a/mali_kbase/tl/mali_kbase_timeline_io.c b/mali_kbase/tl/mali_kbase_timeline_io.c
index 23e42ad..3391e75 100644
--- a/mali_kbase/tl/mali_kbase_timeline_io.c
+++ b/mali_kbase/tl/mali_kbase_timeline_io.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -115,7 +115,7 @@ static int kbasep_timeline_has_header_data(struct kbase_timeline *timeline)
* @hdr_size: Header size.
* @hdr_btc: Pointer to the remaining number of bytes to copy.
*
- * Returns: 0 if success, -1 otherwise.
+ * Return: 0 if success, -1 otherwise.
*/
static inline int copy_stream_header(char __user *buffer, size_t size,
ssize_t *copy_len, const char *hdr,
@@ -152,7 +152,7 @@ static inline int copy_stream_header(char __user *buffer, size_t size,
* to the user, and if so, sends them. copy_len is respectively
* updated.
*
- * Returns: 0 if success, -1 if copy_to_user has failed.
+ * Return: 0 if success, -1 if copy_to_user has failed.
*/
static inline int kbasep_timeline_copy_headers(struct kbase_timeline *timeline,
char __user *buffer, size_t size,
@@ -289,6 +289,7 @@ static ssize_t kbasep_timeline_io_read(struct file *filp, char __user *buffer,
* kbasep_timeline_io_poll - poll timeline stream for packets
* @filp: Pointer to file structure
* @wait: Pointer to poll table
+ *
* Return: POLLIN if data can be read without blocking, otherwise zero
*/
static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
@@ -320,7 +321,7 @@ static unsigned int kbasep_timeline_io_poll(struct file *filp, poll_table *wait)
* @inode: Pointer to inode structure
* @filp: Pointer to file structure
*
- * Return always return zero
+ * Return: always returns zero
*/
static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
{
@@ -344,7 +345,7 @@ static int kbasep_timeline_io_release(struct inode *inode, struct file *filp)
elapsed_time = ktime_sub(ktime_get(), timeline->last_acquire_time);
elapsed_time_ms = ktime_to_ms(elapsed_time);
time_to_sleep = MIN(TIMELINE_HYSTERESIS_TIMEOUT_MS,
- TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms);
+ TIMELINE_HYSTERESIS_TIMEOUT_MS - elapsed_time_ms);
if (time_to_sleep > 0)
msleep(time_to_sleep);
diff --git a/mali_kbase/tl/mali_kbase_tl_serialize.h b/mali_kbase/tl/mali_kbase_tl_serialize.h
index 30d120d..b6aaade 100644
--- a/mali_kbase/tl/mali_kbase_tl_serialize.h
+++ b/mali_kbase/tl/mali_kbase_tl_serialize.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,13 +32,13 @@
/**
* kbasep_serialize_bytes - serialize bytes to the message buffer
*
- * Serialize bytes as is using memcpy()
- *
* @buffer: Message buffer
* @pos: Message buffer offset
* @bytes: Bytes to serialize
* @len: Length of bytes array
*
+ * Serialize bytes as if using memcpy().
+ *
* Return: updated position in the buffer
*/
static inline size_t kbasep_serialize_bytes(
@@ -58,14 +58,14 @@ static inline size_t kbasep_serialize_bytes(
/**
* kbasep_serialize_string - serialize string to the message buffer
*
- * String is serialized as 4 bytes for string size,
- * then string content and then null terminator.
- *
* @buffer: Message buffer
* @pos: Message buffer offset
* @string: String to serialize
* @max_write_size: Number of bytes that can be stored in buffer
*
+ * String is serialized as 4 bytes for string size,
+ * then string content and then null terminator.
+ *
* Return: updated position in the buffer
*/
static inline size_t kbasep_serialize_string(
@@ -84,7 +84,7 @@ static inline size_t kbasep_serialize_string(
KBASE_DEBUG_ASSERT(max_write_size >= sizeof(string_len) + sizeof(char));
max_write_size -= sizeof(string_len);
- string_len = strlcpy(
+ string_len = strscpy(
&buffer[pos + sizeof(string_len)],
string,
max_write_size);
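
The strlcpy() to strscpy() switch above changes the return-value contract: strlcpy() returns the full length of the source string even when the copy is truncated, while strscpy() returns the number of characters actually copied, or -E2BIG when the destination is too small. The following is a minimal userspace sketch of that difference only; lcpy() and scpy() are simplified, illustrative stand-ins and not the kernel implementations or driver code.

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* strlcpy-style stand-in: returns strlen(src), even when truncated. */
static size_t lcpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = (len >= size) ? size - 1 : len;

		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return len;
}

/* strscpy-style stand-in: returns chars copied, or -E2BIG on truncation. */
static long scpy(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (!size)
		return -E2BIG;
	if (len >= size) {
		memcpy(dst, src, size - 1);
		dst[size - 1] = '\0';
		return -E2BIG;
	}
	memcpy(dst, src, len + 1);
	return (long)len;
}

int main(void)
{
	char buf[8];

	/* The strlcpy-style helper reports the length it wanted to write (12)... */
	printf("lcpy: %zu\n", lcpy(buf, "hello, world", sizeof(buf)));
	/* ...the strscpy-style helper reports the truncation directly (-E2BIG). */
	printf("scpy: %ld\n", scpy(buf, "hello, world", sizeof(buf)));
	return 0;
}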
@@ -102,12 +102,12 @@ static inline size_t kbasep_serialize_string(
/**
* kbasep_serialize_timestamp - serialize timestamp to the message buffer
*
- * Get current timestamp using kbasep_get_timestamp()
- * and serialize it as 64 bit unsigned integer.
- *
* @buffer: Message buffer
* @pos: Message buffer offset
*
+ * Get current timestamp using kbasep_get_timestamp()
+ * and serialize it as 64 bit unsigned integer.
+ *
* Return: updated position in the buffer
*/
static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos)
@@ -121,4 +121,3 @@ static inline size_t kbasep_serialize_timestamp(void *buffer, size_t pos)
&timestamp, sizeof(timestamp));
}
#endif /* _KBASE_TL_SERIALIZE_H */
-
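
The kernel-doc blocks above describe the wire format used by the timeline serializers: a string goes out as a 4-byte size, then the characters, then the NUL terminator, and a timestamp is written as a 64-bit unsigned integer. Below is a standalone userspace sketch of that layout; the helper names are illustrative, and the assumption that the stored size counts the terminator is taken from the comment, not from the driver internals.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Sketch of the kbasep_serialize_string() layout: 4-byte size, content, NUL.
 * Assumes the stored size includes the terminator.
 */
static size_t sketch_serialize_string(char *buf, size_t pos, const char *s)
{
	uint32_t string_len = (uint32_t)strlen(s) + 1; /* content + NUL */

	memcpy(&buf[pos], &string_len, sizeof(string_len));
	pos += sizeof(string_len);
	memcpy(&buf[pos], s, string_len);
	return pos + string_len;
}

/* Sketch of the kbasep_serialize_timestamp() layout: one u64 written in place. */
static size_t sketch_serialize_timestamp(char *buf, size_t pos, uint64_t ts)
{
	memcpy(&buf[pos], &ts, sizeof(ts));
	return pos + sizeof(ts);
}

int main(void)
{
	char buf[64];
	size_t pos = 0;

	pos = sketch_serialize_timestamp(buf, pos, 123456789u);
	pos = sketch_serialize_string(buf, pos, "atom");
	printf("bytes used: %zu\n", pos); /* 8 + 4 + 5 = 17 */
	return 0;
}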
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index abbed05..6aae4e0 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/*
- * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * THIS FILE IS AUTOGENERATED BY generate_tracepoints.py.
* DO NOT EDIT.
*/
@@ -30,7 +30,7 @@
/* clang-format off */
-/* Message ids of trace events that are recorded in the timeline stream. */
+/* Message ids of trace events that are recorded in the obj stream. */
enum tl_msg_id_obj {
KBASE_TL_NEW_CTX,
KBASE_TL_NEW_GPU,
@@ -52,10 +52,6 @@ enum tl_msg_id_obj {
KBASE_TL_RET_ATOM_AS,
KBASE_TL_NRET_ATOM_AS,
KBASE_TL_ATTRIB_ATOM_CONFIG,
- KBASE_TL_ATTRIB_ATOM_PRIORITY,
- KBASE_TL_ATTRIB_ATOM_STATE,
- KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
- KBASE_TL_ATTRIB_ATOM_JIT,
KBASE_TL_JIT_USEDPAGES,
KBASE_TL_ATTRIB_ATOM_JITALLOCINFO,
KBASE_TL_ATTRIB_ATOM_JITFREEINFO,
@@ -71,6 +67,22 @@ enum tl_msg_id_obj {
KBASE_TL_ARBITER_STOPPED,
KBASE_TL_ARBITER_REQUESTED,
KBASE_JD_GPU_SOFT_RESET,
+ KBASE_JD_TILER_HEAP_CHUNK_ALLOC,
+ KBASE_TL_JS_SCHED_START,
+ KBASE_TL_JS_SCHED_END,
+ KBASE_TL_JD_SUBMIT_ATOM_START,
+ KBASE_TL_JD_SUBMIT_ATOM_END,
+ KBASE_TL_JD_DONE_NO_LOCK_START,
+ KBASE_TL_JD_DONE_NO_LOCK_END,
+ KBASE_TL_JD_DONE_START,
+ KBASE_TL_JD_DONE_END,
+ KBASE_TL_JD_ATOM_COMPLETE,
+ KBASE_TL_RUN_ATOM_START,
+ KBASE_TL_RUN_ATOM_END,
+ KBASE_TL_ATTRIB_ATOM_PRIORITY,
+ KBASE_TL_ATTRIB_ATOM_STATE,
+ KBASE_TL_ATTRIB_ATOM_PRIORITIZED,
+ KBASE_TL_ATTRIB_ATOM_JIT,
KBASE_TL_KBASE_NEW_DEVICE,
KBASE_TL_KBASE_DEVICE_PROGRAM_CSG,
KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG,
@@ -128,37 +140,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_CSFFW_FW_DISABLING,
KBASE_TL_KBASE_CSFFW_FW_OFF,
KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW,
- KBASE_TL_JS_SCHED_START,
- KBASE_TL_JS_SCHED_END,
- KBASE_TL_JD_SUBMIT_ATOM_START,
- KBASE_TL_JD_SUBMIT_ATOM_END,
- KBASE_TL_JD_DONE_NO_LOCK_START,
- KBASE_TL_JD_DONE_NO_LOCK_END,
- KBASE_TL_JD_DONE_START,
- KBASE_TL_JD_DONE_END,
- KBASE_TL_JD_ATOM_COMPLETE,
- KBASE_TL_RUN_ATOM_START,
- KBASE_TL_RUN_ATOM_END,
KBASE_OBJ_MSG_COUNT,
};
-/* Message ids of trace events that are recorded in the auxiliary stream. */
-enum tl_msg_id_aux {
- KBASE_AUX_PM_STATE,
- KBASE_AUX_PAGEFAULT,
- KBASE_AUX_PAGESALLOC,
- KBASE_AUX_DEVFREQ_TARGET,
- KBASE_AUX_PROTECTED_ENTER_START,
- KBASE_AUX_PROTECTED_ENTER_END,
- KBASE_AUX_PROTECTED_LEAVE_START,
- KBASE_AUX_PROTECTED_LEAVE_END,
- KBASE_AUX_JIT_STATS,
- KBASE_AUX_TILER_HEAP_STATS,
- KBASE_AUX_EVENT_JOB_SLOT,
- KBASE_AUX_MMU_COMMAND,
- KBASE_AUX_MSG_COUNT,
-};
-
#define OBJ_TP_LIST \
TRACEPOINT_DESC(KBASE_TL_NEW_CTX, \
"object ctx is created", \
@@ -240,22 +224,6 @@ enum tl_msg_id_aux {
"atom job slot attributes", \
"@pLLI", \
"atom,descriptor,affinity,config") \
- TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \
- "atom priority", \
- "@pI", \
- "atom,prio") \
- TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \
- "atom state", \
- "@pI", \
- "atom,state") \
- TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \
- "atom caused priority change", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \
- "jit done for atom", \
- "@pLLILILLL", \
- "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \
TRACEPOINT_DESC(KBASE_TL_JIT_USEDPAGES, \
"used pages for jit", \
"@LI", \
@@ -316,6 +284,70 @@ enum tl_msg_id_aux {
"gpu soft reset", \
"@p", \
"gpu") \
+ TRACEPOINT_DESC(KBASE_JD_TILER_HEAP_CHUNK_ALLOC, \
+ "Tiler Heap Chunk Allocation", \
+ "@ILL", \
+ "ctx_nr,heap_id,chunk_va") \
+ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_START, \
+ "Scheduling starts", \
+ "@I", \
+ "dummy") \
+ TRACEPOINT_DESC(KBASE_TL_JS_SCHED_END, \
+ "Scheduling ends", \
+ "@I", \
+ "dummy") \
+ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_START, \
+ "Submitting an atom starts", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_END, \
+ "Submitting an atom ends", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_START, \
+ "Within function jd_done_nolock", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_END, \
+ "Within function jd_done_nolock - end", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_START, \
+ "Start of kbase_jd_done", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_DONE_END, \
+ "End of kbase_jd_done", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_JD_ATOM_COMPLETE, \
+ "Atom marked complete", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_START, \
+ "Running of atom starts", \
+ "@pI", \
+ "atom,atom_nr") \
+ TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_END, \
+ "Running of atom ends", \
+ "@pI", \
+ "atom,atom_nr") \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITY, \
+ "atom priority", \
+ "@pI", \
+ "atom,prio") \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_STATE, \
+ "atom state", \
+ "@pI", \
+ "atom,state") \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_PRIORITIZED, \
+ "atom caused priority change", \
+ "@p", \
+ "atom") \
+ TRACEPOINT_DESC(KBASE_TL_ATTRIB_ATOM_JIT, \
+ "jit done for atom", \
+ "@pLLILILLL", \
+ "atom,edit_addr,new_addr,jit_flags,mem_flags,j_id,com_pgs,extent,va_pgs") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_DEVICE, \
"New KBase Device", \
"@IIIIIII", \
@@ -350,8 +382,8 @@ enum tl_msg_id_aux {
"kernel_ctx_id") \
TRACEPOINT_DESC(KBASE_TL_KBASE_NEW_KCPUQUEUE, \
"New KCPU Queue", \
- "@pII", \
- "kcpu_queue,kernel_ctx_id,kcpuq_num_pending_cmds") \
+ "@pIII", \
+ "kcpu_queue,kcpu_queue_id,kernel_ctx_id,kcpuq_num_pending_cmds") \
TRACEPOINT_DESC(KBASE_TL_KBASE_DEL_KCPUQUEUE, \
"Delete KCPU Queue", \
"@p", \
@@ -543,63 +575,36 @@ enum tl_msg_id_aux {
TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \
"An overflow has happened with the CSFFW Timeline stream", \
"@LL", \
- "csffw_timestamp,csffw_cycle") \
- TRACEPOINT_DESC(KBASE_TL_JS_SCHED_START, \
- "Scheduling starts", \
- "@I", \
- "dummy") \
- TRACEPOINT_DESC(KBASE_TL_JS_SCHED_END, \
- "Scheduling ends", \
- "@I", \
- "dummy") \
- TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_START, \
- "Submitting an atom starts", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_JD_SUBMIT_ATOM_END, \
- "Submitting an atom ends", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_START, \
- "Within function jd_done_nolock", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_JD_DONE_NO_LOCK_END, \
- "Within function jd_done_nolock - end", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_JD_DONE_START, \
- "Start of kbase_jd_done", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_JD_DONE_END, \
- "End of kbase_jd_done", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_JD_ATOM_COMPLETE, \
- "Atom marked complete", \
- "@p", \
- "atom") \
- TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_START, \
- "Running of atom starts", \
- "@pI", \
- "atom,atom_nr") \
- TRACEPOINT_DESC(KBASE_TL_RUN_ATOM_END, \
- "Running of atom ends", \
- "@pI", \
- "atom,atom_nr") \
+ "csffw_timestamp,csffw_cycle")
-#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header
-#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
-#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ
-#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST
-#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT
+#define MIPE_HEADER_BLOB_VAR_NAME __obj_desc_header
+#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
+#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_OBJ
+#define MIPE_HEADER_TRACEPOINT_LIST OBJ_TP_LIST
+#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_OBJ_MSG_COUNT
#include "mali_kbase_mipe_gen_header.h"
const char *obj_desc_header = (const char *) &__obj_desc_header;
const size_t obj_desc_header_size = sizeof(__obj_desc_header);
+/* Message ids of trace events that are recorded in the aux stream. */
+enum tl_msg_id_aux {
+ KBASE_AUX_PM_STATE,
+ KBASE_AUX_PAGEFAULT,
+ KBASE_AUX_PAGESALLOC,
+ KBASE_AUX_DEVFREQ_TARGET,
+ KBASE_AUX_JIT_STATS,
+ KBASE_AUX_TILER_HEAP_STATS,
+ KBASE_AUX_EVENT_JOB_SLOT,
+ KBASE_AUX_PROTECTED_ENTER_START,
+ KBASE_AUX_PROTECTED_ENTER_END,
+ KBASE_AUX_MMU_COMMAND,
+ KBASE_AUX_PROTECTED_LEAVE_START,
+ KBASE_AUX_PROTECTED_LEAVE_END,
+ KBASE_AUX_MSG_COUNT,
+};
+
#define AUX_TP_LIST \
TRACEPOINT_DESC(KBASE_AUX_PM_STATE, \
"PM state", \
@@ -617,22 +622,6 @@ const size_t obj_desc_header_size = sizeof(__obj_desc_header);
"New device frequency target", \
"@L", \
"target_freq") \
- TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \
- "enter protected mode start", \
- "@p", \
- "gpu") \
- TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \
- "enter protected mode end", \
- "@p", \
- "gpu") \
- TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \
- "leave protected mode start", \
- "@p", \
- "gpu") \
- TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \
- "leave protected mode end", \
- "@p", \
- "gpu") \
TRACEPOINT_DESC(KBASE_AUX_JIT_STATS, \
"per-bin JIT statistics", \
"@IIIIII", \
@@ -645,16 +634,32 @@ const size_t obj_desc_header_size = sizeof(__obj_desc_header);
"event on a given job slot", \
"@pIII", \
"ctx,slot_nr,atom_nr,event") \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_START, \
+ "enter protected mode start", \
+ "@p", \
+ "gpu") \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_ENTER_END, \
+ "enter protected mode end", \
+ "@p", \
+ "gpu") \
TRACEPOINT_DESC(KBASE_AUX_MMU_COMMAND, \
"mmu commands with synchronicity info", \
"@IIILI", \
"kernel_ctx_id,mmu_cmd_id,mmu_synchronicity,mmu_lock_addr,mmu_lock_page_num") \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_START, \
+ "leave protected mode start", \
+ "@p", \
+ "gpu") \
+ TRACEPOINT_DESC(KBASE_AUX_PROTECTED_LEAVE_END, \
+ "leave protected mode end", \
+ "@p", \
+ "gpu")
-#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header
-#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
-#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX
-#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST
-#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT
+#define MIPE_HEADER_BLOB_VAR_NAME __aux_desc_header
+#define MIPE_HEADER_STREAM_ID TL_STREAM_ID_KERNEL
+#define MIPE_HEADER_PKT_CLASS TL_PACKET_CLASS_AUX
+#define MIPE_HEADER_TRACEPOINT_LIST AUX_TP_LIST
+#define MIPE_HEADER_TRACEPOINT_LIST_SIZE KBASE_AUX_MSG_COUNT
#include "mali_kbase_mipe_gen_header.h"
@@ -665,7 +670,8 @@ void __kbase_tlstream_tl_new_ctx(
struct kbase_tlstream *stream,
const void *ctx,
u32 ctx_nr,
- u32 tgid)
+ u32 tgid
+)
{
const u32 msg_id = KBASE_TL_NEW_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -695,7 +701,8 @@ void __kbase_tlstream_tl_new_gpu(
struct kbase_tlstream *stream,
const void *gpu,
u32 gpu_id,
- u32 core_count)
+ u32 core_count
+)
{
const u32 msg_id = KBASE_TL_NEW_GPU;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -725,7 +732,8 @@ void __kbase_tlstream_tl_new_lpu(
struct kbase_tlstream *stream,
const void *lpu,
u32 lpu_nr,
- u32 lpu_fn)
+ u32 lpu_fn
+)
{
const u32 msg_id = KBASE_TL_NEW_LPU;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -754,7 +762,8 @@ void __kbase_tlstream_tl_new_lpu(
void __kbase_tlstream_tl_new_atom(
struct kbase_tlstream *stream,
const void *atom,
- u32 atom_nr)
+ u32 atom_nr
+)
{
const u32 msg_id = KBASE_TL_NEW_ATOM;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -780,7 +789,8 @@ void __kbase_tlstream_tl_new_atom(
void __kbase_tlstream_tl_new_as(
struct kbase_tlstream *stream,
const void *address_space,
- u32 as_nr)
+ u32 as_nr
+)
{
const u32 msg_id = KBASE_TL_NEW_AS;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -805,7 +815,8 @@ void __kbase_tlstream_tl_new_as(
void __kbase_tlstream_tl_del_ctx(
struct kbase_tlstream *stream,
- const void *ctx)
+ const void *ctx
+)
{
const u32 msg_id = KBASE_TL_DEL_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -827,7 +838,8 @@ void __kbase_tlstream_tl_del_ctx(
void __kbase_tlstream_tl_del_atom(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *atom
+)
{
const u32 msg_id = KBASE_TL_DEL_ATOM;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -850,7 +862,8 @@ void __kbase_tlstream_tl_del_atom(
void __kbase_tlstream_tl_lifelink_lpu_gpu(
struct kbase_tlstream *stream,
const void *lpu,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_TL_LIFELINK_LPU_GPU;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -876,7 +889,8 @@ void __kbase_tlstream_tl_lifelink_lpu_gpu(
void __kbase_tlstream_tl_lifelink_as_gpu(
struct kbase_tlstream *stream,
const void *address_space,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_TL_LIFELINK_AS_GPU;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -902,7 +916,8 @@ void __kbase_tlstream_tl_lifelink_as_gpu(
void __kbase_tlstream_tl_ret_ctx_lpu(
struct kbase_tlstream *stream,
const void *ctx,
- const void *lpu)
+ const void *lpu
+)
{
const u32 msg_id = KBASE_TL_RET_CTX_LPU;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -928,7 +943,8 @@ void __kbase_tlstream_tl_ret_ctx_lpu(
void __kbase_tlstream_tl_ret_atom_ctx(
struct kbase_tlstream *stream,
const void *atom,
- const void *ctx)
+ const void *ctx
+)
{
const u32 msg_id = KBASE_TL_RET_ATOM_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -955,15 +971,16 @@ void __kbase_tlstream_tl_ret_atom_lpu(
struct kbase_tlstream *stream,
const void *atom,
const void *lpu,
- const char *attrib_match_list)
+ const char *attrib_match_list
+)
{
const u32 msg_id = KBASE_TL_RET_ATOM_LPU;
- const size_t s0 = sizeof(u32) + sizeof(char)
+ const size_t s2 = sizeof(u32) + sizeof(char)
+ strnlen(attrib_match_list, STRLEN_MAX);
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(atom)
+ sizeof(lpu)
- + s0
+ + s2
;
char *buffer;
unsigned long acq_flags;
@@ -978,7 +995,7 @@ void __kbase_tlstream_tl_ret_atom_lpu(
pos = kbasep_serialize_bytes(buffer,
pos, &lpu, sizeof(lpu));
pos = kbasep_serialize_string(buffer,
- pos, attrib_match_list, s0);
+ pos, attrib_match_list, s2);
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -986,7 +1003,8 @@ void __kbase_tlstream_tl_ret_atom_lpu(
void __kbase_tlstream_tl_nret_ctx_lpu(
struct kbase_tlstream *stream,
const void *ctx,
- const void *lpu)
+ const void *lpu
+)
{
const u32 msg_id = KBASE_TL_NRET_CTX_LPU;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1012,7 +1030,8 @@ void __kbase_tlstream_tl_nret_ctx_lpu(
void __kbase_tlstream_tl_nret_atom_ctx(
struct kbase_tlstream *stream,
const void *atom,
- const void *ctx)
+ const void *ctx
+)
{
const u32 msg_id = KBASE_TL_NRET_ATOM_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1038,7 +1057,8 @@ void __kbase_tlstream_tl_nret_atom_ctx(
void __kbase_tlstream_tl_nret_atom_lpu(
struct kbase_tlstream *stream,
const void *atom,
- const void *lpu)
+ const void *lpu
+)
{
const u32 msg_id = KBASE_TL_NRET_ATOM_LPU;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1064,7 +1084,8 @@ void __kbase_tlstream_tl_nret_atom_lpu(
void __kbase_tlstream_tl_ret_as_ctx(
struct kbase_tlstream *stream,
const void *address_space,
- const void *ctx)
+ const void *ctx
+)
{
const u32 msg_id = KBASE_TL_RET_AS_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1090,7 +1111,8 @@ void __kbase_tlstream_tl_ret_as_ctx(
void __kbase_tlstream_tl_nret_as_ctx(
struct kbase_tlstream *stream,
const void *address_space,
- const void *ctx)
+ const void *ctx
+)
{
const u32 msg_id = KBASE_TL_NRET_AS_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1116,7 +1138,8 @@ void __kbase_tlstream_tl_nret_as_ctx(
void __kbase_tlstream_tl_ret_atom_as(
struct kbase_tlstream *stream,
const void *atom,
- const void *address_space)
+ const void *address_space
+)
{
const u32 msg_id = KBASE_TL_RET_ATOM_AS;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1142,7 +1165,8 @@ void __kbase_tlstream_tl_ret_atom_as(
void __kbase_tlstream_tl_nret_atom_as(
struct kbase_tlstream *stream,
const void *atom,
- const void *address_space)
+ const void *address_space
+)
{
const u32 msg_id = KBASE_TL_NRET_ATOM_AS;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1170,7 +1194,8 @@ void __kbase_tlstream_tl_attrib_atom_config(
const void *atom,
u64 descriptor,
u64 affinity,
- u32 config)
+ u32 config
+)
{
const u32 msg_id = KBASE_TL_ATTRIB_ATOM_CONFIG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1199,138 +1224,11 @@ void __kbase_tlstream_tl_attrib_atom_config(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_attrib_atom_priority(
- struct kbase_tlstream *stream,
- const void *atom,
- u32 prio)
-{
- const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
- const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
- + sizeof(prio)
- ;
- char *buffer;
- unsigned long acq_flags;
- size_t pos = 0;
-
- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
-
- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
- pos = kbasep_serialize_timestamp(buffer, pos);
- pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
- pos = kbasep_serialize_bytes(buffer,
- pos, &prio, sizeof(prio));
-
- kbase_tlstream_msgbuf_release(stream, acq_flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_state(
- struct kbase_tlstream *stream,
- const void *atom,
- u32 state)
-{
- const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
- const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
- + sizeof(state)
- ;
- char *buffer;
- unsigned long acq_flags;
- size_t pos = 0;
-
- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
-
- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
- pos = kbasep_serialize_timestamp(buffer, pos);
- pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
- pos = kbasep_serialize_bytes(buffer,
- pos, &state, sizeof(state));
-
- kbase_tlstream_msgbuf_release(stream, acq_flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_prioritized(
- struct kbase_tlstream *stream,
- const void *atom)
-{
- const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED;
- const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
- ;
- char *buffer;
- unsigned long acq_flags;
- size_t pos = 0;
-
- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
-
- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
- pos = kbasep_serialize_timestamp(buffer, pos);
- pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
-
- kbase_tlstream_msgbuf_release(stream, acq_flags);
-}
-
-void __kbase_tlstream_tl_attrib_atom_jit(
- struct kbase_tlstream *stream,
- const void *atom,
- u64 edit_addr,
- u64 new_addr,
- u32 jit_flags,
- u64 mem_flags,
- u32 j_id,
- u64 com_pgs,
- u64 extent,
- u64 va_pgs)
-{
- const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
- const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
- + sizeof(edit_addr)
- + sizeof(new_addr)
- + sizeof(jit_flags)
- + sizeof(mem_flags)
- + sizeof(j_id)
- + sizeof(com_pgs)
- + sizeof(extent)
- + sizeof(va_pgs)
- ;
- char *buffer;
- unsigned long acq_flags;
- size_t pos = 0;
-
- buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
-
- pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
- pos = kbasep_serialize_timestamp(buffer, pos);
- pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
- pos = kbasep_serialize_bytes(buffer,
- pos, &edit_addr, sizeof(edit_addr));
- pos = kbasep_serialize_bytes(buffer,
- pos, &new_addr, sizeof(new_addr));
- pos = kbasep_serialize_bytes(buffer,
- pos, &jit_flags, sizeof(jit_flags));
- pos = kbasep_serialize_bytes(buffer,
- pos, &mem_flags, sizeof(mem_flags));
- pos = kbasep_serialize_bytes(buffer,
- pos, &j_id, sizeof(j_id));
- pos = kbasep_serialize_bytes(buffer,
- pos, &com_pgs, sizeof(com_pgs));
- pos = kbasep_serialize_bytes(buffer,
- pos, &extent, sizeof(extent));
- pos = kbasep_serialize_bytes(buffer,
- pos, &va_pgs, sizeof(va_pgs));
-
- kbase_tlstream_msgbuf_release(stream, acq_flags);
-}
-
void __kbase_tlstream_tl_jit_usedpages(
struct kbase_tlstream *stream,
u64 used_pages,
- u32 j_id)
+ u32 j_id
+)
{
const u32 msg_id = KBASE_TL_JIT_USEDPAGES;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1363,7 +1261,8 @@ void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
u32 bin_id,
u32 max_allocs,
u32 jit_flags,
- u32 usg_id)
+ u32 usg_id
+)
{
const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITALLOCINFO;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1410,7 +1309,8 @@ void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(
struct kbase_tlstream *stream,
const void *atom,
- u32 j_id)
+ u32 j_id
+)
{
const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JITFREEINFO;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1438,7 +1338,8 @@ void __kbase_tlstream_tl_attrib_as_config(
const void *address_space,
u64 transtab,
u64 memattr,
- u64 transcfg)
+ u64 transcfg
+)
{
const u32 msg_id = KBASE_TL_ATTRIB_AS_CONFIG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1469,7 +1370,8 @@ void __kbase_tlstream_tl_attrib_as_config(
void __kbase_tlstream_tl_event_lpu_softstop(
struct kbase_tlstream *stream,
- const void *lpu)
+ const void *lpu
+)
{
const u32 msg_id = KBASE_TL_EVENT_LPU_SOFTSTOP;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1491,7 +1393,8 @@ void __kbase_tlstream_tl_event_lpu_softstop(
void __kbase_tlstream_tl_event_atom_softstop_ex(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *atom
+)
{
const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_EX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1513,7 +1416,8 @@ void __kbase_tlstream_tl_event_atom_softstop_ex(
void __kbase_tlstream_tl_event_atom_softstop_issue(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *atom
+)
{
const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTSTOP_ISSUE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1535,7 +1439,8 @@ void __kbase_tlstream_tl_event_atom_softstop_issue(
void __kbase_tlstream_tl_event_atom_softjob_start(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *atom
+)
{
const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1557,7 +1462,8 @@ void __kbase_tlstream_tl_event_atom_softjob_start(
void __kbase_tlstream_tl_event_atom_softjob_end(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *atom
+)
{
const u32 msg_id = KBASE_TL_EVENT_ATOM_SOFTJOB_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1579,7 +1485,8 @@ void __kbase_tlstream_tl_event_atom_softjob_end(
void __kbase_tlstream_tl_arbiter_granted(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_TL_ARBITER_GRANTED;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1601,7 +1508,8 @@ void __kbase_tlstream_tl_arbiter_granted(
void __kbase_tlstream_tl_arbiter_started(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_TL_ARBITER_STARTED;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1623,7 +1531,8 @@ void __kbase_tlstream_tl_arbiter_started(
void __kbase_tlstream_tl_arbiter_stop_requested(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_TL_ARBITER_STOP_REQUESTED;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1645,7 +1554,8 @@ void __kbase_tlstream_tl_arbiter_stop_requested(
void __kbase_tlstream_tl_arbiter_stopped(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_TL_ARBITER_STOPPED;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1667,7 +1577,8 @@ void __kbase_tlstream_tl_arbiter_stopped(
void __kbase_tlstream_tl_arbiter_requested(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_TL_ARBITER_REQUESTED;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1689,7 +1600,8 @@ void __kbase_tlstream_tl_arbiter_requested(
void __kbase_tlstream_jd_gpu_soft_reset(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *gpu
+)
{
const u32 msg_id = KBASE_JD_GPU_SOFT_RESET;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -1709,15 +1621,18 @@ void __kbase_tlstream_jd_gpu_soft_reset(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_pm_state(
+void __kbase_tlstream_jd_tiler_heap_chunk_alloc(
struct kbase_tlstream *stream,
- u32 core_type,
- u64 core_state_bitset)
+ u32 ctx_nr,
+ u64 heap_id,
+ u64 chunk_va
+)
{
- const u32 msg_id = KBASE_AUX_PM_STATE;
+ const u32 msg_id = KBASE_JD_TILER_HEAP_CHUNK_ALLOC;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(core_type)
- + sizeof(core_state_bitset)
+ + sizeof(ctx_nr)
+ + sizeof(heap_id)
+ + sizeof(chunk_va)
;
char *buffer;
unsigned long acq_flags;
@@ -1728,24 +1643,23 @@ void __kbase_tlstream_aux_pm_state(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &core_type, sizeof(core_type));
+ pos, &ctx_nr, sizeof(ctx_nr));
pos = kbasep_serialize_bytes(buffer,
- pos, &core_state_bitset, sizeof(core_state_bitset));
+ pos, &heap_id, sizeof(heap_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &chunk_va, sizeof(chunk_va));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_pagefault(
+void __kbase_tlstream_tl_js_sched_start(
struct kbase_tlstream *stream,
- u32 ctx_nr,
- u32 as_nr,
- u64 page_cnt_change)
+ u32 dummy
+)
{
- const u32 msg_id = KBASE_AUX_PAGEFAULT;
+ const u32 msg_id = KBASE_TL_JS_SCHED_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(ctx_nr)
- + sizeof(as_nr)
- + sizeof(page_cnt_change)
+ + sizeof(dummy)
;
char *buffer;
unsigned long acq_flags;
@@ -1756,24 +1670,19 @@ void __kbase_tlstream_aux_pagefault(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &ctx_nr, sizeof(ctx_nr));
- pos = kbasep_serialize_bytes(buffer,
- pos, &as_nr, sizeof(as_nr));
- pos = kbasep_serialize_bytes(buffer,
- pos, &page_cnt_change, sizeof(page_cnt_change));
+ pos, &dummy, sizeof(dummy));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_pagesalloc(
+void __kbase_tlstream_tl_js_sched_end(
struct kbase_tlstream *stream,
- u32 ctx_nr,
- u64 page_cnt)
+ u32 dummy
+)
{
- const u32 msg_id = KBASE_AUX_PAGESALLOC;
+ const u32 msg_id = KBASE_TL_JS_SCHED_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(ctx_nr)
- + sizeof(page_cnt)
+ + sizeof(dummy)
;
char *buffer;
unsigned long acq_flags;
@@ -1784,20 +1693,19 @@ void __kbase_tlstream_aux_pagesalloc(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &ctx_nr, sizeof(ctx_nr));
- pos = kbasep_serialize_bytes(buffer,
- pos, &page_cnt, sizeof(page_cnt));
+ pos, &dummy, sizeof(dummy));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_devfreq_target(
+void __kbase_tlstream_tl_jd_submit_atom_start(
struct kbase_tlstream *stream,
- u64 target_freq)
+ const void *atom
+)
{
- const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET;
+ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(target_freq)
+ + sizeof(atom)
;
char *buffer;
unsigned long acq_flags;
@@ -1808,18 +1716,19 @@ void __kbase_tlstream_aux_devfreq_target(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &target_freq, sizeof(target_freq));
+ pos, &atom, sizeof(atom));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_protected_enter_start(
+void __kbase_tlstream_tl_jd_submit_atom_end(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *atom
+)
{
- const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START;
+ const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(gpu)
+ + sizeof(atom)
;
char *buffer;
unsigned long acq_flags;
@@ -1830,18 +1739,19 @@ void __kbase_tlstream_aux_protected_enter_start(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &gpu, sizeof(gpu));
+ pos, &atom, sizeof(atom));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_protected_enter_end(
+void __kbase_tlstream_tl_jd_done_no_lock_start(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *atom
+)
{
- const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END;
+ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(gpu)
+ + sizeof(atom)
;
char *buffer;
unsigned long acq_flags;
@@ -1852,18 +1762,19 @@ void __kbase_tlstream_aux_protected_enter_end(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &gpu, sizeof(gpu));
+ pos, &atom, sizeof(atom));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_protected_leave_start(
+void __kbase_tlstream_tl_jd_done_no_lock_end(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *atom
+)
{
- const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
+ const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(gpu)
+ + sizeof(atom)
;
char *buffer;
unsigned long acq_flags;
@@ -1874,18 +1785,19 @@ void __kbase_tlstream_aux_protected_leave_start(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &gpu, sizeof(gpu));
+ pos, &atom, sizeof(atom));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_protected_leave_end(
+void __kbase_tlstream_tl_jd_done_start(
struct kbase_tlstream *stream,
- const void *gpu)
+ const void *atom
+)
{
- const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+ const u32 msg_id = KBASE_TL_JD_DONE_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(gpu)
+ + sizeof(atom)
;
char *buffer;
unsigned long acq_flags;
@@ -1896,28 +1808,19 @@ void __kbase_tlstream_aux_protected_leave_end(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &gpu, sizeof(gpu));
+ pos, &atom, sizeof(atom));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_jit_stats(
+void __kbase_tlstream_tl_jd_done_end(
struct kbase_tlstream *stream,
- u32 ctx_nr,
- u32 bid,
- u32 max_allocs,
- u32 allocs,
- u32 va_pages,
- u32 ph_pages)
+ const void *atom
+)
{
- const u32 msg_id = KBASE_AUX_JIT_STATS;
+ const u32 msg_id = KBASE_TL_JD_DONE_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(ctx_nr)
- + sizeof(bid)
- + sizeof(max_allocs)
- + sizeof(allocs)
- + sizeof(va_pages)
- + sizeof(ph_pages)
+ + sizeof(atom)
;
char *buffer;
unsigned long acq_flags;
@@ -1928,44 +1831,19 @@ void __kbase_tlstream_aux_jit_stats(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &ctx_nr, sizeof(ctx_nr));
- pos = kbasep_serialize_bytes(buffer,
- pos, &bid, sizeof(bid));
- pos = kbasep_serialize_bytes(buffer,
- pos, &max_allocs, sizeof(max_allocs));
- pos = kbasep_serialize_bytes(buffer,
- pos, &allocs, sizeof(allocs));
- pos = kbasep_serialize_bytes(buffer,
- pos, &va_pages, sizeof(va_pages));
- pos = kbasep_serialize_bytes(buffer,
- pos, &ph_pages, sizeof(ph_pages));
+ pos, &atom, sizeof(atom));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_tiler_heap_stats(
+void __kbase_tlstream_tl_jd_atom_complete(
struct kbase_tlstream *stream,
- u32 ctx_nr,
- u64 heap_id,
- u32 va_pages,
- u32 ph_pages,
- u32 max_chunks,
- u32 chunk_size,
- u32 chunk_count,
- u32 target_in_flight,
- u32 nr_in_flight)
+ const void *atom
+)
{
- const u32 msg_id = KBASE_AUX_TILER_HEAP_STATS;
+ const u32 msg_id = KBASE_TL_JD_ATOM_COMPLETE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(ctx_nr)
- + sizeof(heap_id)
- + sizeof(va_pages)
- + sizeof(ph_pages)
- + sizeof(max_chunks)
- + sizeof(chunk_size)
- + sizeof(chunk_count)
- + sizeof(target_in_flight)
- + sizeof(nr_in_flight)
+ + sizeof(atom)
;
char *buffer;
unsigned long acq_flags;
@@ -1976,40 +1854,102 @@ void __kbase_tlstream_aux_tiler_heap_stats(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &ctx_nr, sizeof(ctx_nr));
- pos = kbasep_serialize_bytes(buffer,
- pos, &heap_id, sizeof(heap_id));
- pos = kbasep_serialize_bytes(buffer,
- pos, &va_pages, sizeof(va_pages));
+ pos, &atom, sizeof(atom));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_run_atom_start(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 atom_nr
+)
+{
+ const u32 msg_id = KBASE_TL_RUN_ATOM_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ + sizeof(atom_nr)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &ph_pages, sizeof(ph_pages));
+ pos, &atom, sizeof(atom));
pos = kbasep_serialize_bytes(buffer,
- pos, &max_chunks, sizeof(max_chunks));
+ pos, &atom_nr, sizeof(atom_nr));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_run_atom_end(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 atom_nr
+)
+{
+ const u32 msg_id = KBASE_TL_RUN_ATOM_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ + sizeof(atom_nr)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &chunk_size, sizeof(chunk_size));
+ pos, &atom, sizeof(atom));
pos = kbasep_serialize_bytes(buffer,
- pos, &chunk_count, sizeof(chunk_count));
+ pos, &atom_nr, sizeof(atom_nr));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_priority(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 prio
+)
+{
+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITY;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ + sizeof(prio)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &target_in_flight, sizeof(target_in_flight));
+ pos, &atom, sizeof(atom));
pos = kbasep_serialize_bytes(buffer,
- pos, &nr_in_flight, sizeof(nr_in_flight));
+ pos, &prio, sizeof(prio));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_event_job_slot(
+void __kbase_tlstream_tl_attrib_atom_state(
struct kbase_tlstream *stream,
- const void *ctx,
- u32 slot_nr,
- u32 atom_nr,
- u32 event)
+ const void *atom,
+ u32 state
+)
{
- const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT;
+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_STATE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(ctx)
- + sizeof(slot_nr)
- + sizeof(atom_nr)
- + sizeof(event)
+ + sizeof(atom)
+ + sizeof(state)
;
char *buffer;
unsigned long acq_flags;
@@ -2020,32 +1960,60 @@ void __kbase_tlstream_aux_event_job_slot(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &ctx, sizeof(ctx));
- pos = kbasep_serialize_bytes(buffer,
- pos, &slot_nr, sizeof(slot_nr));
+ pos, &atom, sizeof(atom));
pos = kbasep_serialize_bytes(buffer,
- pos, &atom_nr, sizeof(atom_nr));
+ pos, &state, sizeof(state));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_attrib_atom_prioritized(
+ struct kbase_tlstream *stream,
+ const void *atom
+)
+{
+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_PRIORITIZED;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(atom)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &event, sizeof(event));
+ pos, &atom, sizeof(atom));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_aux_mmu_command(
+void __kbase_tlstream_tl_attrib_atom_jit(
struct kbase_tlstream *stream,
- u32 kernel_ctx_id,
- u32 mmu_cmd_id,
- u32 mmu_synchronicity,
- u64 mmu_lock_addr,
- u32 mmu_lock_page_num)
+ const void *atom,
+ u64 edit_addr,
+ u64 new_addr,
+ u32 jit_flags,
+ u64 mem_flags,
+ u32 j_id,
+ u64 com_pgs,
+ u64 extent,
+ u64 va_pgs
+)
{
- const u32 msg_id = KBASE_AUX_MMU_COMMAND;
+ const u32 msg_id = KBASE_TL_ATTRIB_ATOM_JIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(kernel_ctx_id)
- + sizeof(mmu_cmd_id)
- + sizeof(mmu_synchronicity)
- + sizeof(mmu_lock_addr)
- + sizeof(mmu_lock_page_num)
+ + sizeof(atom)
+ + sizeof(edit_addr)
+ + sizeof(new_addr)
+ + sizeof(jit_flags)
+ + sizeof(mem_flags)
+ + sizeof(j_id)
+ + sizeof(com_pgs)
+ + sizeof(extent)
+ + sizeof(va_pgs)
;
char *buffer;
unsigned long acq_flags;
@@ -2056,15 +2024,23 @@ void __kbase_tlstream_aux_mmu_command(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
+ pos, &atom, sizeof(atom));
pos = kbasep_serialize_bytes(buffer,
- pos, &mmu_cmd_id, sizeof(mmu_cmd_id));
+ pos, &edit_addr, sizeof(edit_addr));
pos = kbasep_serialize_bytes(buffer,
- pos, &mmu_synchronicity, sizeof(mmu_synchronicity));
+ pos, &new_addr, sizeof(new_addr));
pos = kbasep_serialize_bytes(buffer,
- pos, &mmu_lock_addr, sizeof(mmu_lock_addr));
+ pos, &jit_flags, sizeof(jit_flags));
pos = kbasep_serialize_bytes(buffer,
- pos, &mmu_lock_page_num, sizeof(mmu_lock_page_num));
+ pos, &mem_flags, sizeof(mem_flags));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &j_id, sizeof(j_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &com_pgs, sizeof(com_pgs));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &extent, sizeof(extent));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &va_pgs, sizeof(va_pgs));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2077,7 +2053,8 @@ void __kbase_tlstream_tl_kbase_new_device(
u32 kbase_device_as_count,
u32 kbase_device_sb_entry_count,
u32 kbase_device_has_cross_stream_sync,
- u32 kbase_device_supports_gpu_sleep)
+ u32 kbase_device_supports_gpu_sleep
+)
{
const u32 msg_id = KBASE_TL_KBASE_NEW_DEVICE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2121,7 +2098,8 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed)
+ u32 kbase_device_csg_slot_resumed
+)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_PROGRAM_CSG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2156,7 +2134,8 @@ void __kbase_tlstream_tl_kbase_device_program_csg(
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
- u32 kbase_device_csg_slot_index)
+ u32 kbase_device_csg_slot_index
+)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_DEPROGRAM_CSG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2182,7 +2161,8 @@ void __kbase_tlstream_tl_kbase_device_deprogram_csg(
void __kbase_tlstream_tl_kbase_device_halt_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
- u32 kbase_device_csg_slot_index)
+ u32 kbase_device_csg_slot_index
+)
{
const u32 msg_id = KBASE_TL_KBASE_DEVICE_HALT_CSG;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2208,7 +2188,8 @@ void __kbase_tlstream_tl_kbase_device_halt_csg(
void __kbase_tlstream_tl_kbase_new_ctx(
struct kbase_tlstream *stream,
u32 kernel_ctx_id,
- u32 kbase_device_id)
+ u32 kbase_device_id
+)
{
const u32 msg_id = KBASE_TL_KBASE_NEW_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2233,7 +2214,8 @@ void __kbase_tlstream_tl_kbase_new_ctx(
void __kbase_tlstream_tl_kbase_del_ctx(
struct kbase_tlstream *stream,
- u32 kernel_ctx_id)
+ u32 kernel_ctx_id
+)
{
const u32 msg_id = KBASE_TL_KBASE_DEL_CTX;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2256,7 +2238,8 @@ void __kbase_tlstream_tl_kbase_del_ctx(
void __kbase_tlstream_tl_kbase_ctx_assign_as(
struct kbase_tlstream *stream,
u32 kernel_ctx_id,
- u32 kbase_device_as_index)
+ u32 kbase_device_as_index
+)
{
const u32 msg_id = KBASE_TL_KBASE_CTX_ASSIGN_AS;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2281,7 +2264,8 @@ void __kbase_tlstream_tl_kbase_ctx_assign_as(
void __kbase_tlstream_tl_kbase_ctx_unassign_as(
struct kbase_tlstream *stream,
- u32 kernel_ctx_id)
+ u32 kernel_ctx_id
+)
{
const u32 msg_id = KBASE_TL_KBASE_CTX_UNASSIGN_AS;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2304,12 +2288,15 @@ void __kbase_tlstream_tl_kbase_ctx_unassign_as(
void __kbase_tlstream_tl_kbase_new_kcpuqueue(
struct kbase_tlstream *stream,
const void *kcpu_queue,
+ u32 kcpu_queue_id,
u32 kernel_ctx_id,
- u32 kcpuq_num_pending_cmds)
+ u32 kcpuq_num_pending_cmds
+)
{
const u32 msg_id = KBASE_TL_KBASE_NEW_KCPUQUEUE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(kcpu_queue_id)
+ sizeof(kernel_ctx_id)
+ sizeof(kcpuq_num_pending_cmds)
;
@@ -2324,6 +2311,8 @@ void __kbase_tlstream_tl_kbase_new_kcpuqueue(
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue_id, sizeof(kcpu_queue_id));
+ pos = kbasep_serialize_bytes(buffer,
pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
pos = kbasep_serialize_bytes(buffer,
pos, &kcpuq_num_pending_cmds, sizeof(kcpuq_num_pending_cmds));
@@ -2333,7 +2322,8 @@ void __kbase_tlstream_tl_kbase_new_kcpuqueue(
void __kbase_tlstream_tl_kbase_del_kcpuqueue(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_DEL_KCPUQUEUE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2356,7 +2346,8 @@ void __kbase_tlstream_tl_kbase_del_kcpuqueue(
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- const void *fence)
+ const void *fence
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2382,7 +2373,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal(
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- const void *fence)
+ const void *fence
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2410,7 +2402,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error)
+ u32 cqs_obj_inherit_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2442,7 +2435,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 cqs_obj_gpu_addr)
+ u64 cqs_obj_gpu_addr
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2468,7 +2462,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr)
+ u64 map_import_buf_gpu_addr
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2494,7 +2489,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr)
+ u64 map_import_buf_gpu_addr
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2520,7 +2516,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr)
+ u64 map_import_buf_gpu_addr
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2545,7 +2542,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2569,7 +2567,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
struct kbase_tlstream *stream,
const void *kcpu_queue,
const void *group_suspend_buf,
- u32 gpu_cmdq_grp_handle)
+ u32 gpu_cmdq_grp_handle
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2597,7 +2596,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2628,7 +2628,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc(
u32 jit_alloc_bin_id,
u32 jit_alloc_max_allocations,
u32 jit_alloc_flags,
- u32 jit_alloc_usage_id)
+ u32 jit_alloc_usage_id
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2677,7 +2678,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc(
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2699,7 +2701,8 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc(
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2722,7 +2725,8 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free(
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 jit_alloc_jit_id)
+ u32 jit_alloc_jit_id
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2747,7 +2751,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free(
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2769,7 +2774,8 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2792,7 +2798,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2817,7 +2824,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2840,7 +2848,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2865,7 +2874,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2888,7 +2898,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2914,7 +2925,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2939,7 +2951,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2962,7 +2975,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -2987,7 +3001,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3010,7 +3025,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3035,7 +3051,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3058,7 +3075,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3083,7 +3101,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3105,7 +3124,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start(
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3130,7 +3150,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
const void *kcpu_queue,
u32 execute_error,
u64 jit_alloc_gpu_alloc_addr,
- u64 jit_alloc_mmu_flags)
+ u64 jit_alloc_mmu_flags
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3161,7 +3182,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3183,7 +3205,8 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3205,7 +3228,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start(
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3229,7 +3253,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u32 execute_error,
- u64 jit_free_pages_used)
+ u64 jit_free_pages_used
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3257,7 +3282,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3279,7 +3305,8 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3301,7 +3328,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3324,7 +3352,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error)
+ u32 execute_error
+)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3349,7 +3378,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
void __kbase_tlstream_tl_kbase_csffw_fw_reloading(
struct kbase_tlstream *stream,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_RELOADING;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3371,7 +3401,8 @@ void __kbase_tlstream_tl_kbase_csffw_fw_reloading(
void __kbase_tlstream_tl_kbase_csffw_fw_enabling(
struct kbase_tlstream *stream,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_ENABLING;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3393,7 +3424,8 @@ void __kbase_tlstream_tl_kbase_csffw_fw_enabling(
void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep(
struct kbase_tlstream *stream,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_SLEEP;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3415,7 +3447,8 @@ void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep(
void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup(
struct kbase_tlstream *stream,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3437,7 +3470,8 @@ void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup(
void __kbase_tlstream_tl_kbase_csffw_fw_request_halt(
struct kbase_tlstream *stream,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_REQUEST_HALT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3459,7 +3493,8 @@ void __kbase_tlstream_tl_kbase_csffw_fw_request_halt(
void __kbase_tlstream_tl_kbase_csffw_fw_disabling(
struct kbase_tlstream *stream,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_DISABLING;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3481,7 +3516,8 @@ void __kbase_tlstream_tl_kbase_csffw_fw_disabling(
void __kbase_tlstream_tl_kbase_csffw_fw_off(
struct kbase_tlstream *stream,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_FW_OFF;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3504,7 +3540,8 @@ void __kbase_tlstream_tl_kbase_csffw_fw_off(
void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
struct kbase_tlstream *stream,
u64 csffw_timestamp,
- u64 csffw_cycle)
+ u64 csffw_cycle
+)
{
const u32 msg_id = KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
@@ -3527,13 +3564,16 @@ void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_js_sched_start(
+void __kbase_tlstream_aux_pm_state(
struct kbase_tlstream *stream,
- u32 dummy)
+ u32 core_type,
+ u64 core_state_bitset
+)
{
- const u32 msg_id = KBASE_TL_JS_SCHED_START;
+ const u32 msg_id = KBASE_AUX_PM_STATE;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(dummy)
+ + sizeof(core_type)
+ + sizeof(core_state_bitset)
;
char *buffer;
unsigned long acq_flags;
@@ -3544,18 +3584,25 @@ void __kbase_tlstream_tl_js_sched_start(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &dummy, sizeof(dummy));
+ pos, &core_type, sizeof(core_type));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &core_state_bitset, sizeof(core_state_bitset));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_js_sched_end(
+void __kbase_tlstream_aux_pagefault(
struct kbase_tlstream *stream,
- u32 dummy)
+ u32 ctx_nr,
+ u32 as_nr,
+ u64 page_cnt_change
+)
{
- const u32 msg_id = KBASE_TL_JS_SCHED_END;
+ const u32 msg_id = KBASE_AUX_PAGEFAULT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(dummy)
+ + sizeof(ctx_nr)
+ + sizeof(as_nr)
+ + sizeof(page_cnt_change)
;
char *buffer;
unsigned long acq_flags;
@@ -3566,18 +3613,25 @@ void __kbase_tlstream_tl_js_sched_end(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &dummy, sizeof(dummy));
+ pos, &ctx_nr, sizeof(ctx_nr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &as_nr, sizeof(as_nr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &page_cnt_change, sizeof(page_cnt_change));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_jd_submit_atom_start(
+void __kbase_tlstream_aux_pagesalloc(
struct kbase_tlstream *stream,
- const void *atom)
+ u32 ctx_nr,
+ u64 page_cnt
+)
{
- const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_START;
+ const u32 msg_id = KBASE_AUX_PAGESALLOC;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
+ + sizeof(ctx_nr)
+ + sizeof(page_cnt)
;
char *buffer;
unsigned long acq_flags;
@@ -3588,18 +3642,21 @@ void __kbase_tlstream_tl_jd_submit_atom_start(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &ctx_nr, sizeof(ctx_nr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &page_cnt, sizeof(page_cnt));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_jd_submit_atom_end(
+void __kbase_tlstream_aux_devfreq_target(
struct kbase_tlstream *stream,
- const void *atom)
+ u64 target_freq
+)
{
- const u32 msg_id = KBASE_TL_JD_SUBMIT_ATOM_END;
+ const u32 msg_id = KBASE_AUX_DEVFREQ_TARGET;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
+ + sizeof(target_freq)
;
char *buffer;
unsigned long acq_flags;
@@ -3610,18 +3667,29 @@ void __kbase_tlstream_tl_jd_submit_atom_end(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &target_freq, sizeof(target_freq));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_jd_done_no_lock_start(
+void __kbase_tlstream_aux_jit_stats(
struct kbase_tlstream *stream,
- const void *atom)
+ u32 ctx_nr,
+ u32 bid,
+ u32 max_allocs,
+ u32 allocs,
+ u32 va_pages,
+ u32 ph_pages
+)
{
- const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_START;
+ const u32 msg_id = KBASE_AUX_JIT_STATS;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
+ + sizeof(ctx_nr)
+ + sizeof(bid)
+ + sizeof(max_allocs)
+ + sizeof(allocs)
+ + sizeof(va_pages)
+ + sizeof(ph_pages)
;
char *buffer;
unsigned long acq_flags;
@@ -3632,18 +3700,45 @@ void __kbase_tlstream_tl_jd_done_no_lock_start(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &ctx_nr, sizeof(ctx_nr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &bid, sizeof(bid));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &max_allocs, sizeof(max_allocs));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &allocs, sizeof(allocs));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &va_pages, sizeof(va_pages));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &ph_pages, sizeof(ph_pages));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_jd_done_no_lock_end(
+void __kbase_tlstream_aux_tiler_heap_stats(
struct kbase_tlstream *stream,
- const void *atom)
+ u32 ctx_nr,
+ u64 heap_id,
+ u32 va_pages,
+ u32 ph_pages,
+ u32 max_chunks,
+ u32 chunk_size,
+ u32 chunk_count,
+ u32 target_in_flight,
+ u32 nr_in_flight
+)
{
- const u32 msg_id = KBASE_TL_JD_DONE_NO_LOCK_END;
+ const u32 msg_id = KBASE_AUX_TILER_HEAP_STATS;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
+ + sizeof(ctx_nr)
+ + sizeof(heap_id)
+ + sizeof(va_pages)
+ + sizeof(ph_pages)
+ + sizeof(max_chunks)
+ + sizeof(chunk_size)
+ + sizeof(chunk_count)
+ + sizeof(target_in_flight)
+ + sizeof(nr_in_flight)
;
char *buffer;
unsigned long acq_flags;
@@ -3654,18 +3749,41 @@ void __kbase_tlstream_tl_jd_done_no_lock_end(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &ctx_nr, sizeof(ctx_nr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &heap_id, sizeof(heap_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &va_pages, sizeof(va_pages));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &ph_pages, sizeof(ph_pages));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &max_chunks, sizeof(max_chunks));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &chunk_size, sizeof(chunk_size));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &chunk_count, sizeof(chunk_count));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &target_in_flight, sizeof(target_in_flight));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &nr_in_flight, sizeof(nr_in_flight));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_jd_done_start(
+void __kbase_tlstream_aux_event_job_slot(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *ctx,
+ u32 slot_nr,
+ u32 atom_nr,
+ u32 event
+)
{
- const u32 msg_id = KBASE_TL_JD_DONE_START;
+ const u32 msg_id = KBASE_AUX_EVENT_JOB_SLOT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
+ + sizeof(ctx)
+ + sizeof(slot_nr)
+ + sizeof(atom_nr)
+ + sizeof(event)
;
char *buffer;
unsigned long acq_flags;
@@ -3676,18 +3794,25 @@ void __kbase_tlstream_tl_jd_done_start(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &ctx, sizeof(ctx));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &slot_nr, sizeof(slot_nr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &atom_nr, sizeof(atom_nr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &event, sizeof(event));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_jd_done_end(
+void __kbase_tlstream_aux_protected_enter_start(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *gpu
+)
{
- const u32 msg_id = KBASE_TL_JD_DONE_END;
+ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
+ + sizeof(gpu)
;
char *buffer;
unsigned long acq_flags;
@@ -3698,18 +3823,19 @@ void __kbase_tlstream_tl_jd_done_end(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &gpu, sizeof(gpu));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_jd_atom_complete(
+void __kbase_tlstream_aux_protected_enter_end(
struct kbase_tlstream *stream,
- const void *atom)
+ const void *gpu
+)
{
- const u32 msg_id = KBASE_TL_JD_ATOM_COMPLETE;
+ const u32 msg_id = KBASE_AUX_PROTECTED_ENTER_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
+ + sizeof(gpu)
;
char *buffer;
unsigned long acq_flags;
@@ -3720,20 +3846,27 @@ void __kbase_tlstream_tl_jd_atom_complete(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &gpu, sizeof(gpu));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_run_atom_start(
+void __kbase_tlstream_aux_mmu_command(
struct kbase_tlstream *stream,
- const void *atom,
- u32 atom_nr)
+ u32 kernel_ctx_id,
+ u32 mmu_cmd_id,
+ u32 mmu_synchronicity,
+ u64 mmu_lock_addr,
+ u32 mmu_lock_page_num
+)
{
- const u32 msg_id = KBASE_TL_RUN_ATOM_START;
+ const u32 msg_id = KBASE_AUX_MMU_COMMAND;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
- + sizeof(atom_nr)
+ + sizeof(kernel_ctx_id)
+ + sizeof(mmu_cmd_id)
+ + sizeof(mmu_synchronicity)
+ + sizeof(mmu_lock_addr)
+ + sizeof(mmu_lock_page_num)
;
char *buffer;
unsigned long acq_flags;
@@ -3744,22 +3877,27 @@ void __kbase_tlstream_tl_run_atom_start(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &kernel_ctx_id, sizeof(kernel_ctx_id));
pos = kbasep_serialize_bytes(buffer,
- pos, &atom_nr, sizeof(atom_nr));
+ pos, &mmu_cmd_id, sizeof(mmu_cmd_id));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &mmu_synchronicity, sizeof(mmu_synchronicity));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &mmu_lock_addr, sizeof(mmu_lock_addr));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &mmu_lock_page_num, sizeof(mmu_lock_page_num));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_run_atom_end(
+void __kbase_tlstream_aux_protected_leave_start(
struct kbase_tlstream *stream,
- const void *atom,
- u32 atom_nr)
+ const void *gpu
+)
{
- const u32 msg_id = KBASE_TL_RUN_ATOM_END;
+ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_START;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
- + sizeof(atom)
- + sizeof(atom_nr)
+ + sizeof(gpu)
;
char *buffer;
unsigned long acq_flags;
@@ -3770,9 +3908,30 @@ void __kbase_tlstream_tl_run_atom_end(
pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom, sizeof(atom));
+ pos, &gpu, sizeof(gpu));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_aux_protected_leave_end(
+ struct kbase_tlstream *stream,
+ const void *gpu
+)
+{
+ const u32 msg_id = KBASE_AUX_PROTECTED_LEAVE_END;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(gpu)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
- pos, &atom_nr, sizeof(atom_nr));
+ pos, &gpu, sizeof(gpu));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
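
Every writer reshaped above follows one serialization pattern: acquire a message buffer sized for the packet, write the message id, write a timestamp, then write each argument in its declared order, and release the buffer. A condensed sketch of that pattern, using the helpers visible throughout the hunks above (kbase_tlstream_msgbuf_acquire, kbasep_serialize_bytes, kbasep_serialize_timestamp, kbase_tlstream_msgbuf_release); the wrapper itself is illustrative and not part of the patch:

/* Illustrative only: the shape shared by the __kbase_tlstream_* writers. */
static void example_tlstream_write_u32_u64(struct kbase_tlstream *stream,
					   u32 msg_id, u32 arg0, u64 arg1)
{
	/* Packet = message id + 64-bit timestamp + arguments, back to back. */
	const size_t msg_size = sizeof(msg_id) + sizeof(u64)
		+ sizeof(arg0)
		+ sizeof(arg1);
	char *buffer;
	unsigned long acq_flags;
	size_t pos = 0;

	buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);

	pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
	pos = kbasep_serialize_timestamp(buffer, pos);
	pos = kbasep_serialize_bytes(buffer, pos, &arg0, sizeof(arg0));
	pos = kbasep_serialize_bytes(buffer, pos, &arg1, sizeof(arg1));

	kbase_tlstream_msgbuf_release(stream, acq_flags);
}

The reflow above does not change this layout; it only moves the closing parenthesis of each parameter list and regroups which writer is defined where in the generated file.
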
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index aa10bc0..b15fe6a 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/*
- * THIS FILE IS AUTOGENERATED BY mali_trace_generator.py.
+ * THIS FILE IS AUTOGENERATED BY generate_tracepoints.py.
* DO NOT EDIT.
*/
@@ -83,112 +83,132 @@ void __kbase_tlstream_tl_new_ctx(
struct kbase_tlstream *stream,
const void *ctx,
u32 ctx_nr,
- u32 tgid);
+ u32 tgid
+);
+
void __kbase_tlstream_tl_new_gpu(
struct kbase_tlstream *stream,
const void *gpu,
u32 gpu_id,
- u32 core_count);
+ u32 core_count
+);
+
void __kbase_tlstream_tl_new_lpu(
struct kbase_tlstream *stream,
const void *lpu,
u32 lpu_nr,
- u32 lpu_fn);
+ u32 lpu_fn
+);
+
void __kbase_tlstream_tl_new_atom(
struct kbase_tlstream *stream,
const void *atom,
- u32 atom_nr);
+ u32 atom_nr
+);
+
void __kbase_tlstream_tl_new_as(
struct kbase_tlstream *stream,
const void *address_space,
- u32 as_nr);
+ u32 as_nr
+);
+
void __kbase_tlstream_tl_del_ctx(
struct kbase_tlstream *stream,
- const void *ctx);
+ const void *ctx
+);
+
void __kbase_tlstream_tl_del_atom(
struct kbase_tlstream *stream,
- const void *atom);
+ const void *atom
+);
+
void __kbase_tlstream_tl_lifelink_lpu_gpu(
struct kbase_tlstream *stream,
const void *lpu,
- const void *gpu);
+ const void *gpu
+);
+
void __kbase_tlstream_tl_lifelink_as_gpu(
struct kbase_tlstream *stream,
const void *address_space,
- const void *gpu);
+ const void *gpu
+);
+
void __kbase_tlstream_tl_ret_ctx_lpu(
struct kbase_tlstream *stream,
const void *ctx,
- const void *lpu);
+ const void *lpu
+);
+
void __kbase_tlstream_tl_ret_atom_ctx(
struct kbase_tlstream *stream,
const void *atom,
- const void *ctx);
+ const void *ctx
+);
+
void __kbase_tlstream_tl_ret_atom_lpu(
struct kbase_tlstream *stream,
const void *atom,
const void *lpu,
- const char *attrib_match_list);
+ const char *attrib_match_list
+);
+
void __kbase_tlstream_tl_nret_ctx_lpu(
struct kbase_tlstream *stream,
const void *ctx,
- const void *lpu);
+ const void *lpu
+);
+
void __kbase_tlstream_tl_nret_atom_ctx(
struct kbase_tlstream *stream,
const void *atom,
- const void *ctx);
+ const void *ctx
+);
+
void __kbase_tlstream_tl_nret_atom_lpu(
struct kbase_tlstream *stream,
const void *atom,
- const void *lpu);
+ const void *lpu
+);
+
void __kbase_tlstream_tl_ret_as_ctx(
struct kbase_tlstream *stream,
const void *address_space,
- const void *ctx);
+ const void *ctx
+);
+
void __kbase_tlstream_tl_nret_as_ctx(
struct kbase_tlstream *stream,
const void *address_space,
- const void *ctx);
+ const void *ctx
+);
+
void __kbase_tlstream_tl_ret_atom_as(
struct kbase_tlstream *stream,
const void *atom,
- const void *address_space);
+ const void *address_space
+);
+
void __kbase_tlstream_tl_nret_atom_as(
struct kbase_tlstream *stream,
const void *atom,
- const void *address_space);
+ const void *address_space
+);
+
void __kbase_tlstream_tl_attrib_atom_config(
struct kbase_tlstream *stream,
const void *atom,
u64 descriptor,
u64 affinity,
- u32 config);
-void __kbase_tlstream_tl_attrib_atom_priority(
- struct kbase_tlstream *stream,
- const void *atom,
- u32 prio);
-void __kbase_tlstream_tl_attrib_atom_state(
- struct kbase_tlstream *stream,
- const void *atom,
- u32 state);
-void __kbase_tlstream_tl_attrib_atom_prioritized(
- struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_attrib_atom_jit(
- struct kbase_tlstream *stream,
- const void *atom,
- u64 edit_addr,
- u64 new_addr,
- u32 jit_flags,
- u64 mem_flags,
- u32 j_id,
- u64 com_pgs,
- u64 extent,
- u64 va_pgs);
+ u32 config
+);
+
void __kbase_tlstream_tl_jit_usedpages(
struct kbase_tlstream *stream,
u64 used_pages,
- u32 j_id);
+ u32 j_id
+);
+
void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
struct kbase_tlstream *stream,
const void *atom,
@@ -199,110 +219,172 @@ void __kbase_tlstream_tl_attrib_atom_jitallocinfo(
u32 bin_id,
u32 max_allocs,
u32 jit_flags,
- u32 usg_id);
+ u32 usg_id
+);
+
void __kbase_tlstream_tl_attrib_atom_jitfreeinfo(
struct kbase_tlstream *stream,
const void *atom,
- u32 j_id);
+ u32 j_id
+);
+
void __kbase_tlstream_tl_attrib_as_config(
struct kbase_tlstream *stream,
const void *address_space,
u64 transtab,
u64 memattr,
- u64 transcfg);
+ u64 transcfg
+);
+
void __kbase_tlstream_tl_event_lpu_softstop(
struct kbase_tlstream *stream,
- const void *lpu);
+ const void *lpu
+);
+
void __kbase_tlstream_tl_event_atom_softstop_ex(
struct kbase_tlstream *stream,
- const void *atom);
+ const void *atom
+);
+
void __kbase_tlstream_tl_event_atom_softstop_issue(
struct kbase_tlstream *stream,
- const void *atom);
+ const void *atom
+);
+
void __kbase_tlstream_tl_event_atom_softjob_start(
struct kbase_tlstream *stream,
- const void *atom);
+ const void *atom
+);
+
void __kbase_tlstream_tl_event_atom_softjob_end(
struct kbase_tlstream *stream,
- const void *atom);
+ const void *atom
+);
+
void __kbase_tlstream_tl_arbiter_granted(
struct kbase_tlstream *stream,
- const void *gpu);
+ const void *gpu
+);
+
void __kbase_tlstream_tl_arbiter_started(
struct kbase_tlstream *stream,
- const void *gpu);
+ const void *gpu
+);
+
void __kbase_tlstream_tl_arbiter_stop_requested(
struct kbase_tlstream *stream,
- const void *gpu);
+ const void *gpu
+);
+
void __kbase_tlstream_tl_arbiter_stopped(
struct kbase_tlstream *stream,
- const void *gpu);
+ const void *gpu
+);
+
void __kbase_tlstream_tl_arbiter_requested(
struct kbase_tlstream *stream,
- const void *gpu);
+ const void *gpu
+);
+
void __kbase_tlstream_jd_gpu_soft_reset(
struct kbase_tlstream *stream,
- const void *gpu);
-void __kbase_tlstream_aux_pm_state(
- struct kbase_tlstream *stream,
- u32 core_type,
- u64 core_state_bitset);
-void __kbase_tlstream_aux_pagefault(
+ const void *gpu
+);
+
+void __kbase_tlstream_jd_tiler_heap_chunk_alloc(
struct kbase_tlstream *stream,
u32 ctx_nr,
- u32 as_nr,
- u64 page_cnt_change);
-void __kbase_tlstream_aux_pagesalloc(
+ u64 heap_id,
+ u64 chunk_va
+);
+
+void __kbase_tlstream_tl_js_sched_start(
struct kbase_tlstream *stream,
- u32 ctx_nr,
- u64 page_cnt);
-void __kbase_tlstream_aux_devfreq_target(
+ u32 dummy
+);
+
+void __kbase_tlstream_tl_js_sched_end(
struct kbase_tlstream *stream,
- u64 target_freq);
-void __kbase_tlstream_aux_protected_enter_start(
+ u32 dummy
+);
+
+void __kbase_tlstream_tl_jd_submit_atom_start(
struct kbase_tlstream *stream,
- const void *gpu);
-void __kbase_tlstream_aux_protected_enter_end(
+ const void *atom
+);
+
+void __kbase_tlstream_tl_jd_submit_atom_end(
struct kbase_tlstream *stream,
- const void *gpu);
-void __kbase_tlstream_aux_protected_leave_start(
+ const void *atom
+);
+
+void __kbase_tlstream_tl_jd_done_no_lock_start(
struct kbase_tlstream *stream,
- const void *gpu);
-void __kbase_tlstream_aux_protected_leave_end(
+ const void *atom
+);
+
+void __kbase_tlstream_tl_jd_done_no_lock_end(
struct kbase_tlstream *stream,
- const void *gpu);
-void __kbase_tlstream_aux_jit_stats(
+ const void *atom
+);
+
+void __kbase_tlstream_tl_jd_done_start(
struct kbase_tlstream *stream,
- u32 ctx_nr,
- u32 bid,
- u32 max_allocs,
- u32 allocs,
- u32 va_pages,
- u32 ph_pages);
-void __kbase_tlstream_aux_tiler_heap_stats(
+ const void *atom
+);
+
+void __kbase_tlstream_tl_jd_done_end(
struct kbase_tlstream *stream,
- u32 ctx_nr,
- u64 heap_id,
- u32 va_pages,
- u32 ph_pages,
- u32 max_chunks,
- u32 chunk_size,
- u32 chunk_count,
- u32 target_in_flight,
- u32 nr_in_flight);
-void __kbase_tlstream_aux_event_job_slot(
+ const void *atom
+);
+
+void __kbase_tlstream_tl_jd_atom_complete(
struct kbase_tlstream *stream,
- const void *ctx,
- u32 slot_nr,
- u32 atom_nr,
- u32 event);
-void __kbase_tlstream_aux_mmu_command(
+ const void *atom
+);
+
+void __kbase_tlstream_tl_run_atom_start(
struct kbase_tlstream *stream,
- u32 kernel_ctx_id,
- u32 mmu_cmd_id,
- u32 mmu_synchronicity,
- u64 mmu_lock_addr,
- u32 mmu_lock_page_num);
+ const void *atom,
+ u32 atom_nr
+);
+
+void __kbase_tlstream_tl_run_atom_end(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 atom_nr
+);
+
+void __kbase_tlstream_tl_attrib_atom_priority(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 prio
+);
+
+void __kbase_tlstream_tl_attrib_atom_state(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u32 state
+);
+
+void __kbase_tlstream_tl_attrib_atom_prioritized(
+ struct kbase_tlstream *stream,
+ const void *atom
+);
+
+void __kbase_tlstream_tl_attrib_atom_jit(
+ struct kbase_tlstream *stream,
+ const void *atom,
+ u64 edit_addr,
+ u64 new_addr,
+ u32 jit_flags,
+ u64 mem_flags,
+ u32 j_id,
+ u64 com_pgs,
+ u64 extent,
+ u64 va_pgs
+);
+
void __kbase_tlstream_tl_kbase_new_device(
struct kbase_tlstream *stream,
u32 kbase_device_id,
@@ -311,85 +393,126 @@ void __kbase_tlstream_tl_kbase_new_device(
u32 kbase_device_as_count,
u32 kbase_device_sb_entry_count,
u32 kbase_device_has_cross_stream_sync,
- u32 kbase_device_supports_gpu_sleep);
+ u32 kbase_device_supports_gpu_sleep
+);
+
void __kbase_tlstream_tl_kbase_device_program_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
u32 kernel_ctx_id,
u32 gpu_cmdq_grp_handle,
u32 kbase_device_csg_slot_index,
- u32 kbase_device_csg_slot_resumed);
+ u32 kbase_device_csg_slot_resumed
+);
+
void __kbase_tlstream_tl_kbase_device_deprogram_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
- u32 kbase_device_csg_slot_index);
+ u32 kbase_device_csg_slot_index
+);
+
void __kbase_tlstream_tl_kbase_device_halt_csg(
struct kbase_tlstream *stream,
u32 kbase_device_id,
- u32 kbase_device_csg_slot_index);
+ u32 kbase_device_csg_slot_index
+);
+
void __kbase_tlstream_tl_kbase_new_ctx(
struct kbase_tlstream *stream,
u32 kernel_ctx_id,
- u32 kbase_device_id);
+ u32 kbase_device_id
+);
+
void __kbase_tlstream_tl_kbase_del_ctx(
struct kbase_tlstream *stream,
- u32 kernel_ctx_id);
+ u32 kernel_ctx_id
+);
+
void __kbase_tlstream_tl_kbase_ctx_assign_as(
struct kbase_tlstream *stream,
u32 kernel_ctx_id,
- u32 kbase_device_as_index);
+ u32 kbase_device_as_index
+);
+
void __kbase_tlstream_tl_kbase_ctx_unassign_as(
struct kbase_tlstream *stream,
- u32 kernel_ctx_id);
+ u32 kernel_ctx_id
+);
+
void __kbase_tlstream_tl_kbase_new_kcpuqueue(
struct kbase_tlstream *stream,
const void *kcpu_queue,
+ u32 kcpu_queue_id,
u32 kernel_ctx_id,
- u32 kcpuq_num_pending_cmds);
+ u32 kcpuq_num_pending_cmds
+);
+
void __kbase_tlstream_tl_kbase_del_kcpuqueue(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- const void *fence);
+ const void *fence
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- const void *fence);
+ const void *fence
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
u32 cqs_obj_compare_value,
- u32 cqs_obj_inherit_error);
+ u32 cqs_obj_inherit_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 cqs_obj_gpu_addr);
+ u64 cqs_obj_gpu_addr
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr);
+ u64 map_import_buf_gpu_addr
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr);
+ u64 map_import_buf_gpu_addr
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u64 map_import_buf_gpu_addr);
+ u64 map_import_buf_gpu_addr
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
struct kbase_tlstream *stream,
const void *kcpu_queue,
const void *group_suspend_buf,
- u32 gpu_cmdq_grp_handle);
+ u32 gpu_cmdq_grp_handle
+);
+
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -401,171 +524,292 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc(
u32 jit_alloc_bin_id,
u32 jit_alloc_max_allocations,
u32 jit_alloc_flags,
- u32 jit_alloc_usage_id);
+ u32 jit_alloc_usage_id
+);
+
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 jit_alloc_jit_id);
+ u32 jit_alloc_jit_id
+);
+
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u32 execute_error,
u64 jit_alloc_gpu_alloc_addr,
- u64 jit_alloc_mmu_flags);
+ u64 jit_alloc_mmu_flags
+);
+
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u32 execute_error,
- u64 jit_free_pages_used);
+ u64 jit_free_pages_used
+);
+
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue
+);
+
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
- u32 execute_error);
+ u32 execute_error
+);
+
void __kbase_tlstream_tl_kbase_csffw_fw_reloading(
struct kbase_tlstream *stream,
- u64 csffw_cycle);
+ u64 csffw_cycle
+);
+
void __kbase_tlstream_tl_kbase_csffw_fw_enabling(
struct kbase_tlstream *stream,
- u64 csffw_cycle);
+ u64 csffw_cycle
+);
+
void __kbase_tlstream_tl_kbase_csffw_fw_request_sleep(
struct kbase_tlstream *stream,
- u64 csffw_cycle);
+ u64 csffw_cycle
+);
+
void __kbase_tlstream_tl_kbase_csffw_fw_request_wakeup(
struct kbase_tlstream *stream,
- u64 csffw_cycle);
+ u64 csffw_cycle
+);
+
void __kbase_tlstream_tl_kbase_csffw_fw_request_halt(
struct kbase_tlstream *stream,
- u64 csffw_cycle);
+ u64 csffw_cycle
+);
+
void __kbase_tlstream_tl_kbase_csffw_fw_disabling(
struct kbase_tlstream *stream,
- u64 csffw_cycle);
+ u64 csffw_cycle
+);
+
void __kbase_tlstream_tl_kbase_csffw_fw_off(
struct kbase_tlstream *stream,
- u64 csffw_cycle);
+ u64 csffw_cycle
+);
+
void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
struct kbase_tlstream *stream,
u64 csffw_timestamp,
- u64 csffw_cycle);
-void __kbase_tlstream_tl_js_sched_start(
+ u64 csffw_cycle
+);
+
+void __kbase_tlstream_aux_pm_state(
struct kbase_tlstream *stream,
- u32 dummy);
-void __kbase_tlstream_tl_js_sched_end(
+ u32 core_type,
+ u64 core_state_bitset
+);
+
+void __kbase_tlstream_aux_pagefault(
struct kbase_tlstream *stream,
- u32 dummy);
-void __kbase_tlstream_tl_jd_submit_atom_start(
+ u32 ctx_nr,
+ u32 as_nr,
+ u64 page_cnt_change
+);
+
+void __kbase_tlstream_aux_pagesalloc(
struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_jd_submit_atom_end(
+ u32 ctx_nr,
+ u64 page_cnt
+);
+
+void __kbase_tlstream_aux_devfreq_target(
struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_jd_done_no_lock_start(
+ u64 target_freq
+);
+
+void __kbase_tlstream_aux_jit_stats(
struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_jd_done_no_lock_end(
+ u32 ctx_nr,
+ u32 bid,
+ u32 max_allocs,
+ u32 allocs,
+ u32 va_pages,
+ u32 ph_pages
+);
+
+void __kbase_tlstream_aux_tiler_heap_stats(
struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_jd_done_start(
+ u32 ctx_nr,
+ u64 heap_id,
+ u32 va_pages,
+ u32 ph_pages,
+ u32 max_chunks,
+ u32 chunk_size,
+ u32 chunk_count,
+ u32 target_in_flight,
+ u32 nr_in_flight
+);
+
+void __kbase_tlstream_aux_event_job_slot(
struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_jd_done_end(
+ const void *ctx,
+ u32 slot_nr,
+ u32 atom_nr,
+ u32 event
+);
+
+void __kbase_tlstream_aux_protected_enter_start(
struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_jd_atom_complete(
+ const void *gpu
+);
+
+void __kbase_tlstream_aux_protected_enter_end(
struct kbase_tlstream *stream,
- const void *atom);
-void __kbase_tlstream_tl_run_atom_start(
+ const void *gpu
+);
+
+void __kbase_tlstream_aux_mmu_command(
struct kbase_tlstream *stream,
- const void *atom,
- u32 atom_nr);
-void __kbase_tlstream_tl_run_atom_end(
+ u32 kernel_ctx_id,
+ u32 mmu_cmd_id,
+ u32 mmu_synchronicity,
+ u64 mmu_lock_addr,
+ u32 mmu_lock_page_num
+);
+
+void __kbase_tlstream_aux_protected_leave_start(
struct kbase_tlstream *stream,
- const void *atom,
- u32 atom_nr);
+ const void *gpu
+);
+
+void __kbase_tlstream_aux_protected_leave_end(
+ struct kbase_tlstream *stream,
+ const void *gpu
+);
struct kbase_tlstream;
/**
- * KBASE_TLSTREAM_TL_NEW_CTX -
- * object ctx is created
+ * KBASE_TLSTREAM_TL_NEW_CTX - object ctx is created
*
* @kbdev: Kbase device
* @ctx: Name of the context object
@@ -583,12 +827,14 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_new_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- ctx, ctx_nr, tgid); \
+ ctx, \
+ ctx_nr, \
+ tgid \
+ ); \
} while (0)
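
The macro reflow keeps call sites unchanged: each tracepoint macro still reads kbdev->timeline_flags once and forwards to its writer only when the relevant stream bit is set. A hypothetical call site, assuming a context object named kctx with an id field (those names are illustrative, not taken from this patch):

/* Hypothetical call site -- kctx and kctx->id are illustrative names. */
KBASE_TLSTREAM_TL_NEW_CTX(kbdev, kctx, kctx->id, current->tgid);

When TLSTREAM_ENABLED is not set in kbdev->timeline_flags, the expansion does nothing beyond the atomic_read() of the flags.
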
/**
- * KBASE_TLSTREAM_TL_NEW_GPU -
- * object gpu is created
+ * KBASE_TLSTREAM_TL_NEW_GPU - object gpu is created
*
* @kbdev: Kbase device
* @gpu: Name of the GPU object
@@ -606,12 +852,14 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_new_gpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- gpu, gpu_id, core_count); \
+ gpu, \
+ gpu_id, \
+ core_count \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NEW_LPU -
- * object lpu is created
+ * KBASE_TLSTREAM_TL_NEW_LPU - object lpu is created
*
* @kbdev: Kbase device
* @lpu: Name of the Logical Processing Unit object
@@ -629,12 +877,14 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_new_lpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- lpu, lpu_nr, lpu_fn); \
+ lpu, \
+ lpu_nr, \
+ lpu_fn \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NEW_ATOM -
- * object atom is created
+ * KBASE_TLSTREAM_TL_NEW_ATOM - object atom is created
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -650,12 +900,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_new_atom( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, atom_nr); \
+ atom, \
+ atom_nr \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NEW_AS -
- * address space object is created
+ * KBASE_TLSTREAM_TL_NEW_AS - address space object is created
*
* @kbdev: Kbase device
* @address_space: Name of the address space object
@@ -671,12 +922,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_new_as( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- address_space, as_nr); \
+ address_space, \
+ as_nr \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_DEL_CTX -
- * context is destroyed
+ * KBASE_TLSTREAM_TL_DEL_CTX - context is destroyed
*
* @kbdev: Kbase device
* @ctx: Name of the context object
@@ -690,12 +942,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_del_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- ctx); \
+ ctx \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_DEL_ATOM -
- * atom is destroyed
+ * KBASE_TLSTREAM_TL_DEL_ATOM - atom is destroyed
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -709,12 +961,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_del_atom( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU -
- * lpu is deleted with gpu
+ * KBASE_TLSTREAM_TL_LIFELINK_LPU_GPU - lpu is deleted with gpu
*
* @kbdev: Kbase device
* @lpu: Name of the Logical Processing Unit object
@@ -730,12 +982,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_lifelink_lpu_gpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- lpu, gpu); \
+ lpu, \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU -
- * address space is deleted with gpu
+ * KBASE_TLSTREAM_TL_LIFELINK_AS_GPU - address space is deleted with gpu
*
* @kbdev: Kbase device
* @address_space: Name of the address space object
@@ -751,12 +1004,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_lifelink_as_gpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- address_space, gpu); \
+ address_space, \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_RET_CTX_LPU -
- * context is retained by lpu
+ * KBASE_TLSTREAM_TL_RET_CTX_LPU - context is retained by lpu
*
* @kbdev: Kbase device
* @ctx: Name of the context object
@@ -772,12 +1026,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_ret_ctx_lpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- ctx, lpu); \
+ ctx, \
+ lpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_RET_ATOM_CTX -
- * atom is retained by context
+ * KBASE_TLSTREAM_TL_RET_ATOM_CTX - atom is retained by context
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -793,12 +1048,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_ret_atom_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, ctx); \
+ atom, \
+ ctx \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_RET_ATOM_LPU -
- * atom is retained by lpu
+ * KBASE_TLSTREAM_TL_RET_ATOM_LPU - atom is retained by lpu
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -816,12 +1072,14 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_ret_atom_lpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, lpu, attrib_match_list); \
+ atom, \
+ lpu, \
+ attrib_match_list \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NRET_CTX_LPU -
- * context is released by lpu
+ * KBASE_TLSTREAM_TL_NRET_CTX_LPU - context is released by lpu
*
* @kbdev: Kbase device
* @ctx: Name of the context object
@@ -837,12 +1095,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_nret_ctx_lpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- ctx, lpu); \
+ ctx, \
+ lpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NRET_ATOM_CTX -
- * atom is released by context
+ * KBASE_TLSTREAM_TL_NRET_ATOM_CTX - atom is released by context
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -858,12 +1117,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_nret_atom_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, ctx); \
+ atom, \
+ ctx \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NRET_ATOM_LPU -
- * atom is released by lpu
+ * KBASE_TLSTREAM_TL_NRET_ATOM_LPU - atom is released by lpu
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -879,12 +1139,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_nret_atom_lpu( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, lpu); \
+ atom, \
+ lpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_RET_AS_CTX -
- * address space is retained by context
+ * KBASE_TLSTREAM_TL_RET_AS_CTX - address space is retained by context
*
* @kbdev: Kbase device
* @address_space: Name of the address space object
@@ -900,12 +1161,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_ret_as_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- address_space, ctx); \
+ address_space, \
+ ctx \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NRET_AS_CTX -
- * address space is released by context
+ * KBASE_TLSTREAM_TL_NRET_AS_CTX - address space is released by context
*
* @kbdev: Kbase device
* @address_space: Name of the address space object
@@ -921,12 +1183,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_nret_as_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- address_space, ctx); \
+ address_space, \
+ ctx \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_RET_ATOM_AS -
- * atom is retained by address space
+ * KBASE_TLSTREAM_TL_RET_ATOM_AS - atom is retained by address space
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -942,12 +1205,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_ret_atom_as( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, address_space); \
+ atom, \
+ address_space \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_NRET_ATOM_AS -
- * atom is released by address space
+ * KBASE_TLSTREAM_TL_NRET_ATOM_AS - atom is released by address space
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -963,12 +1227,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_nret_atom_as( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, address_space); \
+ atom, \
+ address_space \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG -
- * atom job slot attributes
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_CONFIG - atom job slot attributes
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -988,117 +1253,19 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_attrib_atom_config( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, descriptor, affinity, config); \
+ atom, \
+ descriptor, \
+ affinity, \
+ config \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY -
- * atom priority
- *
- * @kbdev: Kbase device
- * @atom: Atom identifier
- * @prio: Atom priority
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \
- kbdev, \
- atom, \
- prio \
- ) \
- do { \
- int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
- __kbase_tlstream_tl_attrib_atom_priority( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom, prio); \
- } while (0)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE -
- * atom state
- *
- * @kbdev: Kbase device
- * @atom: Atom identifier
- * @state: Atom state
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \
- kbdev, \
- atom, \
- state \
- ) \
- do { \
- int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
- __kbase_tlstream_tl_attrib_atom_state( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom, state); \
- } while (0)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED -
- * atom caused priority change
- *
- * @kbdev: Kbase device
- * @atom: Atom identifier
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \
- kbdev, \
- atom \
- ) \
- do { \
- int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
- __kbase_tlstream_tl_attrib_atom_prioritized( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
- } while (0)
-
-/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT -
- * jit done for atom
- *
- * @kbdev: Kbase device
- * @atom: Atom identifier
- * @edit_addr: Address edited by jit
- * @new_addr: Address placed into the edited location
- * @jit_flags: Flags specifying the special requirements for
- * the JIT allocation.
- * @mem_flags: Flags defining the properties of a memory region
- * @j_id: Unique ID provided by the caller, this is used
- * to pair allocation and free requests.
- * @com_pgs: The minimum number of physical pages which
- * should back the allocation.
- * @extent: Granularity of physical pages to grow the
- * allocation by during a fault.
- * @va_pgs: The minimum number of virtual pages required
- */
-#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \
- kbdev, \
- atom, \
- edit_addr, \
- new_addr, \
- jit_flags, \
- mem_flags, \
- j_id, \
- com_pgs, \
- extent, \
- va_pgs \
- ) \
- do { \
- int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \
- __kbase_tlstream_tl_attrib_atom_jit( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom, edit_addr, new_addr, jit_flags, mem_flags, j_id, com_pgs, extent, va_pgs); \
- } while (0)
-
-/**
- * KBASE_TLSTREAM_TL_JIT_USEDPAGES -
- * used pages for jit
+ * KBASE_TLSTREAM_TL_JIT_USEDPAGES - used pages for jit
*
* @kbdev: Kbase device
* @used_pages: Number of pages used for jit
- * @j_id: Unique ID provided by the caller, this is used
- * to pair allocation and free requests.
+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests.
*/
#define KBASE_TLSTREAM_TL_JIT_USEDPAGES( \
kbdev, \
@@ -1110,28 +1277,23 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_jit_usedpages( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- used_pages, j_id); \
+ used_pages, \
+ j_id \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO -
- * Information about JIT allocations
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO - Information about JIT allocations
*
* @kbdev: Kbase device
* @atom: Atom identifier
* @va_pgs: The minimum number of virtual pages required
- * @com_pgs: The minimum number of physical pages which
- * should back the allocation.
- * @extent: Granularity of physical pages to grow the
- * allocation by during a fault.
- * @j_id: Unique ID provided by the caller, this is used
- * to pair allocation and free requests.
- * @bin_id: The JIT allocation bin, used in conjunction with
- * max_allocations to limit the number of each
- * type of JIT allocation.
+ * @com_pgs: The minimum number of physical pages which should back the allocation.
+ * @extent: Granularity of physical pages to grow the allocation by during a fault.
+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests.
+ * @bin_id: The JIT allocation bin, used in conjunction with max_allocations to limit the number of each type of JIT allocation.
* @max_allocs: Maximum allocations allowed in this bin.
- * @jit_flags: Flags specifying the special requirements for
- * the JIT allocation.
+ * @jit_flags: Flags specifying the special requirements for the JIT allocation.
* @usg_id: A hint about which allocation should be reused.
*/
#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO( \
@@ -1151,17 +1313,24 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_attrib_atom_jitallocinfo( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, va_pgs, com_pgs, extent, j_id, bin_id, max_allocs, jit_flags, usg_id); \
+ atom, \
+ va_pgs, \
+ com_pgs, \
+ extent, \
+ j_id, \
+ bin_id, \
+ max_allocs, \
+ jit_flags, \
+ usg_id \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO -
- * Information about JIT frees
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO - Information about JIT frees
*
* @kbdev: Kbase device
* @atom: Atom identifier
- * @j_id: Unique ID provided by the caller, this is used
- * to pair allocation and free requests.
+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests.
*/
#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITFREEINFO( \
kbdev, \
@@ -1173,12 +1342,13 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_attrib_atom_jitfreeinfo( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom, j_id); \
+ atom, \
+ j_id \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG -
- * address space attributes
+ * KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG - address space attributes
*
* @kbdev: Kbase device
* @address_space: Name of the address space object
@@ -1198,12 +1368,15 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_attrib_as_config( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- address_space, transtab, memattr, transcfg); \
+ address_space, \
+ transtab, \
+ memattr, \
+ transcfg \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP -
- * softstop event on given lpu
+ * KBASE_TLSTREAM_TL_EVENT_LPU_SOFTSTOP - softstop event on given lpu
*
* @kbdev: Kbase device
* @lpu: Name of the Logical Processing Unit object
@@ -1217,12 +1390,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_event_lpu_softstop( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- lpu); \
+ lpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX -
- * atom softstopped
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_EX - atom softstopped
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -1236,12 +1409,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_event_atom_softstop_ex( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE -
- * atom softstop issued
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTSTOP_ISSUE - atom softstop issued
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -1255,12 +1428,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_event_atom_softstop_issue( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START -
- * atom soft job has started
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START - atom soft job has started
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -1274,12 +1447,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_event_atom_softjob_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END -
- * atom soft job has completed
+ * KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END - atom soft job has completed
*
* @kbdev: Kbase device
* @atom: Atom identifier
@@ -1293,12 +1466,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_event_atom_softjob_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ARBITER_GRANTED -
- * Arbiter has granted gpu access
+ * KBASE_TLSTREAM_TL_ARBITER_GRANTED - Arbiter has granted gpu access
*
* @kbdev: Kbase device
* @gpu: Name of the GPU object
@@ -1312,12 +1485,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_arbiter_granted( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- gpu); \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ARBITER_STARTED -
- * Driver is running again and able to process jobs
+ * KBASE_TLSTREAM_TL_ARBITER_STARTED - Driver is running again and able to process jobs
*
* @kbdev: Kbase device
* @gpu: Name of the GPU object
@@ -1331,12 +1504,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_arbiter_started( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- gpu); \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED -
- * Arbiter has requested driver to stop using gpu
+ * KBASE_TLSTREAM_TL_ARBITER_STOP_REQUESTED - Arbiter has requested driver to stop using gpu
*
* @kbdev: Kbase device
* @gpu: Name of the GPU object
@@ -1350,12 +1523,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_arbiter_stop_requested( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- gpu); \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ARBITER_STOPPED -
- * Driver has stopped using gpu
+ * KBASE_TLSTREAM_TL_ARBITER_STOPPED - Driver has stopped using gpu
*
* @kbdev: Kbase device
* @gpu: Name of the GPU object
@@ -1369,12 +1542,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_arbiter_stopped( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- gpu); \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_ARBITER_REQUESTED -
- * Driver has requested the arbiter for gpu access
+ * KBASE_TLSTREAM_TL_ARBITER_REQUESTED - Driver has requested the arbiter for gpu access
*
* @kbdev: Kbase device
* @gpu: Name of the GPU object
@@ -1388,12 +1561,12 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_tl_arbiter_requested( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- gpu); \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_JD_GPU_SOFT_RESET -
- * gpu soft reset
+ * KBASE_TLSTREAM_JD_GPU_SOFT_RESET - gpu soft reset
*
* @kbdev: Kbase device
* @gpu: Name of the GPU object
@@ -1407,302 +1580,365 @@ struct kbase_tlstream;
if (enabled & TLSTREAM_ENABLED) \
__kbase_tlstream_jd_gpu_soft_reset( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- gpu); \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_PM_STATE -
- * PM state
+ * KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC - Tiler Heap Chunk Allocation
*
* @kbdev: Kbase device
- * @core_type: Core type (shader, tiler, l2 cache, l3 cache)
- * @core_state_bitset: 64bits bitmask reporting power state of the cores
- * (1-ON, 0-OFF)
+ * @ctx_nr: Kernel context number
+ * @heap_id: Unique id used to represent a heap under a context
+ * @chunk_va: Virtual start address of tiler heap chunk
*/
-#define KBASE_TLSTREAM_AUX_PM_STATE( \
+#define KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC( \
kbdev, \
- core_type, \
- core_state_bitset \
+ ctx_nr, \
+ heap_id, \
+ chunk_va \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_pm_state( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- core_type, core_state_bitset); \
+ __kbase_tlstream_jd_tiler_heap_chunk_alloc( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ ctx_nr, \
+ heap_id, \
+ chunk_va \
+ ); \
} while (0)
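
On the consumer side a record carries only the message id, the timestamp and the raw argument bytes in the order the writer serialized them, so a decoder has to mirror that order per message. A minimal user-space sketch for the payload of the new JD_TILER_HEAP_CHUNK_ALLOC tracepoint documented above, assuming its writer serializes ctx_nr, heap_id and chunk_va in declared order like the other writers in this patch (the struct, the read_bytes() helper and the already-framed packet are illustrative assumptions, not part of the driver):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Illustrative payload mirroring the writer's field order. */
struct chunk_alloc_payload {
	uint64_t timestamp; /* emitted by kbasep_serialize_timestamp() */
	uint32_t ctx_nr;    /* kernel context number */
	uint64_t heap_id;   /* unique id of the heap within the context */
	uint64_t chunk_va;  /* virtual start address of the tiler heap chunk */
};

/* Hypothetical helper: copy the next 'len' bytes out of an already-framed packet. */
static size_t read_bytes(const char *pkt, size_t pos, void *dst, size_t len)
{
	memcpy(dst, pkt + pos, len);
	return pos + len;
}

static size_t decode_chunk_alloc(const char *pkt, size_t pos,
				 struct chunk_alloc_payload *p)
{
	pos = read_bytes(pkt, pos, &p->timestamp, sizeof(p->timestamp));
	pos = read_bytes(pkt, pos, &p->ctx_nr, sizeof(p->ctx_nr));
	pos = read_bytes(pkt, pos, &p->heap_id, sizeof(p->heap_id));
	pos = read_bytes(pkt, pos, &p->chunk_va, sizeof(p->chunk_va));
	return pos;
}
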
/**
- * KBASE_TLSTREAM_AUX_PAGEFAULT -
- * Page fault
+ * KBASE_TLSTREAM_TL_JS_SCHED_START - Scheduling starts
*
* @kbdev: Kbase device
- * @ctx_nr: Kernel context number
- * @as_nr: Address space number
- * @page_cnt_change: Number of pages to be added
+ * @dummy: dummy argument
*/
-#define KBASE_TLSTREAM_AUX_PAGEFAULT( \
+#define KBASE_TLSTREAM_TL_JS_SCHED_START( \
kbdev, \
- ctx_nr, \
- as_nr, \
- page_cnt_change \
+ dummy \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_pagefault( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- ctx_nr, as_nr, page_cnt_change); \
+ __kbase_tlstream_tl_js_sched_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ dummy \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_PAGESALLOC -
- * Total alloc pages change
+ * KBASE_TLSTREAM_TL_JS_SCHED_END - Scheduling ends
*
* @kbdev: Kbase device
- * @ctx_nr: Kernel context number
- * @page_cnt: Number of pages used by the context
+ * @dummy: dummy argument
*/
-#define KBASE_TLSTREAM_AUX_PAGESALLOC( \
+#define KBASE_TLSTREAM_TL_JS_SCHED_END( \
kbdev, \
- ctx_nr, \
- page_cnt \
+ dummy \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_pagesalloc( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- ctx_nr, page_cnt); \
+ __kbase_tlstream_tl_js_sched_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ dummy \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET -
- * New device frequency target
+ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START - Submitting an atom starts
*
* @kbdev: Kbase device
- * @target_freq: New target frequency
+ * @atom: Atom identifier
*/
-#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \
+#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START( \
kbdev, \
- target_freq \
+ atom \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_devfreq_target( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- target_freq); \
+ __kbase_tlstream_tl_jd_submit_atom_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START -
- * enter protected mode start
+ * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END - Submitting an atom ends
*
* @kbdev: Kbase device
- * @gpu: Name of the GPU object
+ * @atom: Atom identifier
*/
-#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \
+#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END( \
kbdev, \
- gpu \
+ atom \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
- __kbase_tlstream_aux_protected_enter_start( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- gpu); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_submit_atom_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END -
- * enter protected mode end
+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START - Within function jd_done_nolock
*
* @kbdev: Kbase device
- * @gpu: Name of the GPU object
+ * @atom: Atom identifier
*/
-#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \
+#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START( \
kbdev, \
- gpu \
+ atom \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
- __kbase_tlstream_aux_protected_enter_end( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- gpu); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_done_no_lock_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START -
- * leave protected mode start
+ * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END - Within function jd_done_nolock - end
*
* @kbdev: Kbase device
- * @gpu: Name of the GPU object
+ * @atom: Atom identifier
*/
-#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \
+#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END( \
kbdev, \
- gpu \
+ atom \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
- __kbase_tlstream_aux_protected_leave_start( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- gpu); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_done_no_lock_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END -
- * leave protected mode end
+ * KBASE_TLSTREAM_TL_JD_DONE_START - Start of kbase_jd_done
*
* @kbdev: Kbase device
- * @gpu: Name of the GPU object
+ * @atom: Atom identifier
*/
-#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \
+#define KBASE_TLSTREAM_TL_JD_DONE_START( \
kbdev, \
- gpu \
+ atom \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
- __kbase_tlstream_aux_protected_leave_end( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- gpu); \
+ if (enabled & TLSTREAM_ENABLED) \
+ __kbase_tlstream_tl_jd_done_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_JIT_STATS -
- * per-bin JIT statistics
+ * KBASE_TLSTREAM_TL_JD_DONE_END - End of kbase_jd_done
*
* @kbdev: Kbase device
- * @ctx_nr: Kernel context number
- * @bid: JIT bin id
- * @max_allocs: Maximum allocations allowed in this bin.
- * @allocs: Number of active allocations in this bin
- * @va_pages: Number of virtual pages allocated in this bin
- * @ph_pages: Number of physical pages allocated in this bin
+ * @atom: Atom identifier
*/
-#define KBASE_TLSTREAM_AUX_JIT_STATS( \
+#define KBASE_TLSTREAM_TL_JD_DONE_END( \
kbdev, \
- ctx_nr, \
- bid, \
- max_allocs, \
- allocs, \
- va_pages, \
- ph_pages \
+ atom \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_jit_stats( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- ctx_nr, bid, max_allocs, allocs, va_pages, ph_pages); \
+ __kbase_tlstream_tl_jd_done_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_TILER_HEAP_STATS -
- * Tiler Heap statistics
+ * KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE - Atom marked complete
*
* @kbdev: Kbase device
- * @ctx_nr: Kernel context number
- * @heap_id: Unique id used to represent a heap under a context
- * @va_pages: Number of virtual pages allocated in this bin
- * @ph_pages: Number of physical pages allocated in this bin
- * @max_chunks: The maximum number of chunks that the heap should be allowed to use
- * @chunk_size: Size of each chunk in tiler heap, in bytes
- * @chunk_count: The number of chunks currently allocated in the tiler heap
- * @target_in_flight: Number of render-passes that the driver should attempt
- * to keep in flight for which allocation of new chunks is allowed
- * @nr_in_flight: Number of render-passes that are in flight
+ * @atom: Atom identifier
*/
-#define KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( \
+#define KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE( \
kbdev, \
- ctx_nr, \
- heap_id, \
- va_pages, \
- ph_pages, \
- max_chunks, \
- chunk_size, \
- chunk_count, \
- target_in_flight, \
- nr_in_flight \
+ atom \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_tiler_heap_stats( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- ctx_nr, heap_id, va_pages, ph_pages, max_chunks, chunk_size, chunk_count, target_in_flight, nr_in_flight); \
+ __kbase_tlstream_tl_jd_atom_complete( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT -
- * event on a given job slot
+ * KBASE_TLSTREAM_TL_RUN_ATOM_START - Running of atom starts
*
* @kbdev: Kbase device
- * @ctx: Name of the context object
- * @slot_nr: Job slot number
+ * @atom: Atom identifier
* @atom_nr: Sequential number of an atom
- * @event: Event type. One of TL_JS_EVENT values
*/
-#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \
+#define KBASE_TLSTREAM_TL_RUN_ATOM_START( \
kbdev, \
- ctx, \
- slot_nr, \
- atom_nr, \
- event \
+ atom, \
+ atom_nr \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_event_job_slot( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- ctx, slot_nr, atom_nr, event); \
+ __kbase_tlstream_tl_run_atom_start( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom, \
+ atom_nr \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_AUX_MMU_COMMAND -
- * mmu commands with synchronicity info
+ * KBASE_TLSTREAM_TL_RUN_ATOM_END - Running of atom ends
*
* @kbdev: Kbase device
- * @kernel_ctx_id: Unique ID for the KBase Context
- * @mmu_cmd_id: MMU Command ID (e.g AS_COMMAND_UPDATE)
- * @mmu_synchronicity: Indicates whether the command is related to current running job
- * that needs to be resolved to make it progress (synchronous, e.g.
- * grow on page fault, JIT) or not (asynchronous, e.g. IOCTL calls
- * from user-space). This param will be 0 if it is an asynchronous
- * operation.
- * @mmu_lock_addr: start address of regions to be locked/unlocked/invalidated
- * @mmu_lock_page_num: number of pages to be locked/unlocked/invalidated
+ * @atom: Atom identifier
+ * @atom_nr: Sequential number of an atom
*/
-#define KBASE_TLSTREAM_AUX_MMU_COMMAND( \
+#define KBASE_TLSTREAM_TL_RUN_ATOM_END( \
kbdev, \
- kernel_ctx_id, \
- mmu_cmd_id, \
- mmu_synchronicity, \
- mmu_lock_addr, \
- mmu_lock_page_num \
+ atom, \
+ atom_nr \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_aux_mmu_command( \
- __TL_DISPATCH_STREAM(kbdev, aux), \
- kernel_ctx_id, mmu_cmd_id, mmu_synchronicity, mmu_lock_addr, mmu_lock_page_num); \
+ __kbase_tlstream_tl_run_atom_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom, \
+ atom_nr \
+ ); \
+ } while (0)
+
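The block above replaces the old AUX_* definitions at this position with paired job-dispatcher lifecycle tracepoints (submit, jd_done_nolock, kbase_jd_done and run-atom, each with a START and an END event); the AUX_* macros reappear, reflowed, near the end of the header. A hedged sketch of how a timeline consumer could pair such START/END records by atom number follows; the event layout and field names are assumptions made for this example, not the kbase timeline wire format.

/* Sketch: pairing START/END records such as those emitted by
 * TL_RUN_ATOM_START/_END.  Event layout is assumed for the example. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum demo_kind { RUN_ATOM_START, RUN_ATOM_END };

struct demo_event {
	enum demo_kind kind;
	uint32_t atom_nr;   /* sequential number of an atom */
	uint64_t ts_ns;     /* timestamp in nanoseconds */
};

int main(void)
{
	static const struct demo_event trace[] = {
		{ RUN_ATOM_START, 7, 1000 },
		{ RUN_ATOM_START, 8, 1200 },
		{ RUN_ATOM_END,   7, 4500 },
		{ RUN_ATOM_END,   8, 9000 },
	};
	uint64_t start_ts[16] = { 0 };   /* indexed by atom_nr for brevity */

	for (size_t i = 0; i < sizeof(trace) / sizeof(trace[0]); i++) {
		const struct demo_event *e = &trace[i];

		if (e->kind == RUN_ATOM_START)
			start_ts[e->atom_nr] = e->ts_ns;
		else
			printf("atom %u ran for %llu ns\n",
			       (unsigned)e->atom_nr,
			       (unsigned long long)(e->ts_ns - start_ts[e->atom_nr]));
	}
	return 0;
}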
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY - atom priority
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ * @prio: Atom priority
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY( \
+ kbdev, \
+ atom, \
+ prio \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+ __kbase_tlstream_tl_attrib_atom_priority( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom, \
+ prio \
+ ); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE - atom state
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ * @state: Atom state
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( \
+ kbdev, \
+ atom, \
+ state \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+ __kbase_tlstream_tl_attrib_atom_state( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom, \
+ state \
+ ); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED - atom caused priority change
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED( \
+ kbdev, \
+ atom \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+ __kbase_tlstream_tl_attrib_atom_prioritized( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom \
+ ); \
+ } while (0)
+
+/**
+ * KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT - jit done for atom
+ *
+ * @kbdev: Kbase device
+ * @atom: Atom identifier
+ * @edit_addr: Address edited by jit
+ * @new_addr: Address placed into the edited location
+ * @jit_flags: Flags specifying the special requirements for the JIT allocation.
+ * @mem_flags: Flags defining the properties of a memory region
+ * @j_id: Unique ID provided by the caller, this is used to pair allocation and free requests.
+ * @com_pgs: The minimum number of physical pages which should back the allocation.
+ * @extent: Granularity of physical pages to grow the allocation by during a fault.
+ * @va_pgs: The minimum number of virtual pages required
+ */
+#define KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT( \
+ kbdev, \
+ atom, \
+ edit_addr, \
+ new_addr, \
+ jit_flags, \
+ mem_flags, \
+ j_id, \
+ com_pgs, \
+ extent, \
+ va_pgs \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_JOB_DUMPING_ENABLED) \
+ __kbase_tlstream_tl_attrib_atom_jit( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ atom, \
+ edit_addr, \
+ new_addr, \
+ jit_flags, \
+ mem_flags, \
+ j_id, \
+ com_pgs, \
+ extent, \
+ va_pgs \
+ ); \
} while (0)
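Note that the new TL_ATTRIB_ATOM_PRIORITY, _STATE and _PRIORITIZED tracepoints are gated on BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS and TL_ATTRIB_ATOM_JIT on BASE_TLSTREAM_JOB_DUMPING_ENABLED, whereas the scheduling and job-dispatch events above use the plain TLSTREAM_ENABLED bit. A small sketch of such per-category enabling is given below; the bit values are invented for the example and only the flag names come from the diff.

/* Sketch of per-category enabling.  Bit values are invented; only the
 * flag names appear in the diff above. */
#include <stdio.h>

#define DEMO_TLSTREAM_ENABLED              (1u << 0)
#define DEMO_ENABLE_LATENCY_TRACEPOINTS    (1u << 1)
#define DEMO_JOB_DUMPING_ENABLED           (1u << 2)

int main(void)
{
	unsigned int timeline_flags =
		DEMO_TLSTREAM_ENABLED | DEMO_ENABLE_LATENCY_TRACEPOINTS;

	/* Each tracepoint tests only its own category bit, so the cheap
	 * atom-state events can be on while JIT dumping stays off. */
	printf("atom state events: %s\n",
	       (timeline_flags & DEMO_ENABLE_LATENCY_TRACEPOINTS) ? "on" : "off");
	printf("atom jit event:    %s\n",
	       (timeline_flags & DEMO_JOB_DUMPING_ENABLED) ? "on" : "off");
	return 0;
}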
/**
- * KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE -
- * New KBase Device
+ * KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE - New KBase Device
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
* @kbase_device_gpu_core_count: The number of gpu cores in the physical hardware
* @kbase_device_max_num_csgs: The max number of CSGs the physical hardware supports
* @kbase_device_as_count: The number of address spaces the physical hardware has available
- * @kbase_device_sb_entry_count: The number of entries each scoreboard set in the
- * physical hardware has available
+ * @kbase_device_sb_entry_count: The number of entries each scoreboard set in the physical hardware has available
* @kbase_device_has_cross_stream_sync: Whether cross-stream synchronization is supported
* @kbase_device_supports_gpu_sleep: Whether GPU sleep is supported
*/
@@ -1722,7 +1958,14 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_new_device( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, kbase_device_gpu_core_count, kbase_device_max_num_csgs, kbase_device_as_count, kbase_device_sb_entry_count, kbase_device_has_cross_stream_sync, kbase_device_supports_gpu_sleep); \
+ kbase_device_id, \
+ kbase_device_gpu_core_count, \
+ kbase_device_max_num_csgs, \
+ kbase_device_as_count, \
+ kbase_device_sb_entry_count, \
+ kbase_device_has_cross_stream_sync, \
+ kbase_device_supports_gpu_sleep \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_NEW_DEVICE( \
@@ -1739,8 +1982,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG -
- * CSG is programmed to a slot
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG - CSG is programmed to a slot
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
@@ -1763,7 +2005,12 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_program_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, kernel_ctx_id, gpu_cmdq_grp_handle, kbase_device_csg_slot_index, kbase_device_csg_slot_resumed); \
+ kbase_device_id, \
+ kernel_ctx_id, \
+ gpu_cmdq_grp_handle, \
+ kbase_device_csg_slot_index, \
+ kbase_device_csg_slot_resumed \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_PROGRAM_CSG( \
@@ -1778,8 +2025,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG -
- * CSG is deprogrammed from a slot
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG - CSG is deprogrammed from a slot
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
@@ -1796,7 +2042,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_deprogram_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, kbase_device_csg_slot_index); \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_DEPROGRAM_CSG( \
@@ -1808,13 +2056,13 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG -
- * CSG is halted
+ * KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG - CSG is halted
*
* @kbdev: Kbase device
* @kbase_device_id: The ID of the physical hardware
* @kbase_device_csg_slot_index: The index of the slot in the scheduler being programmed
*/
+#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
kbdev, \
kbase_device_id, \
@@ -1822,15 +2070,24 @@ struct kbase_tlstream;
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & TLSTREAM_ENABLED) \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_device_halt_csg( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kbase_device_id, kbase_device_csg_slot_index); \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ); \
} while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG( \
+ kbdev, \
+ kbase_device_id, \
+ kbase_device_csg_slot_index \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
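This is one of the few functional changes in the hunk rather than a reflow: KBASE_TLSTREAM_TL_KBASE_DEVICE_HALT_CSG is now gated on BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS instead of TLSTREAM_ENABLED, and it gains the same #if MALI_USE_CSF / empty-stub #else structure as the other CSF tracepoints. A standalone model of that compile-time stub pattern is sketched below; the DEMO_* names are invented and the sketch is not the driver's code.

/* Standalone model of the compile-time stub pattern used for the CSF
 * tracepoints.  All DEMO_* names are invented for the sketch. */
#include <stdio.h>

#define DEMO_USE_CSF 0   /* flip to 1 to get the real tracepoint */

#if DEMO_USE_CSF
#define DEMO_TRACE_HALT_CSG(dev_id, slot) \
	do { \
		printf("halt CSG: device %d slot %d\n", (dev_id), (slot)); \
	} while (0)
#else
/* Same parameter list, but an empty body: the arguments are discarded
 * by the preprocessor, so call sites compile unchanged and need no
 * #ifdefs of their own. */
#define DEMO_TRACE_HALT_CSG(dev_id, slot) \
	do { } while (0)
#endif

int main(void)
{
	DEMO_TRACE_HALT_CSG(0, 3);   /* compiles either way */
	return 0;
}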
/**
- * KBASE_TLSTREAM_TL_KBASE_NEW_CTX -
- * New KBase Context
+ * KBASE_TLSTREAM_TL_KBASE_NEW_CTX - New KBase Context
*
* @kbdev: Kbase device
* @kernel_ctx_id: Unique ID for the KBase Context
@@ -1847,7 +2104,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_new_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kernel_ctx_id, kbase_device_id); \
+ kernel_ctx_id, \
+ kbase_device_id \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_NEW_CTX( \
@@ -1859,8 +2118,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_DEL_CTX -
- * Delete KBase Context
+ * KBASE_TLSTREAM_TL_KBASE_DEL_CTX - Delete KBase Context
*
* @kbdev: Kbase device
* @kernel_ctx_id: Unique ID for the KBase Context
@@ -1875,7 +2133,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_del_ctx( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kernel_ctx_id); \
+ kernel_ctx_id \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEL_CTX( \
@@ -1886,8 +2145,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS -
- * Address Space is assigned to a KBase context
+ * KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS - Address Space is assigned to a KBase context
*
* @kbdev: Kbase device
* @kernel_ctx_id: Unique ID for the KBase Context
@@ -1904,7 +2162,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_ctx_assign_as( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kernel_ctx_id, kbase_device_as_index); \
+ kernel_ctx_id, \
+ kbase_device_as_index \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CTX_ASSIGN_AS( \
@@ -1916,8 +2176,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS -
- * Address Space is unassigned from a KBase context
+ * KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS - Address Space is unassigned from a KBase context
*
* @kbdev: Kbase device
* @kernel_ctx_id: Unique ID for the KBase Context
@@ -1932,7 +2191,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_ctx_unassign_as( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kernel_ctx_id); \
+ kernel_ctx_id \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CTX_UNASSIGN_AS( \
@@ -1943,19 +2203,19 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE -
- * New KCPU Queue
+ * KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE - New KCPU Queue
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @kcpu_queue_id: KCPU queue ID
* @kernel_ctx_id: Unique ID for the KBase Context
- * @kcpuq_num_pending_cmds: Number of commands already enqueued
- * in the KCPU queue
+ * @kcpuq_num_pending_cmds: Number of commands already enqueued in the KCPU queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \
kbdev, \
kcpu_queue, \
+ kcpu_queue_id, \
kernel_ctx_id, \
kcpuq_num_pending_cmds \
) \
@@ -1964,12 +2224,17 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_new_kcpuqueue( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, kernel_ctx_id, kcpuq_num_pending_cmds); \
+ kcpu_queue, \
+ kcpu_queue_id, \
+ kernel_ctx_id, \
+ kcpuq_num_pending_cmds \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE( \
kbdev, \
kcpu_queue, \
+ kcpu_queue_id, \
kernel_ctx_id, \
kcpuq_num_pending_cmds \
) \
@@ -1977,8 +2242,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
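KBASE_TLSTREAM_TL_KBASE_NEW_KCPUQUEUE gains a kcpu_queue_id argument between the queue pointer and the kernel context ID, in both the CSF variant and the stub, so call sites have to pass the queue's ID explicitly. The sketch below only demonstrates that argument order; apart from the parameter names taken from the diff, everything in it is a stand-in invented so the example compiles on its own, and it is not the kbase API.

/* Stand-in with the same parameter list and order as the macro above. */
#include <stdio.h>

#define DEMO_TL_KBASE_NEW_KCPUQUEUE(kbdev, kcpu_queue, kcpu_queue_id, \
				    kernel_ctx_id, kcpuq_num_pending_cmds) \
	do { \
		printf("new kcpu queue %p id=%u ctx=%u pending=%u\n", \
		       (void *)(kcpu_queue), (unsigned)(kcpu_queue_id), \
		       (unsigned)(kernel_ctx_id), \
		       (unsigned)(kcpuq_num_pending_cmds)); \
	} while (0)

int main(void)
{
	int queue_object;      /* placeholder for the real queue struct */
	void *kbdev = NULL;    /* unused by the stand-in */

	(void)kbdev;
	/* kcpu_queue_id is the new second argument after the queue itself. */
	DEMO_TL_KBASE_NEW_KCPUQUEUE(kbdev, &queue_object, 3u, 17u, 0u);
	return 0;
}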
/**
- * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE -
- * Delete KCPU Queue
+ * KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE - Delete KCPU Queue
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -1993,7 +2257,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_del_kcpuqueue( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_DEL_KCPUQUEUE( \
@@ -2004,8 +2269,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL -
- * KCPU Queue enqueues Signal on Fence
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL - KCPU Queue enqueues Signal on Fence
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2022,7 +2286,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_signal( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, fence); \
+ kcpu_queue, \
+ fence \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_SIGNAL( \
@@ -2034,8 +2300,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT -
- * KCPU Queue enqueues Wait on Fence
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT - KCPU Queue enqueues Wait on Fence
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2052,7 +2317,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_fence_wait( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, fence); \
+ kcpu_queue, \
+ fence \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_FENCE_WAIT( \
@@ -2064,14 +2331,12 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT -
- * KCPU Queue enqueues Wait on Cross Queue Sync Object
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT - KCPU Queue enqueues Wait on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @cqs_obj_gpu_addr: CQS Object GPU pointer
- * @cqs_obj_compare_value: Semaphore value that should be exceeded
- * for the WAIT to pass
+ * @cqs_obj_compare_value: Semaphore value that should be exceeded for the WAIT to pass
* @cqs_obj_inherit_error: Flag which indicates if the CQS object error state should be inherited by the queue
*/
#if MALI_USE_CSF
@@ -2087,7 +2352,11 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value, cqs_obj_inherit_error); \
+ kcpu_queue, \
+ cqs_obj_gpu_addr, \
+ cqs_obj_compare_value, \
+ cqs_obj_inherit_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
@@ -2101,8 +2370,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET -
- * KCPU Queue enqueues Set on Cross Queue Sync Object
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET - KCPU Queue enqueues Set on Cross Queue Sync Object
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2119,7 +2387,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, cqs_obj_gpu_addr); \
+ kcpu_queue, \
+ cqs_obj_gpu_addr \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET( \
@@ -2131,8 +2401,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT -
- * KCPU Queue enqueues Map Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT - KCPU Queue enqueues Map Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2149,7 +2418,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_map_import( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, map_import_buf_gpu_addr); \
+ kcpu_queue, \
+ map_import_buf_gpu_addr \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT( \
@@ -2161,8 +2432,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT -
- * KCPU Queue enqueues Unmap Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT - KCPU Queue enqueues Unmap Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2179,7 +2449,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, map_import_buf_gpu_addr); \
+ kcpu_queue, \
+ map_import_buf_gpu_addr \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT( \
@@ -2191,8 +2463,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE -
- * KCPU Queue enqueues Unmap Import ignoring reference count
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE - KCPU Queue enqueues Unmap Import ignoring reference count
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2209,7 +2480,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, map_import_buf_gpu_addr); \
+ kcpu_queue, \
+ map_import_buf_gpu_addr \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE( \
@@ -2221,8 +2494,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER -
- * KCPU Queue enqueues Error Barrier
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER - KCPU Queue enqueues Error Barrier
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2237,7 +2509,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \
@@ -2248,8 +2521,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND -
- * KCPU Queue enqueues Group Suspend
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND - KCPU Queue enqueues Group Suspend
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2268,7 +2540,10 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, group_suspend_buf, gpu_cmdq_grp_handle); \
+ kcpu_queue, \
+ group_suspend_buf, \
+ gpu_cmdq_grp_handle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \
@@ -2281,8 +2556,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
- * Begin array of KCPU Queue enqueues JIT Alloc
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC - Begin array of KCPU Queue enqueues JIT Alloc
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2297,7 +2571,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \
@@ -2308,30 +2583,19 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
- * Array item of KCPU Queue enqueues JIT Alloc
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC - Array item of KCPU Queue enqueues JIT Alloc
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
- * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write
- * the JIT allocated GPU virtual address to
+ * @jit_alloc_gpu_alloc_addr_dest: The GPU virtual address to write the JIT allocated GPU virtual address to
* @jit_alloc_va_pages: The minimum number of virtual pages required
- * @jit_alloc_commit_pages: The minimum number of physical pages which
- * should back the allocation
- * @jit_alloc_extent: Granularity of physical pages to grow the allocation
- * by during a fault
- * @jit_alloc_jit_id: Unique ID provided by the caller, this is used
- * to pair allocation and free requests. Zero is not a valid value
- * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with
- * max_allocations to limit the number of each type of JIT allocation
- * @jit_alloc_max_allocations: The maximum number of allocations
- * allowed within the bin specified by bin_id. Should be the same for all
- * JIT allocations within the same bin.
- * @jit_alloc_flags: Flags specifying the special requirements for the
- * JIT allocation
- * @jit_alloc_usage_id: A hint about which allocation should be
- * reused. The kernel should attempt to use a previous allocation with the same
- * usage_id
+ * @jit_alloc_commit_pages: The minimum number of physical pages which should back the allocation
+ * @jit_alloc_extent: Granularity of physical pages to grow the allocation by during a fault
+ * @jit_alloc_jit_id: Unique ID provided by the caller, this is used to pair allocation and free requests. Zero is not a valid value
+ * @jit_alloc_bin_id: The JIT allocation bin, used in conjunction with max_allocations to limit the number of each type of JIT allocation
+ * @jit_alloc_max_allocations: The maximum number of allocations allowed within the bin specified by bin_id. Should be the same for all JIT allocations within the same bin.
+ * @jit_alloc_flags: Flags specifying the special requirements for the JIT allocation
+ * @jit_alloc_usage_id: A hint about which allocation should be reused. The kernel should attempt to use a previous allocation with the same usage_id
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \
@@ -2352,7 +2616,17 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_alloc( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, jit_alloc_gpu_alloc_addr_dest, jit_alloc_va_pages, jit_alloc_commit_pages, jit_alloc_extent, jit_alloc_jit_id, jit_alloc_bin_id, jit_alloc_max_allocations, jit_alloc_flags, jit_alloc_usage_id); \
+ kcpu_queue, \
+ jit_alloc_gpu_alloc_addr_dest, \
+ jit_alloc_va_pages, \
+ jit_alloc_commit_pages, \
+ jit_alloc_extent, \
+ jit_alloc_jit_id, \
+ jit_alloc_bin_id, \
+ jit_alloc_max_allocations, \
+ jit_alloc_flags, \
+ jit_alloc_usage_id \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \
@@ -2372,8 +2646,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
- * End array of KCPU Queue enqueues JIT Alloc
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC - End array of KCPU Queue enqueues JIT Alloc
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2388,7 +2661,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_alloc( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC( \
@@ -2399,8 +2673,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE -
- * Begin array of KCPU Queue enqueues JIT Free
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE - Begin array of KCPU Queue enqueues JIT Free
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2415,7 +2688,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_free( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_FREE( \
@@ -2426,13 +2700,11 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE -
- * Array item of KCPU Queue enqueues JIT Free
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE - Array item of KCPU Queue enqueues JIT Free
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
- * @jit_alloc_jit_id: Unique ID provided by the caller, this is used
- * to pair allocation and free requests. Zero is not a valid value
+ * @jit_alloc_jit_id: Unique ID provided by the caller, this is used to pair allocation and free requests. Zero is not a valid value
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \
@@ -2445,7 +2717,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_item_kcpuqueue_enqueue_jit_free( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, jit_alloc_jit_id); \
+ kcpu_queue, \
+ jit_alloc_jit_id \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_FREE( \
@@ -2457,8 +2731,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE -
- * End array of KCPU Queue enqueues JIT Free
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE - End array of KCPU Queue enqueues JIT Free
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2473,7 +2746,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_end_kcpuqueue_enqueue_jit_free( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_FREE( \
@@ -2484,8 +2758,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START -
- * KCPU Queue starts a Signal on Fence
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START - KCPU Queue starts a Signal on Fence
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2500,7 +2773,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START( \
@@ -2511,8 +2785,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END -
- * KCPU Queue ends a Signal on Fence
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END - KCPU Queue ends a Signal on Fence
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2529,7 +2802,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \
@@ -2541,8 +2816,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START -
- * KCPU Queue starts a Wait on Fence
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START - KCPU Queue starts a Wait on Fence
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2557,7 +2831,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START( \
@@ -2568,8 +2843,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END -
- * KCPU Queue ends a Wait on Fence
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END - KCPU Queue ends a Wait on Fence
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2586,7 +2860,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \
@@ -2598,8 +2874,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START -
- * KCPU Queue starts a Wait on an array of Cross Queue Sync Objects
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START - KCPU Queue starts a Wait on an array of Cross Queue Sync Objects
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2614,7 +2889,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START( \
@@ -2625,8 +2901,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END -
- * KCPU Queue ends a Wait on an array of Cross Queue Sync Objects
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END - KCPU Queue ends a Wait on an array of Cross Queue Sync Objects
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2643,7 +2918,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \
@@ -2655,8 +2932,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET -
- * KCPU Queue executes a Set on an array of Cross Queue Sync Objects
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET - KCPU Queue executes a Set on an array of Cross Queue Sync Objects
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2673,7 +2949,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \
@@ -2685,8 +2963,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START -
- * KCPU Queue starts a Map Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START - KCPU Queue starts a Map Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2701,7 +2978,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START( \
@@ -2712,8 +2990,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END -
- * KCPU Queue ends a Map Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END - KCPU Queue ends a Map Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2730,7 +3007,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \
@@ -2742,8 +3021,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START -
- * KCPU Queue starts an Unmap Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START - KCPU Queue starts an Unmap Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2758,7 +3036,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START( \
@@ -2769,8 +3048,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END -
- * KCPU Queue ends an Unmap Import
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END - KCPU Queue ends an Unmap Import
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2787,7 +3065,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \
@@ -2799,8 +3079,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START -
- * KCPU Queue starts an Unmap Import ignoring reference count
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START - KCPU Queue starts an Unmap Import ignoring reference count
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2815,7 +3094,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START( \
@@ -2826,8 +3106,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END -
- * KCPU Queue ends an Unmap Import ignoring reference count
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END - KCPU Queue ends an Unmap Import ignoring reference count
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2844,7 +3123,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \
@@ -2856,8 +3137,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START -
- * KCPU Queue starts an array of JIT Allocs
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START - KCPU Queue starts an array of JIT Allocs
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2872,7 +3152,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START( \
@@ -2883,8 +3164,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
- * Begin array of KCPU Queue ends an array of JIT Allocs
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - Begin array of KCPU Queue ends an array of JIT Allocs
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2899,7 +3179,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
@@ -2910,8 +3191,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
- * Array item of KCPU Queue ends an array of JIT Allocs
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - Array item of KCPU Queue ends an array of JIT Allocs
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2932,7 +3212,11 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags); \
+ kcpu_queue, \
+ execute_error, \
+ jit_alloc_gpu_alloc_addr, \
+ jit_alloc_mmu_flags \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
@@ -2946,8 +3230,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END -
- * End array of KCPU Queue ends an array of JIT Allocs
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END - End array of KCPU Queue ends an array of JIT Allocs
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2962,7 +3245,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
@@ -2973,8 +3257,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START -
- * KCPU Queue starts an array of JIT Frees
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START - KCPU Queue starts an array of JIT Frees
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -2989,7 +3272,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_free_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START( \
@@ -3000,8 +3284,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END -
- * Begin array of KCPU Queue ends an array of JIT Frees
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END - Begin array of KCPU Queue ends an array of JIT Frees
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -3016,7 +3299,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
@@ -3027,14 +3311,12 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END -
- * Array item of KCPU Queue ends an array of JIT Frees
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END - Array item of KCPU Queue ends an array of JIT Frees
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
* @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
- * @jit_free_pages_used: The actual number of pages used by the JIT
- * allocation
+ * @jit_free_pages_used: The actual number of pages used by the JIT allocation
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
@@ -3048,7 +3330,10 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error, jit_free_pages_used); \
+ kcpu_queue, \
+ execute_error, \
+ jit_free_pages_used \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
@@ -3061,8 +3346,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END -
- * End array of KCPU Queue ends an array of JIT Frees
+ * KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END - End array of KCPU Queue ends an array of JIT Frees
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -3077,7 +3361,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
@@ -3088,8 +3373,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER -
- * KCPU Queue executes an Error Barrier
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER - KCPU Queue executes an Error Barrier
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -3104,7 +3388,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \
@@ -3115,8 +3400,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START -
- * KCPU Queue starts a group suspend
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START - KCPU Queue starts a group suspend
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -3131,7 +3415,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \
@@ -3142,8 +3427,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END -
- * KCPU Queue ends a group suspend
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END - KCPU Queue ends a group suspend
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
@@ -3160,7 +3444,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, execute_error); \
+ kcpu_queue, \
+ execute_error \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \
@@ -3172,8 +3458,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING -
- * CSF FW is being reloaded
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING - CSF FW is being reloaded
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
@@ -3188,7 +3473,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_fw_reloading( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_cycle); \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_RELOADING( \
@@ -3199,8 +3485,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING -
- * CSF FW is being enabled
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING - CSF FW is being enabled
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
@@ -3215,7 +3500,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_fw_enabling( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_cycle); \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_ENABLING( \
@@ -3226,8 +3512,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP -
- * CSF FW sleep is requested
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP - CSF FW sleep is requested
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
@@ -3242,7 +3527,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_fw_request_sleep( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_cycle); \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_SLEEP( \
@@ -3253,8 +3539,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP -
- * CSF FW wake up is requested
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP - CSF FW wake up is requested
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
@@ -3269,7 +3554,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_fw_request_wakeup( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_cycle); \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_WAKEUP( \
@@ -3280,8 +3566,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT -
- * CSF FW halt is requested
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT - CSF FW halt is requested
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
@@ -3296,7 +3581,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_fw_request_halt( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_cycle); \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_REQUEST_HALT( \
@@ -3307,8 +3593,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING -
- * CSF FW is being disabled
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING - CSF FW is being disabled
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
@@ -3323,7 +3608,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_fw_disabling( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_cycle); \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_DISABLING( \
@@ -3334,8 +3620,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF -
- * CSF FW is off
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF - CSF FW is off
*
* @kbdev: Kbase device
* @csffw_cycle: Cycle number of a CSFFW event
@@ -3350,7 +3635,8 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_fw_off( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_cycle); \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_FW_OFF( \
@@ -3361,8 +3647,7 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW -
- * An overflow has happened with the CSFFW Timeline stream
+ * KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW - An overflow has happened with the CSFFW Timeline stream
*
* @kbdev: Kbase device
* @csffw_timestamp: Timestamp of a CSFFW event
@@ -3379,7 +3664,9 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSFFW_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_csffw_tlstream_overflow( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- csffw_timestamp, csffw_cycle); \
+ csffw_timestamp, \
+ csffw_cycle \
+ ); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW( \
@@ -3391,218 +3678,304 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
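The final hunk moves the reflowed AUX_* tracepoints here, after the CSFFW events; unlike the TL_* tracepoints they dispatch to the aux stream via __TL_DISPATCH_STREAM(kbdev, aux) rather than the object stream. KBASE_TLSTREAM_AUX_PM_STATE in particular reports a 64-bit bitmask with one bit per core (1 = ON, 0 = OFF). The snippet below decodes such a bitmask; the example mask value is made up for illustration.

/* Sketch: decoding the kind of 64-bit core-state bitmask reported by
 * KBASE_TLSTREAM_AUX_PM_STATE (bit set = core ON).  The mask value is
 * made up for the example. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t core_state_bitset = 0x0000000000000F0FULL; /* cores 0-3 and 8-11 on */
	int powered = 0;

	for (int core = 0; core < 64; core++) {
		if (core_state_bitset & (1ULL << core)) {
			printf("core %d: ON\n", core);
			powered++;
		}
	}
	printf("%d cores powered\n", powered);
	return 0;
}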
/**
- * KBASE_TLSTREAM_TL_JS_SCHED_START -
- * Scheduling starts
+ * KBASE_TLSTREAM_AUX_PM_STATE - PM state
*
* @kbdev: Kbase device
- * @dummy: dummy argument
+ * @core_type: Core type (shader, tiler, l2 cache, l3 cache)
+ * @core_state_bitset: 64bits bitmask reporting power state of the cores (1-ON, 0-OFF)
*/
-#define KBASE_TLSTREAM_TL_JS_SCHED_START( \
+#define KBASE_TLSTREAM_AUX_PM_STATE( \
kbdev, \
- dummy \
+ core_type, \
+ core_state_bitset \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_js_sched_start( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- dummy); \
+ __kbase_tlstream_aux_pm_state( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ core_type, \
+ core_state_bitset \
+ ); \
} while (0)
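A hedged sketch of invoking the reworked KBASE_TLSTREAM_AUX_PM_STATE. The core-type argument and the bitmask value are illustrative assumptions; real call sites pass the driver's core-type enumerator and the hardware-reported ready mask:

	/* Illustrative only: report that cores 0 and 1 of a core type are ON. */
	static void example_trace_pm_state(struct kbase_device *kbdev,
					   u32 core_type)
	{
		u64 core_state_bitset = 0x3; /* bits 0 and 1 set: cores 0 and 1 ON */

		KBASE_TLSTREAM_AUX_PM_STATE(kbdev, core_type, core_state_bitset);
	}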
/**
- * KBASE_TLSTREAM_TL_JS_SCHED_END -
- * Scheduling ends
+ * KBASE_TLSTREAM_AUX_PAGEFAULT - Page fault
*
* @kbdev: Kbase device
- * @dummy: dummy argument
+ * @ctx_nr: Kernel context number
+ * @as_nr: Address space number
+ * @page_cnt_change: Number of pages to be added
*/
-#define KBASE_TLSTREAM_TL_JS_SCHED_END( \
+#define KBASE_TLSTREAM_AUX_PAGEFAULT( \
kbdev, \
- dummy \
+ ctx_nr, \
+ as_nr, \
+ page_cnt_change \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_js_sched_end( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- dummy); \
+ __kbase_tlstream_aux_pagefault( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ ctx_nr, \
+ as_nr, \
+ page_cnt_change \
+ ); \
} while (0)
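A sketch of a fault-handler call to KBASE_TLSTREAM_AUX_PAGEFAULT, assuming the handler already knows which context and address space faulted and how many pages it added; the names are illustrative:

	/* Illustrative only: record that a page fault grew a region. */
	static void example_trace_pagefault(struct kbase_device *kbdev,
					    u32 ctx_nr, u32 as_nr, u64 pages_added)
	{
		/* page_cnt_change is the number of pages added to satisfy the fault. */
		KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, ctx_nr, as_nr, pages_added);
	}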
/**
- * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START -
- * Submitting an atom starts
+ * KBASE_TLSTREAM_AUX_PAGESALLOC - Total alloc pages change
*
* @kbdev: Kbase device
- * @atom: Atom identifier
+ * @ctx_nr: Kernel context number
+ * @page_cnt: Number of pages used by the context
*/
-#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_START( \
+#define KBASE_TLSTREAM_AUX_PAGESALLOC( \
kbdev, \
- atom \
+ ctx_nr, \
+ page_cnt \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_jd_submit_atom_start( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ __kbase_tlstream_aux_pagesalloc( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ ctx_nr, \
+ page_cnt \
+ ); \
} while (0)
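KBASE_TLSTREAM_AUX_PAGESALLOC reports an absolute total rather than a delta. A minimal sketch, assuming the caller tracks the context's running page count:

	/* Illustrative only: publish a context's new total allocated page count. */
	static void example_trace_pagesalloc(struct kbase_device *kbdev,
					     u32 ctx_nr, u64 total_pages)
	{
		KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, ctx_nr, total_pages);
	}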
/**
- * KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END -
- * Submitting an atom ends
+ * KBASE_TLSTREAM_AUX_DEVFREQ_TARGET - New device frequency target
*
* @kbdev: Kbase device
- * @atom: Atom identifier
+ * @target_freq: New target frequency
*/
-#define KBASE_TLSTREAM_TL_JD_SUBMIT_ATOM_END( \
+#define KBASE_TLSTREAM_AUX_DEVFREQ_TARGET( \
kbdev, \
- atom \
+ target_freq \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_jd_submit_atom_end( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ __kbase_tlstream_aux_devfreq_target( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ target_freq \
+ ); \
} while (0)
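A sketch of reporting a new devfreq target; the 800 MHz figure is purely illustrative:

	/* Illustrative only: announce a new devfreq target frequency. */
	static void example_trace_devfreq_target(struct kbase_device *kbdev)
	{
		u64 target_freq_hz = 800000000ULL; /* 800 MHz, example value */

		KBASE_TLSTREAM_AUX_DEVFREQ_TARGET(kbdev, target_freq_hz);
	}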
/**
- * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START -
- * Within function jd_done_nolock
+ * KBASE_TLSTREAM_AUX_JIT_STATS - per-bin JIT statistics
*
* @kbdev: Kbase device
- * @atom: Atom identifier
+ * @ctx_nr: Kernel context number
+ * @bid: JIT bin id
+ * @max_allocs: Maximum allocations allowed in this bin.
+ * @allocs: Number of active allocations in this bin
+ * @va_pages: Number of virtual pages allocated in this bin
+ * @ph_pages: Number of physical pages allocated in this bin
*/
-#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_START( \
+#define KBASE_TLSTREAM_AUX_JIT_STATS( \
kbdev, \
- atom \
+ ctx_nr, \
+ bid, \
+ max_allocs, \
+ allocs, \
+ va_pages, \
+ ph_pages \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_jd_done_no_lock_start( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ __kbase_tlstream_aux_jit_stats( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ ctx_nr, \
+ bid, \
+ max_allocs, \
+ allocs, \
+ va_pages, \
+ ph_pages \
+ ); \
} while (0)
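A sketch of emitting per-bin JIT statistics for one context; all numeric values are made-up placeholders showing which figure goes in which slot:

	/* Illustrative only: report the occupancy of a single JIT bin. */
	static void example_trace_jit_stats(struct kbase_device *kbdev, u32 ctx_nr)
	{
		u32 bid = 1;         /* JIT bin id */
		u32 max_allocs = 16; /* bin capacity */
		u32 allocs = 4;      /* active allocations in the bin */
		u32 va_pages = 1024; /* virtual pages allocated in the bin */
		u32 ph_pages = 512;  /* physical pages backing the bin */

		KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, ctx_nr, bid, max_allocs,
					     allocs, va_pages, ph_pages);
	}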
/**
- * KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END -
- * Within function jd_done_nolock - end
+ * KBASE_TLSTREAM_AUX_TILER_HEAP_STATS - Tiler Heap statistics
*
* @kbdev: Kbase device
- * @atom: Atom identifier
+ * @ctx_nr: Kernel context number
+ * @heap_id: Unique id used to represent a heap under a context
+ * @va_pages: Number of virtual pages allocated in this bin
+ * @ph_pages: Number of physical pages allocated in this bin
+ * @max_chunks: The maximum number of chunks that the heap should be allowed to use
+ * @chunk_size: Size of each chunk in tiler heap, in bytes
+ * @chunk_count: The number of chunks currently allocated in the tiler heap
+ * @target_in_flight: Number of render-passes that the driver should attempt to keep in flight and for which allocation of new chunks is allowed
+ * @nr_in_flight: Number of render-passes that are in flight
*/
-#define KBASE_TLSTREAM_TL_JD_DONE_NO_LOCK_END( \
+#define KBASE_TLSTREAM_AUX_TILER_HEAP_STATS( \
kbdev, \
- atom \
+ ctx_nr, \
+ heap_id, \
+ va_pages, \
+ ph_pages, \
+ max_chunks, \
+ chunk_size, \
+ chunk_count, \
+ target_in_flight, \
+ nr_in_flight \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_jd_done_no_lock_end( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ __kbase_tlstream_aux_tiler_heap_stats( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ ctx_nr, \
+ heap_id, \
+ va_pages, \
+ ph_pages, \
+ max_chunks, \
+ chunk_size, \
+ chunk_count, \
+ target_in_flight, \
+ nr_in_flight \
+ ); \
} while (0)
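The tiler-heap tracepoint carries a larger snapshot; the sketch below only illustrates the argument order, with placeholder values:

	/* Illustrative only: snapshot of one tiler heap's configuration and usage. */
	static void example_trace_tiler_heap_stats(struct kbase_device *kbdev,
						   u32 ctx_nr)
	{
		u64 heap_id = 1;
		u32 va_pages = 4096, ph_pages = 1024;
		u32 max_chunks = 64, chunk_size = 512 * 1024, chunk_count = 8;
		u32 target_in_flight = 2, nr_in_flight = 1;

		KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kbdev, ctx_nr, heap_id,
						    va_pages, ph_pages,
						    max_chunks, chunk_size,
						    chunk_count,
						    target_in_flight,
						    nr_in_flight);
	}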
/**
- * KBASE_TLSTREAM_TL_JD_DONE_START -
- * Start of kbase_jd_done
+ * KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT - event on a given job slot
*
* @kbdev: Kbase device
- * @atom: Atom identifier
+ * @ctx: Name of the context object
+ * @slot_nr: Job slot number
+ * @atom_nr: Sequential number of an atom
+ * @event: Event type. One of TL_JS_EVENT values
*/
-#define KBASE_TLSTREAM_TL_JD_DONE_START( \
+#define KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT( \
kbdev, \
- atom \
+ ctx, \
+ slot_nr, \
+ atom_nr, \
+ event \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_jd_done_start( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ __kbase_tlstream_aux_event_job_slot( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ ctx, \
+ slot_nr, \
+ atom_nr, \
+ event \
+ ); \
} while (0)
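A sketch of a job-slot event; the context pointer type and the event value are placeholders, since real call sites pass the kbase context object and one of the TL_JS_EVENT constants:

	/* Illustrative only: note an event on job slot 0 for a given atom. */
	static void example_trace_job_slot_event(struct kbase_device *kbdev,
						 void *ctx, u64 atom_nr, u32 event)
	{
		u32 slot_nr = 0;

		KBASE_TLSTREAM_AUX_EVENT_JOB_SLOT(kbdev, ctx, slot_nr,
						  atom_nr, event);
	}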
/**
- * KBASE_TLSTREAM_TL_JD_DONE_END -
- * End of kbase_jd_done
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START - enter protected mode start
*
* @kbdev: Kbase device
- * @atom: Atom identifier
+ * @gpu: Name of the GPU object
*/
-#define KBASE_TLSTREAM_TL_JD_DONE_END( \
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START( \
kbdev, \
- atom \
+ gpu \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_jd_done_end( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ __kbase_tlstream_aux_protected_enter_start( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ gpu \
+ ); \
} while (0)
/**
- * KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE -
- * Atom marked complete
+ * KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END - enter protected mode end
*
* @kbdev: Kbase device
- * @atom: Atom identifier
+ * @gpu: Name of the GPU object
*/
-#define KBASE_TLSTREAM_TL_JD_ATOM_COMPLETE( \
+#define KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END( \
kbdev, \
- atom \
+ gpu \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_jd_atom_complete( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom); \
+ __kbase_tlstream_aux_protected_enter_end( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ gpu \
+ ); \
} while (0)
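The enter-side tracepoints are intended to bracket the protected-mode transition. A hypothetical sketch; the transition itself is elided and gpu_obj stands in for whatever timeline GPU object the caller holds:

	/* Illustrative only: bracket entry into protected mode. */
	static void example_trace_protected_enter(struct kbase_device *kbdev,
						  void *gpu_obj)
	{
		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_START(kbdev, gpu_obj);
		/* ...actual switch into protected mode would happen here... */
		KBASE_TLSTREAM_AUX_PROTECTED_ENTER_END(kbdev, gpu_obj);
	}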
/**
- * KBASE_TLSTREAM_TL_RUN_ATOM_START -
- * Running of atom starts
+ * KBASE_TLSTREAM_AUX_MMU_COMMAND - mmu commands with synchronicity info
*
* @kbdev: Kbase device
- * @atom: Atom identifier
- * @atom_nr: Sequential number of an atom
+ * @kernel_ctx_id: Unique ID for the KBase Context
+ * @mmu_cmd_id: MMU Command ID (e.g. AS_COMMAND_UPDATE)
+ * @mmu_synchronicity: Indicates whether the command relates to the currently running job and must be resolved for that job to make progress (synchronous, e.g. grow on page fault, JIT), or not (asynchronous, e.g. IOCTL calls from user space). This param will be 0 if it is an asynchronous operation.
+ * @mmu_lock_addr: start address of regions to be locked/unlocked/invalidated
+ * @mmu_lock_page_num: number of pages to be locked/unlocked/invalidated
*/
-#define KBASE_TLSTREAM_TL_RUN_ATOM_START( \
+#define KBASE_TLSTREAM_AUX_MMU_COMMAND( \
kbdev, \
- atom, \
- atom_nr \
+ kernel_ctx_id, \
+ mmu_cmd_id, \
+ mmu_synchronicity, \
+ mmu_lock_addr, \
+ mmu_lock_page_num \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_run_atom_start( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom, atom_nr); \
+ __kbase_tlstream_aux_mmu_command( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ kernel_ctx_id, \
+ mmu_cmd_id, \
+ mmu_synchronicity, \
+ mmu_lock_addr, \
+ mmu_lock_page_num \
+ ); \
} while (0)
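A sketch of tracing a synchronous MMU operation; the command id and page count are placeholders (real callers pass an AS_COMMAND_* value and the actual lock extent):

	/* Illustrative only: trace a synchronous MMU command covering 16 pages. */
	static void example_trace_mmu_command(struct kbase_device *kbdev,
					      u32 kernel_ctx_id, u32 mmu_cmd_id,
					      u64 lock_addr)
	{
		u32 synchronous = 1; /* 0 would mark an asynchronous operation */
		u32 num_pages = 16;

		KBASE_TLSTREAM_AUX_MMU_COMMAND(kbdev, kernel_ctx_id, mmu_cmd_id,
					       synchronous, lock_addr, num_pages);
	}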
/**
- * KBASE_TLSTREAM_TL_RUN_ATOM_END -
- * Running of atom ends
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START - leave protected mode start
*
* @kbdev: Kbase device
- * @atom: Atom identifier
- * @atom_nr: Sequential number of an atom
+ * @gpu: Name of the GPU object
*/
-#define KBASE_TLSTREAM_TL_RUN_ATOM_END( \
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START( \
kbdev, \
- atom, \
- atom_nr \
+ gpu \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
- if (enabled & TLSTREAM_ENABLED) \
- __kbase_tlstream_tl_run_atom_end( \
- __TL_DISPATCH_STREAM(kbdev, obj), \
- atom, atom_nr); \
+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+ __kbase_tlstream_aux_protected_leave_start( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ gpu \
+ ); \
} while (0)
+/**
+ * KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END - leave protected mode end
+ *
+ * @kbdev: Kbase device
+ * @gpu: Name of the GPU object
+ */
+#define KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END( \
+ kbdev, \
+ gpu \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS) \
+ __kbase_tlstream_aux_protected_leave_end( \
+ __TL_DISPATCH_STREAM(kbdev, aux), \
+ gpu \
+ ); \
+ } while (0)
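Note that, unlike the enter-side pair above, the two leave-side tracepoints are gated on BASE_TLSTREAM_ENABLE_LATENCY_TRACEPOINTS rather than the plain TLSTREAM_ENABLED bit, so they only fire when latency tracepoints have been requested. A hypothetical bracket around the exit sequence, mirroring the enter-side sketch:

	/* Illustrative only: bracket the exit from protected mode. */
	static void example_trace_protected_leave(struct kbase_device *kbdev,
						  void *gpu_obj)
	{
		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_START(kbdev, gpu_obj);
		/* ...actual switch out of protected mode would happen here... */
		KBASE_TLSTREAM_AUX_PROTECTED_LEAVE_END(kbdev, gpu_obj);
	}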
/* Gator tracepoints are hooked into TLSTREAM interface.
* When the following tracepoints are called, corresponding