author     Sidath Senanayake <sidaths@google.com>   2021-01-29 14:51:21 +0000
committer  Sidath Senanayake <sidaths@google.com>   2021-01-29 14:51:21 +0000
commit     201c8bfb4637601363b6e9283f3bdc510711a226 (patch)
tree       afa8b543c81e78e5b82156be5d5266060c71e069
parent     72f2457ff7355ff0389efe5bc9cec3365362d8c4 (diff)
download   gpu-201c8bfb4637601363b6e9283f3bdc510711a226.tar.gz
Mali Valhall DDK r28p0 KMD

Provenance: f61f43e2c (collaborate/EAC/v_r28p0)
VX504X08X-BU-00000-r28p0-01eac0 - Android DDK
VX504X08X-SW-99006-r28p0-01eac0 - Android Renderscript AOSP parts
VX504X08X-BU-60000-r28p0-01eac0 - Android Document Bundle
VX504X08X-DC-11001-r28p0-01eac0 - Valhall Android DDK Software Errata

Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Iafabf59869cc06a23d69668f6ae1a152cb86b7f3
-rw-r--r--  common/include/linux/protected_memory_allocator.h | 110
-rw-r--r--  mali_kbase/Kbuild | 3
-rw-r--r--  mali_kbase/Kconfig | 44
-rw-r--r--  mali_kbase/Mconfig | 3
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbif.c | 56
-rw-r--r--  mali_kbase/arbiter/mali_kbase_arbiter_pm.c | 139
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_backend.c | 96
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_instr_defs.h | 11
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_defs.h | 6
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 40
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_ca.c | 20
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_defs.h | 17
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 115
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_internal.h | 20
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h | 1
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_metrics.c | 134
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_policy.c | 19
-rw-r--r--  mali_kbase/build.bp | 14
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_csf.c | 21
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_jm.c | 15
-rw-r--r--  mali_kbase/context/mali_kbase_context.c | 1
-rw-r--r--  mali_kbase/csf/Kbuild | 5
-rw-r--r--  mali_kbase/csf/ipa_control/Kbuild | 23
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c | 834
-rw-r--r--  mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h | 217
-rw-r--r--  mali_kbase/csf/mali_base_csf_kernel.h | 10
-rw-r--r--  mali_kbase/csf/mali_gpu_csf_registers.h | 92
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c | 486
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h | 127
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c | 193
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.h | 91
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_csg_debugfs.c | 58
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h | 348
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c | 286
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h | 219
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 264
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_ioctl.h | 31
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c | 282
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h | 2
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c | 7
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c | 519
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h | 78
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.c | 66
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap.h | 15
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_timeout.c | 3
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_timeout.h | 12
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.c | 6
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.h | 3
-rw-r--r--  mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h | 4
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_ktrace_codes.h | 1
-rw-r--r--  mali_kbase/debug/mali_kbase_debug_linux_ktrace.h | 1
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c | 24
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c | 16
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c | 4
-rw-r--r--  mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h | 42
-rw-r--r--  mali_kbase/gpu/mali_kbase_gpu_id.h | 1
-rw-r--r--  mali_kbase/ipa/Kbuild | 18
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c | 297
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.h | 145
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c (renamed from mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c) | 4
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h (renamed from mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h) | 8
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c | 171
-rw-r--r--  mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c (renamed from mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c) | 67
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.c | 65
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa.h | 34
-rw-r--r--  mali_kbase/ipa/mali_kbase_ipa_debugfs.c | 4
-rw-r--r--  mali_kbase/jm/mali_base_jm_kernel.h | 10
-rw-r--r--  mali_kbase/jm/mali_kbase_jm_ioctl.h | 4
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 29
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 22
-rw-r--r--  mali_kbase/mali_base_kernel.h | 18
-rw-r--r--  mali_kbase/mali_kbase.h | 8
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 15
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 429
-rw-r--r--  mali_kbase/mali_kbase_ctx_sched.c | 2
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 42
-rw-r--r--  mali_kbase/mali_kbase_dvfs_debugfs.c | 92
-rw-r--r--  mali_kbase/mali_kbase_dvfs_debugfs.h | 36
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c | 3
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 9
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_defs.h | 5
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_instr.h | 7
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_pm.h | 19
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_backend_jm.c | 46
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.c | 151
-rw-r--r--  mali_kbase/mali_kbase_hwcnt_gpu.h | 68
-rw-r--r--  mali_kbase/mali_kbase_ioctl.h | 4
-rw-r--r--  mali_kbase/mali_kbase_jd.c | 21
-rw-r--r--  mali_kbase/mali_kbase_jm.c | 2
-rw-r--r--  mali_kbase/mali_kbase_jm.h | 5
-rw-r--r--  mali_kbase/mali_kbase_kinstr_jm.c | 19
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 92
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 10
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 77
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.h | 6
-rw-r--r--  mali_kbase/mali_kbase_regs_history_debugfs.c | 10
-rw-r--r--  mali_kbase/mali_kbase_softjobs.c | 18
-rw-r--r--  mali_kbase/mali_kbase_trace_gpu_mem.c | 22
-rw-r--r--  mali_kbase/mali_kbase_vinstr.c | 19
-rw-r--r--  mali_kbase/mmu/backend/mali_kbase_mmu_csf.c | 1
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 36
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 1
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c | 38
-rw-r--r--  mali_kbase/thirdparty/mali_kbase_mmap.c | 15
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 224
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 250
106 files changed, 6400 insertions, 1553 deletions
diff --git a/common/include/linux/protected_memory_allocator.h b/common/include/linux/protected_memory_allocator.h
new file mode 100644
index 0000000..3b9205b
--- /dev/null
+++ b/common/include/linux/protected_memory_allocator.h
@@ -0,0 +1,110 @@
+/*
+ *
+ * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _PROTECTED_MEMORY_ALLOCATOR_H_
+#define _PROTECTED_MEMORY_ALLOCATOR_H_
+
+#include <linux/mm.h>
+
+/**
+ * struct protected_memory_allocation - Protected memory allocation
+ *
+ * @pa: Physical address of the protected memory allocation.
+ * @order: Size of memory allocation in pages, as a base-2 logarithm.
+ */
+struct protected_memory_allocation {
+ phys_addr_t pa;
+ unsigned int order;
+};
+
+struct protected_memory_allocator_device;
+
+/**
+ * struct protected_memory_allocator_ops - Callbacks for protected memory
+ * allocator operations
+ *
+ * @pma_alloc_page: Callback to allocate protected memory
+ * @pma_get_phys_addr: Callback to get the physical address of an allocation
+ * @pma_free_page: Callback to free protected memory
+ */
+struct protected_memory_allocator_ops {
+ /**
+ * pma_alloc_page - Allocate protected memory pages
+ *
+ * @pma_dev: The protected memory allocator the request is being made
+ * through.
+ * @order: How many pages to allocate, as a base-2 logarithm.
+ *
+ * Return: Pointer to allocated memory, or NULL if allocation failed.
+ */
+ struct protected_memory_allocation *(*pma_alloc_page)(
+ struct protected_memory_allocator_device *pma_dev,
+ unsigned int order);
+
+ /**
+ * pma_get_phys_addr - Get the physical address of the protected memory
+ * allocation
+ *
+ * @pma_dev: The protected memory allocator the request is being made
+ * through.
+ * @pma: The protected memory allocation whose physical address
+ * shall be retrieved
+ *
+ * Return: The physical address of the given allocation.
+ */
+ phys_addr_t (*pma_get_phys_addr)(
+ struct protected_memory_allocator_device *pma_dev,
+ struct protected_memory_allocation *pma);
+
+ /**
+ * pma_free_page - Free a page of memory
+ *
+ * @pma_dev: The protected memory allocator the request is being made
+ * through.
+ * @pma: The protected memory allocation to free.
+ */
+ void (*pma_free_page)(
+ struct protected_memory_allocator_device *pma_dev,
+ struct protected_memory_allocation *pma);
+};
+
+/**
+ * struct protected_memory_allocator_device - Device structure for protected
+ * memory allocator
+ *
+ * @ops: Callbacks associated with this device
+ * @owner: Pointer to the module owner
+ *
+ * In order for a system integrator to provide custom behaviors for protected
+ * memory operations performed by the kbase module (controller driver),
+ * they shall provide a platform-specific driver module which implements
+ * this interface.
+ *
+ * This structure should be registered with the platform device using
+ * platform_set_drvdata().
+ */
+struct protected_memory_allocator_device {
+ struct protected_memory_allocator_ops ops;
+ struct module *owner;
+};
+
+#endif /* _PROTECTED_MEMORY_ALLOCATOR_H_ */
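Note: what follows is an illustrative, hedged sketch (not part of this commit) of how a system integrator's platform-specific module might implement and register the interface documented above. All example_pma_* names are invented; a real allocator would hand out genuinely protected memory rather than ordinary pages, and the probe function would be hooked into a normal struct platform_driver.

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/protected_memory_allocator.h>
#include <linux/slab.h>

static struct protected_memory_allocation *example_pma_alloc_page(
	struct protected_memory_allocator_device *pma_dev, unsigned int order)
{
	struct protected_memory_allocation *pma;
	struct page *page;

	pma = kzalloc(sizeof(*pma), GFP_KERNEL);
	if (!pma)
		return NULL;

	/* Stand-in for a real protected-memory allocation. */
	page = alloc_pages(GFP_KERNEL, order);
	if (!page) {
		kfree(pma);
		return NULL;
	}

	pma->pa = page_to_phys(page);
	pma->order = order;
	return pma;
}

static phys_addr_t example_pma_get_phys_addr(
	struct protected_memory_allocator_device *pma_dev,
	struct protected_memory_allocation *pma)
{
	return pma->pa;
}

static void example_pma_free_page(
	struct protected_memory_allocator_device *pma_dev,
	struct protected_memory_allocation *pma)
{
	__free_pages(pfn_to_page(pma->pa >> PAGE_SHIFT), pma->order);
	kfree(pma);
}

static int example_pma_probe(struct platform_device *pdev)
{
	struct protected_memory_allocator_device *pma_dev;

	pma_dev = devm_kzalloc(&pdev->dev, sizeof(*pma_dev), GFP_KERNEL);
	if (!pma_dev)
		return -ENOMEM;

	pma_dev->ops.pma_alloc_page = example_pma_alloc_page;
	pma_dev->ops.pma_get_phys_addr = example_pma_get_phys_addr;
	pma_dev->ops.pma_free_page = example_pma_free_page;
	pma_dev->owner = THIS_MODULE;

	/* Make the allocator reachable by kbase, as documented above. */
	platform_set_drvdata(pdev, pma_dev);
	return 0;
}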
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index a1a08d3..5b495c9 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -21,7 +21,7 @@
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r27p0-01eac0"
+MALI_RELEASE_NAME ?= "r28p0-01eac0"
# Paths required for build
@@ -116,6 +116,7 @@ SRC := \
mali_kbase_strings.c \
mali_kbase_as_fault_debugfs.c \
mali_kbase_regs_history_debugfs.c \
+ mali_kbase_dvfs_debugfs.c \
mali_power_gpu_frequency_trace.c \
mali_kbase_trace_gpu_mem.c \
thirdparty/mali_kbase_mmap.c \
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index ca59dbb..06f428f 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -277,10 +277,20 @@ config MALI_JOB_DUMP
minimal overhead when not in use. Enable only if you know what
you are doing.
+choice
+ prompt "Performance counters set"
+ default MALI_PRFCNT_SET_PRIMARY
+ depends on MALI_MIDGARD && MALI_EXPERT
+
+config MALI_PRFCNT_SET_PRIMARY
+ bool "Primary"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ help
+ Select this option to use primary set of performance counters.
+
config MALI_PRFCNT_SET_SECONDARY
- bool "Use secondary set of performance counters"
+ bool "Secondary"
depends on MALI_MIDGARD && MALI_EXPERT
- default n
help
Select this option to use secondary set of performance counters. Kernel
features that depend on an access to the primary set of counters may
@@ -288,21 +298,43 @@ config MALI_PRFCNT_SET_SECONDARY
from working optimally and may cause instrumentation tools to return
bogus results.
- If unsure, say N.
+ If unsure, use MALI_PRFCNT_SET_PRIMARY.
+
+config MALI_PRFCNT_SET_TERTIARY
+ bool "Tertiary"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ help
+ Select this option to use tertiary set of performance counters. Kernel
+ features that depend on an access to the primary set of counters may
+ become unavailable. Enabling this option will prevent power management
+ from working optimally and may cause instrumentation tools to return
+ bogus results.
-config MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
- bool "Use secondary set of performance counters"
- depends on MALI_MIDGARD && MALI_EXPERT && !MALI_PRFCNT_SET_SECONDARY && DEBUG_FS
+ If unsure, use MALI_PRFCNT_SET_PRIMARY.
+
+endchoice
+
+config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+ bool "Allow runtime selection of performance counters set via debugfs"
+ depends on MALI_MIDGARD && MALI_EXPERT && DEBUG_FS
default n
help
Select this option to make the secondary set of performance counters
available at runtime via debugfs. Kernel features that depend on an
access to the primary set of counters may become unavailable.
+ If no runtime debugfs option is set, the build time counter set
+ choice will be used.
+
This feature is unsupported and unstable, and may break at any time.
Enabling this option will prevent power management from working
optimally and may cause instrumentation tools to return bogus results.
+ No validation is done on the debugfs input. Invalid input could cause
+ performance counter errors. Valid inputs are the values accepted by
+ the SET_SELECT bits of the PRFCNT_CONFIG register as defined in the
+ architecture specification.
+
If unsure, say N.
source "drivers/gpu/arm/midgard/platform/Kconfig"
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index b29d659..a70c76c 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -273,6 +273,9 @@ config MALI_GEM5_BUILD
# Instrumentation options.
# config MALI_JOB_DUMP exists in the Kernel Kconfig but is configured using CINSTR_JOB_DUMP in Mconfig.
+# config MALI_PRFCNT_SET_PRIMARY exists in the Kernel Kconfig but is configured using CINSTR_PRIMARY_HWC in Mconfig.
# config MALI_PRFCNT_SET_SECONDARY exists in the Kernel Kconfig but is configured using CINSTR_SECONDARY_HWC in Mconfig.
+# config MALI_PRFCNT_SET_TERTIARY exists in the Kernel Kconfig but is configured using CINSTR_TERTIARY_HWC in Mconfig.
+# config MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS exists in the Kernel Kconfig but is configured using CINSTR_HWC_SET_SELECT_VIA_DEBUG_FS in Mconfig.
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/mali_kbase/arbiter/mali_kbase_arbif.c b/mali_kbase/arbiter/mali_kbase_arbif.c
index ddf1a0c..3bfc62d 100644
--- a/mali_kbase/arbiter/mali_kbase_arbif.c
+++ b/mali_kbase/arbiter/mali_kbase_arbif.c
@@ -34,6 +34,12 @@
#include <linux/of_platform.h>
#include "mali_kbase_arbiter_interface.h"
+/**
+ * on_gpu_stop() - sends KBASE_VM_GPU_STOP_EVT event on VM stop
+ * @dev: arbiter interface device handle
+ *
+ * Callback function to signal a GPU STOP event from the arbiter interface
+ */
static void on_gpu_stop(struct device *dev)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
@@ -42,6 +48,12 @@ static void on_gpu_stop(struct device *dev)
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_STOP_EVT);
}
+/**
+ * on_gpu_granted() - sends KBASE_VM_GPU_GRANTED_EVT event on GPU granted
+ * @dev: arbiter interface device handle
+ *
+ * Callback function to signal a GPU GRANT event from the arbiter interface
+ */
static void on_gpu_granted(struct device *dev)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
@@ -50,6 +62,12 @@ static void on_gpu_granted(struct device *dev)
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_GRANTED_EVT);
}
+/**
+ * on_gpu_lost() - sends KBASE_VM_GPU_LOST_EVT event on GPU lost
+ * @dev: arbiter interface device handle
+ *
+ * Callback function to signal a GPU LOST event from the arbiter interface
+ */
static void on_gpu_lost(struct device *dev)
{
struct kbase_device *kbdev = dev_get_drvdata(dev);
@@ -57,6 +75,14 @@ static void on_gpu_lost(struct device *dev)
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_LOST_EVT);
}
+/**
+ * kbase_arbif_init() - Kbase Arbiter interface initialisation.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Initialise Kbase Arbiter interface and assign callback functions.
+ *
+ * Return: 0 on success else a Linux error code
+ */
int kbase_arbif_init(struct kbase_device *kbdev)
{
#ifdef CONFIG_OF
@@ -119,6 +145,12 @@ int kbase_arbif_init(struct kbase_device *kbdev)
return 0;
}
+/**
+ * kbase_arbif_destroy() - De-init Kbase arbiter interface
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * De-initialise Kbase arbiter interface
+ */
void kbase_arbif_destroy(struct kbase_device *kbdev)
{
struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
@@ -133,6 +165,12 @@ void kbase_arbif_destroy(struct kbase_device *kbdev)
kbdev->arb.arb_dev = NULL;
}
+/**
+ * kbase_arbif_gpu_request() - Request GPU from the arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Sends a message through the arbiter interface requesting the GPU for the VM
+ */
void kbase_arbif_gpu_request(struct kbase_device *kbdev)
{
struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
@@ -143,6 +181,12 @@ void kbase_arbif_gpu_request(struct kbase_device *kbdev)
}
}
+/**
+ * kbase_arbif_gpu_stopped() - send GPU stopped message to the arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @gpu_required: GPU request flag
+ *
+ */
void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required)
{
struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
@@ -154,6 +198,12 @@ void kbase_arbif_gpu_stopped(struct kbase_device *kbdev, u8 gpu_required)
}
}
+/**
+ * kbase_arbif_gpu_active() - Sends a GPU_ACTIVE message to the Arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Informs the arbiter VM is active
+ */
void kbase_arbif_gpu_active(struct kbase_device *kbdev)
{
struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
@@ -164,6 +214,12 @@ void kbase_arbif_gpu_active(struct kbase_device *kbdev)
}
}
+/**
+ * kbase_arbif_gpu_idle() - Inform the arbiter that the VM has gone idle
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Informs the arbiter VM is idle
+ */
void kbase_arbif_gpu_idle(struct kbase_device *kbdev)
{
struct arbiter_if_dev *arb_if = kbdev->arb.arb_if;
diff --git a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
index 02b5de2..1fc432b 100644
--- a/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
+++ b/mali_kbase/arbiter/mali_kbase_arbiter_pm.c
@@ -39,6 +39,13 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev);
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
struct kbase_device *kbdev);
+/**
+ * kbase_arbiter_pm_vm_state_str() - Helper function to get string
+ * for kbase VM state (debug)
+ * @state: kbase VM state
+ *
+ * Return: string representation of Kbase_vm_state
+ */
static inline const char *kbase_arbiter_pm_vm_state_str(
enum kbase_vm_state state)
{
@@ -73,6 +80,13 @@ static inline const char *kbase_arbiter_pm_vm_state_str(
}
}
+/**
+ * kbase_arbiter_pm_vm_event_str() - Helper function to get string
+ * for kbase VM event (debug)
+ * @evt: kbase VM event
+ *
+ * Return: String representation of Kbase_arbif_event
+ */
static inline const char *kbase_arbiter_pm_vm_event_str(
enum kbase_arbif_evt evt)
{
@@ -99,6 +113,13 @@ static inline const char *kbase_arbiter_pm_vm_event_str(
}
}
+/**
+ * kbase_arbiter_pm_vm_set_state() - Sets new kbase_arbiter_vm_state
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @new_state: kbase VM new state
+ *
+ * This function sets the new state for the VM
+ */
static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev,
enum kbase_vm_state new_state)
{
@@ -107,11 +128,19 @@ static void kbase_arbiter_pm_vm_set_state(struct kbase_device *kbdev,
dev_dbg(kbdev->dev, "VM set_state %s -> %s",
kbase_arbiter_pm_vm_state_str(arb_vm_state->vm_state),
kbase_arbiter_pm_vm_state_str(new_state));
+
lockdep_assert_held(&arb_vm_state->vm_state_lock);
arb_vm_state->vm_state = new_state;
wake_up(&arb_vm_state->vm_state_wait);
}
+/**
+ * kbase_arbiter_pm_suspend_wq() - Suspend work queue handler of the driver.
+ * @data: work item
+ *
+ * Handles driver suspend work when the VM is in the SUSPEND_PENDING,
+ * STOPPING_IDLE or STOPPING_ACTIVE state
+ */
static void kbase_arbiter_pm_suspend_wq(struct work_struct *data)
{
struct kbase_arbiter_vm_state *arb_vm_state = container_of(data,
@@ -136,6 +165,13 @@ static void kbase_arbiter_pm_suspend_wq(struct work_struct *data)
dev_dbg(kbdev->dev, "<%s\n", __func__);
}
+/**
+ * kbase_arbiter_pm_resume_wq() - Kbase resume work queue handler.
+ * @data: work item
+ *
+ * Handles driver resume work when the VM is in the STARTING state;
+ * if it is in STOPPING_ACTIVE instead, a stop event is requested.
+ */
static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
{
struct kbase_arbiter_vm_state *arb_vm_state = container_of(data,
@@ -160,6 +196,16 @@ static void kbase_arbiter_pm_resume_wq(struct work_struct *data)
dev_dbg(kbdev->dev, "<%s\n", __func__);
}
+/**
+ * kbase_arbiter_pm_early_init() - Initialize arbiter for VM
+ * Paravirtualized use.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Initialize the arbiter and other required resources during the runtime
+ * and request the GPU for the VM for the first time.
+ *
+ * Return: 0 if success, or a Linux error code
+ */
int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
{
int err;
@@ -179,6 +225,7 @@ int kbase_arbiter_pm_early_init(struct kbase_device *kbdev)
WQ_HIGHPRI);
if (!arb_vm_state->vm_arb_wq) {
dev_err(kbdev->dev, "Failed to allocate vm_arb workqueue\n");
+ kfree(arb_vm_state);
return -ENOMEM;
}
INIT_WORK(&arb_vm_state->vm_suspend_work, kbase_arbiter_pm_suspend_wq);
@@ -210,6 +257,12 @@ arbif_init_fail:
return err;
}
+/**
+ * kbase_arbiter_pm_early_term() - Shutdown arbiter and free resources
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Clean up all the resources
+ */
void kbase_arbiter_pm_early_term(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -227,6 +280,12 @@ void kbase_arbiter_pm_early_term(struct kbase_device *kbdev)
kbdev->pm.arb_vm_state = NULL;
}
+/**
+ * kbase_arbiter_pm_release_interrupts() - Release the GPU interrupts
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Releases interrupts if needed (GPU is available) otherwise does nothing
+ */
void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -240,6 +299,12 @@ void kbase_arbiter_pm_release_interrupts(struct kbase_device *kbdev)
mutex_unlock(&arb_vm_state->vm_state_lock);
}
+/**
+ * kbase_arbiter_pm_vm_stopped() - Handle stop state for the VM
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Handles a stop state for the VM
+ */
void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
{
bool request_gpu = false;
@@ -277,6 +342,12 @@ void kbase_arbiter_pm_vm_stopped(struct kbase_device *kbdev)
kbase_arbif_gpu_stopped(kbdev, request_gpu);
}
+/**
+ * kbase_arbiter_pm_vm_gpu_start() - Handles the start state of the VM
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Handles the start state of the VM
+ */
static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -306,6 +377,12 @@ static void kbase_arbiter_pm_vm_gpu_start(struct kbase_device *kbdev)
}
}
+/**
+ * kbase_arbiter_pm_vm_gpu_stop() - Handles the stop state of the VM
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Handles the stop state of the VM
+ */
static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -348,6 +425,12 @@ static void kbase_arbiter_pm_vm_gpu_stop(struct kbase_device *kbdev)
}
}
+/**
+ * kbase_gpu_lost() - Kbase signals GPU is lost on a lost event signal
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * On a GPU lost event, signals GPU_LOST to the arbiter
+ */
static void kbase_gpu_lost(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -396,6 +479,13 @@ static void kbase_gpu_lost(struct kbase_device *kbdev)
}
}
+/**
+ * kbase_arbiter_pm_vm_os_suspend_ready_state() - checks if VM is ready
+ * to be moved to suspended state.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Return: true if it is ready to be suspended, else false.
+ */
static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(
struct kbase_device *kbdev)
{
@@ -410,6 +500,14 @@ static inline bool kbase_arbiter_pm_vm_os_suspend_ready_state(
}
}
+/**
+ * kbase_arbiter_pm_vm_os_prepare_suspend() - Prepare OS to be in suspend state
+ * until it receives the grant message from arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Prepares OS to be in suspend state until it receives GRANT message
+ * from Arbiter asynchronously.
+ */
static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -475,6 +573,14 @@ static void kbase_arbiter_pm_vm_os_prepare_suspend(struct kbase_device *kbdev)
}
}
+/**
+ * kbase_arbiter_pm_vm_os_resume() - Resume OS function once it receives
+ * a grant message from arbiter
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Resume OS function once it receives GRANT message
+ * from Arbiter asynchronously.
+ */
static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -498,6 +604,14 @@ static void kbase_arbiter_pm_vm_os_resume(struct kbase_device *kbdev)
mutex_lock(&arb_vm_state->vm_state_lock);
}
+/**
+ * kbase_arbiter_pm_vm_event() - Dispatch VM event to the state machine.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @evt: VM event
+ *
+ * The state machine function. Receives events and transitions states
+ * according to the event received and the current state
+ */
void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
enum kbase_arbif_evt evt)
{
@@ -586,6 +700,12 @@ void kbase_arbiter_pm_vm_event(struct kbase_device *kbdev,
KBASE_EXPORT_TEST_API(kbase_arbiter_pm_vm_event);
+/**
+ * kbase_arbiter_pm_vm_wait_gpu_assignment() - VM wait for a GPU assignment.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * VM waits for a GPU assignment.
+ */
static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
{
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -597,6 +717,12 @@ static void kbase_arbiter_pm_vm_wait_gpu_assignment(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "Waiting for GPU assignment - done\n");
}
+/**
+ * kbase_arbiter_pm_vm_gpu_assigned_lockheld() - Check if the GPU is assigned to the VM
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * Checks if the GPU is assigned to the VM; the VM state lock must be held.
+ */
static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
struct kbase_device *kbdev)
{
@@ -607,6 +733,19 @@ static inline bool kbase_arbiter_pm_vm_gpu_assigned_lockheld(
arb_vm_state->vm_state == KBASE_VM_STATE_ACTIVE);
}
+/**
+ * kbase_arbiter_pm_ctx_active_handle_suspend() - Handle suspend operation for
+ * arbitration mode
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @suspend_handler: The handler code for how to handle a suspend
+ * that might occur
+ *
+ * This function handles a suspend event from the driver,
+ * communicating with the arbiter and waiting synchronously for the GPU
+ * to be granted again depending on the VM state.
+ *
+ * Return: 0 on success, else 1 if the suspend handler is not possible.
+ */
int kbase_arbiter_pm_ctx_active_handle_suspend(struct kbase_device *kbdev,
enum kbase_pm_suspend_handler suspend_handler)
{
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 54b0748..7c5001e 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -75,12 +75,12 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
/* Configure */
prfcnt_config = kctx->as_nr << PRFCNT_CONFIG_AS_SHIFT;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
- if (kbdev->hwcnt.backend.use_secondary_override)
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+ prfcnt_config |= kbdev->hwcnt.backend.override_counter_set
+ << PRFCNT_CONFIG_SETSELECT_SHIFT;
#else
- if (enable->use_secondary)
+ prfcnt_config |= enable->counter_set << PRFCNT_CONFIG_SETSELECT_SHIFT;
#endif
- prfcnt_config |= 1 << PRFCNT_CONFIG_SETSELECT_SHIFT;
#if MALI_USE_CSF
kbase_reg_write(kbdev, GPU_CONTROL_MCU_REG(PRFCNT_CONFIG),
@@ -209,7 +209,6 @@ int kbase_instr_hwcnt_disable_internal(struct kbase_context *kctx)
kctx);
err = 0;
-
out:
return err;
}
@@ -305,39 +304,6 @@ bool kbase_instr_hwcnt_dump_complete(struct kbase_context *kctx,
}
KBASE_EXPORT_SYMBOL(kbase_instr_hwcnt_dump_complete);
-void kbasep_cache_clean_worker(struct work_struct *data)
-{
- struct kbase_device *kbdev;
- unsigned long flags, pm_flags;
-
- kbdev = container_of(data, struct kbase_device,
- hwcnt.backend.cache_clean_work);
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, pm_flags);
- spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
-
- /* Clean and invalidate the caches so we're sure the mmu tables for the
- * dump buffer is valid.
- */
- KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
- KBASE_INSTR_STATE_REQUEST_CLEAN);
- kbase_gpu_start_cache_clean_nolock(kbdev);
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, pm_flags);
-
- kbase_gpu_wait_cache_clean(kbdev);
-
- spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
- KBASE_DEBUG_ASSERT(kbdev->hwcnt.backend.state ==
- KBASE_INSTR_STATE_REQUEST_CLEAN);
- /* All finished and idle */
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
- kbdev->hwcnt.backend.triggered = 1;
- wake_up(&kbdev->hwcnt.backend.wait);
-
- spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-}
-
#if MALI_USE_CSF
/**
* kbasep_hwcnt_irq_poll_tasklet - tasklet to poll MCU IRQ status register
@@ -395,20 +361,10 @@ void kbase_instr_hwcnt_sample_done(struct kbase_device *kbdev)
kbdev->hwcnt.backend.triggered = 1;
wake_up(&kbdev->hwcnt.backend.wait);
} else if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING) {
- if (kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
- /* All finished and idle */
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
- kbdev->hwcnt.backend.triggered = 1;
- wake_up(&kbdev->hwcnt.backend.wait);
- } else {
- int ret;
- /* Always clean and invalidate the cache after a successful dump
- */
- kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_REQUEST_CLEAN;
- ret = queue_work(kbdev->hwcnt.backend.cache_clean_wq,
- &kbdev->hwcnt.backend.cache_clean_work);
- KBASE_DEBUG_ASSERT(ret);
- }
+ /* All finished and idle */
+ kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_IDLE;
+ kbdev->hwcnt.backend.triggered = 1;
+ wake_up(&kbdev->hwcnt.backend.wait);
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
@@ -480,8 +436,6 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
kbdev->hwcnt.backend.state = KBASE_INSTR_STATE_DISABLED;
init_waitqueue_head(&kbdev->hwcnt.backend.wait);
- INIT_WORK(&kbdev->hwcnt.backend.cache_clean_work,
- kbasep_cache_clean_worker);
#if MALI_USE_CSF
tasklet_init(&kbdev->hwcnt.backend.csf_hwc_irq_poll_tasklet,
@@ -490,15 +444,17 @@ int kbase_instr_backend_init(struct kbase_device *kbdev)
kbdev->hwcnt.backend.triggered = 0;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
- kbdev->hwcnt.backend.use_secondary_override = false;
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+/* Use the build time option for the override default. */
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+ /* Default to primary */
+ kbdev->hwcnt.backend.override_counter_set = KBASE_HWCNT_SET_PRIMARY;
+#endif
#endif
-
- kbdev->hwcnt.backend.cache_clean_wq =
- alloc_workqueue("Mali cache cleaning workqueue", 0, 1);
- if (NULL == kbdev->hwcnt.backend.cache_clean_wq)
- ret = -EINVAL;
-
return ret;
}
@@ -507,14 +463,20 @@ void kbase_instr_backend_term(struct kbase_device *kbdev)
#if MALI_USE_CSF
tasklet_kill(&kbdev->hwcnt.backend.csf_hwc_irq_poll_tasklet);
#endif
- destroy_workqueue(kbdev->hwcnt.backend.cache_clean_wq);
}
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
void kbase_instr_backend_debugfs_init(struct kbase_device *kbdev)
{
- debugfs_create_bool("hwcnt_use_secondary", S_IRUGO | S_IWUSR,
- kbdev->mali_debugfs_directory,
- &kbdev->hwcnt.backend.use_secondary_override);
+ /* No validation is done on the debugfs input. Invalid input could cause
+ * performance counter errors. This is acceptable since this is a debug
+ * only feature and users should know what they are doing.
+ *
+ * Valid inputs are the values accepted by the SET_SELECT bits of the
+ * PRFCNT_CONFIG register as defined in the architecture specification.
+ */
+ debugfs_create_u8("hwcnt_set_select", S_IRUGO | S_IWUSR,
+ kbdev->mali_debugfs_directory,
+ (u8 *)&kbdev->hwcnt.backend.override_counter_set);
}
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
index 9f785ce..e4b6280 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_defs.h
@@ -27,6 +27,8 @@
#ifndef _KBASE_INSTR_DEFS_H_
#define _KBASE_INSTR_DEFS_H_
+#include "../../mali_kbase_hwcnt_gpu.h"
+
/*
* Instrumentation State Machine States
*/
@@ -37,8 +39,6 @@ enum kbase_instr_state {
KBASE_INSTR_STATE_IDLE,
/* Hardware is currently dumping a frame. */
KBASE_INSTR_STATE_DUMPING,
- /* We've requested a clean to occur on a workqueue */
- KBASE_INSTR_STATE_REQUEST_CLEAN,
/* An error has occured during DUMPING (page fault). */
KBASE_INSTR_STATE_FAULT
};
@@ -47,17 +47,14 @@ enum kbase_instr_state {
struct kbase_instr_backend {
wait_queue_head_t wait;
int triggered;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
- bool use_secondary_override;
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
+ enum kbase_hwcnt_physical_set override_counter_set;
#endif
enum kbase_instr_state state;
- struct workqueue_struct *cache_clean_wq;
- struct work_struct cache_clean_work;
#if MALI_USE_CSF
struct tasklet_struct csf_hwc_irq_poll_tasklet;
#endif
};
#endif /* _KBASE_INSTR_DEFS_H_ */
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
index 9cccf22..7d6c814 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,6 @@
*
*/
-
/*
* Register-based HW access backend specific definitions
*/
@@ -78,9 +77,8 @@ struct slot_rb {
* The hwaccess_lock (a spinlock) must be held when accessing this structure
*/
struct kbase_backend_data {
- struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
-
#if !MALI_USE_CSF
+ struct slot_rb slot_rb[BASE_JM_MAX_NR_SLOTS];
struct hrtimer scheduling_timer;
bool timer_running;
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index c43bf64..bb11d68 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -156,15 +156,23 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
#endif /* CONFIG_MALI_DEBUG */
init_waitqueue_head(&kbdev->pm.backend.gpu_in_desired_state_wait);
+#if !MALI_USE_CSF
/* Initialise the metrics subsystem */
ret = kbasep_pm_metrics_init(kbdev);
if (ret)
return ret;
+#else
+ /* Due to dependency on kbase_ipa_control, the metrics subsystem can't
+ * be initialized here.
+ */
+ CSTD_UNUSED(ret);
+#endif
init_waitqueue_head(&kbdev->pm.backend.reset_done_wait);
kbdev->pm.backend.reset_done = false;
init_waitqueue_head(&kbdev->pm.zero_active_count_wait);
+ init_waitqueue_head(&kbdev->pm.resume_wait);
kbdev->pm.active_count = 0;
spin_lock_init(&kbdev->pm.backend.gpu_cycle_counter_requests_lock);
@@ -221,7 +229,9 @@ pm_state_machine_fail:
kbase_pm_policy_term(kbdev);
kbase_pm_ca_term(kbdev);
workq_fail:
+#if !MALI_USE_CSF
kbasep_pm_metrics_term(kbdev);
+#endif
return -EINVAL;
}
@@ -568,11 +578,24 @@ int kbase_hwaccess_pm_powerup(struct kbase_device *kbdev,
kbase_pm_unlock(kbdev);
return ret;
}
-
+#if MALI_USE_CSF
+ kbdev->pm.debug_core_mask =
+ kbdev->gpu_props.props.raw_props.shader_present;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
+ /* Set the initial value for 'shaders_avail'. It would be later
+ * modified only from the MCU state machine, when the shader core
+ * allocation enable mask request has completed. So its value would
+ * indicate the mask of cores that are currently being used by FW for
+ * the allocation of endpoints requested by CSGs.
+ */
+ kbdev->pm.backend.shaders_avail = kbase_pm_ca_get_core_mask(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
+#else
kbdev->pm.debug_core_mask_all = kbdev->pm.debug_core_mask[0] =
kbdev->pm.debug_core_mask[1] =
kbdev->pm.debug_core_mask[2] =
kbdev->gpu_props.props.raw_props.shader_present;
+#endif
/* Pretend the GPU is active to prevent a power policy turning the GPU
* cores off */
@@ -645,8 +668,10 @@ void kbase_hwaccess_pm_term(struct kbase_device *kbdev)
kbase_pm_policy_term(kbdev);
kbase_pm_ca_term(kbdev);
+#if !MALI_USE_CSF
/* Shut down the metrics subsystem */
kbasep_pm_metrics_term(kbdev);
+#endif
destroy_workqueue(kbdev->pm.backend.gpu_poweroff_wait_wq);
}
@@ -665,6 +690,17 @@ void kbase_pm_power_changed(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
+#if MALI_USE_CSF
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ kbdev->pm.debug_core_mask = new_core_mask;
+ kbase_pm_update_dynamic_cores_onoff(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask);
+#else
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
u64 new_core_mask_js0, u64 new_core_mask_js1,
u64 new_core_mask_js2)
@@ -685,6 +721,7 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
kbase_pm_update_dynamic_cores_onoff(kbdev);
}
+#endif /* MALI_USE_CSF */
void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev)
{
@@ -735,6 +772,7 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
kbase_backend_timer_resume(kbdev);
#endif /* !MALI_USE_CSF */
+ wake_up_all(&kbdev->pm.resume_wait);
kbase_pm_unlock(kbdev);
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
index f518b40..02dfdb0 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_ca.c
@@ -59,6 +59,14 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF
+ if (!(core_mask & kbdev->pm.debug_core_mask)) {
+ dev_err(kbdev->dev,
+ "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
+ core_mask, kbdev->pm.debug_core_mask);
+ goto unlock;
+ }
+#else
if (!(core_mask & kbdev->pm.debug_core_mask_all)) {
dev_err(kbdev->dev, "OPP core mask 0x%llX does not intersect with debug mask 0x%llX\n",
core_mask, kbdev->pm.debug_core_mask_all);
@@ -69,6 +77,7 @@ void kbase_devfreq_set_core_mask(struct kbase_device *kbdev, u64 core_mask)
dev_err(kbdev->dev, "Dynamic core scaling not supported as dummy job WA is enabled");
goto unlock;
}
+#endif /* MALI_USE_CSF */
pm_backend->ca_cores_enabled = core_mask;
@@ -80,21 +89,24 @@ unlock:
dev_dbg(kbdev->dev, "Devfreq policy : new core mask=%llX\n",
pm_backend->ca_cores_enabled);
}
+KBASE_EXPORT_TEST_API(kbase_devfreq_set_core_mask);
#endif
u64 kbase_pm_ca_get_core_mask(struct kbase_device *kbdev)
{
-#ifdef CONFIG_MALI_DEVFREQ
- struct kbase_pm_backend_data *pm_backend = &kbdev->pm.backend;
+#if MALI_USE_CSF
+ u64 debug_core_mask = kbdev->pm.debug_core_mask;
+#else
+ u64 debug_core_mask = kbdev->pm.debug_core_mask_all;
#endif
lockdep_assert_held(&kbdev->hwaccess_lock);
#ifdef CONFIG_MALI_DEVFREQ
- return pm_backend->ca_cores_enabled & kbdev->pm.debug_core_mask_all;
+ return kbdev->pm.backend.ca_cores_enabled & debug_core_mask;
#else
return kbdev->gpu_props.props.raw_props.shader_present &
- kbdev->pm.debug_core_mask_all;
+ debug_core_mask;
#endif
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
index 3e19459..d7d8f75 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_defs.h
@@ -99,6 +99,8 @@ enum kbase_l2_core_state {
* @KBASE_MCU_ON_HWCNT_ENABLE: The Global requests have completed and MCU is
* now ready for use and hwcnt is being enabled.
* @KBASE_MCU_ON: The MCU is active and hwcnt has been enabled.
+ * @KBASE_MCU_ON_CORE_MASK_UPDATE_PEND: The MCU is active and mask of enabled
+ * shader cores is being updated.
* @KBASE_MCU_ON_HWCNT_DISABLE: The MCU is on and hwcnt is being disabled.
* @KBASE_MCU_ON_HALT: The MCU is on and hwcnt has been disabled,
* MCU halt would be triggered.
@@ -178,8 +180,10 @@ enum kbase_shader_core_state {
struct kbasep_pm_metrics {
u32 time_busy;
u32 time_idle;
+#if !MALI_USE_CSF
u32 busy_cl[2];
u32 busy_gl;
+#endif
};
/**
@@ -205,9 +209,14 @@ struct kbasep_pm_metrics {
*/
struct kbasep_pm_metrics_state {
ktime_t time_period_start;
+#if MALI_USE_CSF
+ /* Handle returned on registering DVFS as a kbase_ipa_control client */
+ void *ipa_control_client;
+#else
bool gpu_active;
u32 active_cl_ctx[2];
u32 active_gl_ctx[3];
+#endif
spinlock_t lock;
void *platform_data;
@@ -335,10 +344,10 @@ union kbase_pm_policy_data {
* @shaders_avail: This is updated by the state machine when it is in a state
* where it can write to the SHADER_PWRON or PWROFF registers
* to have the same set of available cores as specified by
- * @shaders_desired_mask. So it would eventually have the same
- * value as @shaders_desired_mask and would precisely indicate
- * the cores that are currently available. This is internal to
- * shader state machine and should *not* be modified elsewhere.
+ * @shaders_desired_mask. So it would precisely indicate the cores
+ * that are currently available. This is internal to shader
+ * state machine of JM GPUs and should *not* be modified
+ * elsewhere.
* @shaders_desired_mask: This is updated by the state machine when it is in
* a state where it can handle changes to the core
* availability (either by DVFS or sysfs). This is
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index e9e30eb..420d5c5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -47,6 +47,9 @@
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include <arbiter/mali_kbase_arbiter_pm.h>
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
+#if MALI_USE_CSF
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#endif
#include <linux/of.h>
@@ -561,6 +564,16 @@ static const char *kbase_mcu_state_to_string(enum kbase_mcu_state state)
return strings[state];
}
+static inline void kbase_pm_mcu_update_core_mask(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
+
+ kbase_csf_firmware_update_core_mask(kbdev,
+ kbdev->pm.backend.shaders_desired_mask);
+ kbdev->pm.backend.mcu_state = KBASE_MCU_ON_CORE_MASK_UPDATE_PEND;
+}
+
static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
@@ -591,15 +604,21 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_PEND_ON_RELOAD:
if (kbdev->csf.firmware_reloaded) {
- kbase_csf_firmware_global_reinit(kbdev);
+ backend->shaders_desired_mask =
+ kbase_pm_ca_get_core_mask(kbdev);
+ kbase_csf_firmware_global_reinit(kbdev,
+ backend->shaders_desired_mask);
backend->mcu_state =
KBASE_MCU_ON_GLB_REINIT_PEND;
}
break;
case KBASE_MCU_ON_GLB_REINIT_PEND:
- if (kbase_csf_firmware_global_reinit_complete(kbdev))
+ if (kbase_csf_firmware_global_reinit_complete(kbdev)) {
+ backend->shaders_avail =
+ backend->shaders_desired_mask;
backend->mcu_state = KBASE_MCU_ON_HWCNT_ENABLE;
+ }
break;
case KBASE_MCU_ON_HWCNT_ENABLE:
@@ -615,11 +634,22 @@ static int kbase_pm_mcu_update_state(struct kbase_device *kbdev)
case KBASE_MCU_ON:
if (!kbase_pm_is_mcu_desired(kbdev))
backend->mcu_state = KBASE_MCU_ON_HWCNT_DISABLE;
+ else {
+ backend->shaders_desired_mask =
+ kbase_pm_ca_get_core_mask(kbdev);
+ if (unlikely(backend->shaders_avail !=
+ backend->shaders_desired_mask))
+ kbase_pm_mcu_update_core_mask(kbdev);
+ }
break;
- /* ToDo. Add new state(s) if shader cores mask change for DVFS
- * has to be accommodated in the MCU state machine.
- */
+ case KBASE_MCU_ON_CORE_MASK_UPDATE_PEND:
+ if (kbase_csf_firmware_core_mask_updated(kbdev)) {
+ backend->shaders_avail =
+ backend->shaders_desired_mask;
+ backend->mcu_state = KBASE_MCU_ON;
+ }
+ break;
case KBASE_MCU_ON_HWCNT_DISABLE:
if (kbase_pm_is_mcu_desired(kbdev)) {
@@ -699,7 +729,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
u64 l2_present = kbdev->gpu_props.props.raw_props.l2_present;
+#if !MALI_USE_CSF
u64 tiler_present = kbdev->gpu_props.props.raw_props.tiler_present;
+#endif
enum kbase_l2_core_state prev_state;
lockdep_assert_held(&kbdev->hwaccess_lock);
@@ -710,10 +742,13 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
KBASE_PM_CORE_L2);
u64 l2_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_L2);
+
+#if !MALI_USE_CSF
u64 tiler_trans = kbase_pm_get_trans_cores(kbdev,
KBASE_PM_CORE_TILER);
u64 tiler_ready = kbase_pm_get_ready_cores(kbdev,
KBASE_PM_CORE_TILER);
+#endif
/*
* kbase_pm_get_ready_cores and kbase_pm_get_trans_cores
@@ -736,8 +771,9 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
* between the register reads
*/
l2_trans &= ~l2_ready;
+#if !MALI_USE_CSF
tiler_trans &= ~tiler_ready;
-
+#endif
prev_state = backend->l2_state;
switch (backend->l2_state) {
@@ -748,7 +784,7 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
* powering it on
*/
kbase_pm_l2_config_override(kbdev);
-
+#if !MALI_USE_CSF
/* L2 is required, power on. Powering on the
* tiler will also power the first L2 cache.
*/
@@ -762,14 +798,30 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
l2_present & ~1,
ACTION_PWRON);
+#else
+ /* With CSF firmware, Host driver doesn't need to
+ * handle power management with both shader and tiler cores.
+ * The CSF firmware will power up the cores appropriately.
+ * So only power the l2 cache explicitly.
+ */
+ kbase_pm_invoke(kbdev, KBASE_PM_CORE_L2,
+ l2_present, ACTION_PWRON);
+#endif
backend->l2_state = KBASE_L2_PEND_ON;
}
break;
case KBASE_L2_PEND_ON:
+#if !MALI_USE_CSF
if (!l2_trans && l2_ready == l2_present && !tiler_trans
&& tiler_ready == tiler_present) {
- KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, tiler_ready);
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL,
+ tiler_ready);
+#else
+ if (!l2_trans && l2_ready == l2_present) {
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL,
+ l2_ready);
+#endif
/*
* Ensure snoops are enabled after L2 is powered
* up. Note that kbase keeps track of the snoop
@@ -948,9 +1000,11 @@ static int kbase_pm_l2_update_state(struct kbase_device *kbdev)
*/
kbase_gpu_start_cache_clean_nolock(
kbdev);
-
+#if !MALI_USE_CSF
KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_TILER, NULL, 0u);
-
+#else
+ KBASE_KTRACE_ADD(kbdev, PM_CORES_CHANGE_AVAILABLE_L2, NULL, 0u);
+#endif
backend->l2_state = KBASE_L2_PEND_OFF;
break;
@@ -1827,6 +1881,22 @@ void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
KBASE_EXPORT_TEST_API(kbase_pm_disable_interrupts);
+#if MALI_USE_CSF
+static void update_user_reg_page_mapping(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->pm.lock);
+
+ if (kbdev->csf.mali_file_inode) {
+ /* This would zap the pte corresponding to the mapping of User
+ * register page for all the Kbase contexts.
+ */
+ unmap_mapping_range(kbdev->csf.mali_file_inode->i_mapping,
+ BASEP_MEM_CSF_USER_REG_PAGE_HANDLE,
+ PAGE_SIZE, 1);
+ }
+}
+#endif
+
/*
* pmu layout:
* 0x0000: PMU TAG (RO) (0xCAFECAFE)
@@ -1876,6 +1946,11 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
kbdev->pm.backend.gpu_powered = true;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#if MALI_USE_CSF
+ /* GPU has been turned on, can switch to actual register page */
+ update_user_reg_page_mapping(kbdev);
+#endif
+
if (reset_required) {
/* GPU state was lost, reset GPU to ensure it is in a
* consistent state */
@@ -1918,6 +1993,17 @@ void kbase_pm_clock_on(struct kbase_device *kbdev, bool is_resume)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->pm.backend.gpu_ready = true;
kbdev->pm.backend.l2_desired = true;
+#if MALI_USE_CSF
+ if (reset_required) {
+ /* GPU reset was done after the power on, so send the post
+ * reset event instead. This is okay as GPU power off event
+ * is same as pre GPU reset event.
+ */
+ kbase_ipa_control_handle_gpu_reset_post(kbdev);
+ } else {
+ kbase_ipa_control_handle_gpu_power_on(kbdev);
+ }
+#endif
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
@@ -1958,6 +2044,9 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
}
kbase_pm_cache_snoop_disable(kbdev);
+#if MALI_USE_CSF
+ kbase_ipa_control_handle_gpu_power_off(kbdev);
+#endif
kbdev->pm.backend.gpu_ready = false;
@@ -1974,6 +2063,12 @@ bool kbase_pm_clock_off(struct kbase_device *kbdev)
#endif
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+#if MALI_USE_CSF
+ /* GPU is about to be turned off, switch to dummy page */
+ update_user_reg_page_mapping(kbdev);
+#endif
+
#ifdef CONFIG_MALI_ARBITER_SUPPORT
kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_GPU_IDLE_EVENT);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index e968ce8..9f02dd4 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -551,10 +551,15 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
* Return: Returns 0 on failure and non zero on success.
*/
+#if MALI_USE_CSF
+int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation);
+#else
int kbase_platform_dvfs_event(struct kbase_device *kbdev, u32 utilisation,
- u32 util_gl_share, u32 util_cl_share[2]);
+ u32 util_gl_share, u32 util_cl_share[2]);
#endif
+#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
void kbase_pm_power_changed(struct kbase_device *kbdev);
/**
@@ -708,6 +713,19 @@ extern bool corestack_driver_control;
*/
bool kbase_pm_is_l2_desired(struct kbase_device *kbdev);
+#if MALI_USE_CSF
+/**
+ * kbase_pm_is_mcu_desired - Check whether MCU is desired
+ *
+ * @kbdev: Device pointer
+ *
+ * This shall be called to check whether MCU needs to be enabled.
+ *
+ * Return: true if MCU needs to be enabled.
+ */
+bool kbase_pm_is_mcu_desired(struct kbase_device *kbdev);
+#endif
+
/**
* kbase_pm_lock - Lock all necessary mutexes to perform PM actions
*
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
index e163bd4..bc3e6b1 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_mcu_states.h
@@ -31,6 +31,7 @@ KBASEP_MCU_STATE(PEND_ON_RELOAD)
KBASEP_MCU_STATE(ON_GLB_REINIT_PEND)
KBASEP_MCU_STATE(ON_HWCNT_ENABLE)
KBASEP_MCU_STATE(ON)
+KBASEP_MCU_STATE(ON_CORE_MASK_UPDATE_PEND)
KBASEP_MCU_STATE(ON_HWCNT_DISABLE)
KBASEP_MCU_STATE(ON_HALT)
KBASEP_MCU_STATE(ON_PEND_HALT)
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
index 8477063..dc07412 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_metrics.c
@@ -29,24 +29,30 @@
#include <mali_kbase.h>
#include <mali_kbase_pm.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-#if !MALI_USE_CSF
+
+#if MALI_USE_CSF
+#include "mali_kbase_clk_rate_trace_mgr.h"
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#else
#include <backend/gpu/mali_kbase_jm_rb.h>
#endif /* !MALI_USE_CSF */
+
#include <backend/gpu/mali_kbase_pm_defs.h>
#include <mali_linux_trace.h>
-/* When VSync is being hit aim for utilisation between 70-90% */
-#define KBASE_PM_VSYNC_MIN_UTILISATION 70
-#define KBASE_PM_VSYNC_MAX_UTILISATION 90
-/* Otherwise aim for 10-40% */
-#define KBASE_PM_NO_VSYNC_MIN_UTILISATION 10
-#define KBASE_PM_NO_VSYNC_MAX_UTILISATION 40
-
/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
* This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
* under 11s. Exceeding this will cause overflow */
#define KBASE_PM_TIME_SHIFT 8
+#if MALI_USE_CSF
+/* CSHW counter block offsets */
+#define GPU_ACTIVE (4)
+
+/* To get the GPU_ACTIVE value in nanoseconds */
+#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
+#endif
+
#ifdef CONFIG_MALI_MIDGARD_DVFS
static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
{
@@ -73,11 +79,44 @@ static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
int kbasep_pm_metrics_init(struct kbase_device *kbdev)
{
- KBASE_DEBUG_ASSERT(kbdev != NULL);
+#if MALI_USE_CSF
+ struct kbase_ipa_control_perf_counter perf_counter;
+ int err;
+ /* One counter group */
+ const size_t NUM_PERF_COUNTERS = 1;
+
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
kbdev->pm.backend.metrics.kbdev = kbdev;
+ kbdev->pm.backend.metrics.time_period_start = ktime_get();
+ kbdev->pm.backend.metrics.values.time_busy = 0;
+ kbdev->pm.backend.metrics.values.time_idle = 0;
+
+ perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR;
+
+ /* Normalize values by GPU frequency */
+ perf_counter.gpu_norm = true;
+ /* We need the GPU_ACTIVE counter, which is in the CSHW group */
+ perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW;
+
+ /* We need the GPU_ACTIVE counter */
+ perf_counter.idx = GPU_ACTIVE;
+
+ err = kbase_ipa_control_register(
+ kbdev, &perf_counter, NUM_PERF_COUNTERS,
+ &kbdev->pm.backend.metrics.ipa_control_client);
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to register IPA with kbase_ipa_control: err=%d",
+ err);
+ return -1;
+ }
+#else
+ KBASE_DEBUG_ASSERT(kbdev != NULL);
+ kbdev->pm.backend.metrics.kbdev = kbdev;
kbdev->pm.backend.metrics.time_period_start = ktime_get();
+
kbdev->pm.backend.metrics.gpu_active = false;
kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
@@ -91,6 +130,7 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
kbdev->pm.backend.metrics.values.busy_gl = 0;
+#endif
spin_lock_init(&kbdev->pm.backend.metrics.lock);
#ifdef CONFIG_MALI_MIDGARD_DVFS
@@ -118,6 +158,11 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev)
hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
#endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+#if MALI_USE_CSF
+ kbase_ipa_control_unregister(
+ kbdev, kbdev->pm.backend.metrics.ipa_control_client);
+#endif
}
KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
@@ -125,8 +170,52 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
* function
*/
+#if MALI_USE_CSF
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
- ktime_t now)
+ ktime_t now)
+{
+ ktime_t diff;
+ int err;
+ u64 gpu_active_counter;
+
+ lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+ diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
+ if (ktime_to_ns(diff) < 0)
+ return;
+
+ /* TODO: The final parameter to this function is used to obtain the amount
+ * of protected-mode time the GPU has spent. This is the subject of a future
+ * ticket and is not yet implemented.
+ */
+ err = kbase_ipa_control_query(
+ kbdev, kbdev->pm.backend.metrics.ipa_control_client,
+ &gpu_active_counter, 1, NULL);
+
+ if (err) {
+ dev_err(kbdev->dev,
+ "Failed to query the increment of GPU_ACTIVE counter: err=%d",
+ err);
+ } else {
+ u32 ns_time = (u32) (ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
+
+ gpu_active_counter >>= KBASE_PM_TIME_SHIFT;
+
+ WARN_ON_ONCE(gpu_active_counter > ns_time);
+
+ kbdev->pm.backend.metrics.values.time_busy +=
+ gpu_active_counter;
+ kbdev->pm.backend.metrics.values.time_idle +=
+ ns_time - gpu_active_counter;
+ }
+
+ kbdev->pm.backend.metrics.time_period_start = now;
+}
+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
+#else
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
+ ktime_t now)
{
ktime_t diff;
@@ -157,6 +246,7 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
kbdev->pm.backend.metrics.time_period_start = now;
}
+#endif /* MALI_USE_CSF */
#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
@@ -172,9 +262,12 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
memset(diff, 0, sizeof(*diff));
diff->time_busy = cur->time_busy - last->time_busy;
diff->time_idle = cur->time_idle - last->time_idle;
+
+#if !MALI_USE_CSF
diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
diff->busy_gl = cur->busy_gl - last->busy_gl;
+#endif
*last = *cur;
@@ -186,10 +279,13 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
#ifdef CONFIG_MALI_MIDGARD_DVFS
void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
{
- int utilisation, util_gl_share;
- int util_cl_share[2];
+ int utilisation;
int busy;
struct kbasep_pm_metrics *diff;
+#if !MALI_USE_CSF
+ int util_gl_share;
+ int util_cl_share[2];
+#endif
KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -201,11 +297,16 @@ void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
max(diff->time_busy + diff->time_idle, 1u);
busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
+
+#if !MALI_USE_CSF
util_gl_share = (100 * diff->busy_gl) / busy;
util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
+#else
+ kbase_platform_dvfs_event(kbdev, utilisation);
+#endif
}
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
@@ -296,7 +397,6 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
}
}
}
-#endif /* !MALI_USE_CSF */
/* called when job is submitted to or removed from a GPU slot */
void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
@@ -313,12 +413,12 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
timestamp = &now;
}
- /* Track how long CL and/or GL jobs have been busy for */
+ /* Track how much time has been spent busy or idle. For JM GPUs, this also
+ * evaluates how long CL and/or GL jobs have been busy for.
+ */
kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
-#if !MALI_USE_CSF
kbase_pm_metrics_active_calc(kbdev);
-#endif /* !MALI_USE_CSF */
-
spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
}
+#endif /* !MALI_USE_CSF */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index 9bc7ded..426e8a4 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -126,18 +126,20 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
lockdep_assert_held(&kbdev->pm.lock);
-#if MALI_USE_CSF
- /* On CSF GPUs, Host driver isn't supposed to do the power management
- * for shader cores. CSF firmware will power up the cores appropriately
- * and so from Driver's standpoint 'shaders_desired' flag shall always
- * remain 0.
- */
- return;
-#endif
if (kbdev->pm.backend.pm_current_policy == NULL)
return;
if (kbdev->pm.backend.poweroff_wait_in_progress)
return;
+
+#if MALI_USE_CSF
+ CSTD_UNUSED(shaders_desired);
+ /* Invoke the MCU state machine to send a request to FW for updating
+ * the mask of shader cores that can be used for allocation of
+ * endpoints requested by CSGs.
+ */
+ if (kbase_pm_is_mcu_desired(kbdev))
+ kbase_pm_update_state(kbdev);
+#else
/* In protected transition, don't allow outside shader core request
* affect transition, return directly
*/
@@ -149,6 +151,7 @@ void kbase_pm_update_dynamic_cores_onoff(struct kbase_device *kbdev)
if (shaders_desired && kbase_pm_is_l2_desired(kbdev)) {
kbase_pm_update_state(kbdev);
}
+#endif
}
void kbase_pm_update_cores_state_nolock(struct kbase_device *kbdev)
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index b4f4262..b8d8310 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -135,8 +135,11 @@ bob_kernel_module {
cinstr_secondary_hwc: {
kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY=y"],
},
- cinstr_secondary_hwc_via_debug_fs: {
- kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS=y"],
+ cinstr_tertiary_hwc: {
+ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_TERTIARY=y"],
+ },
+ cinstr_hwc_set_select_via_debug_fs: {
+ kbuild_options: ["CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS=y"],
},
mali_2mb_alloc: {
kbuild_options: ["CONFIG_MALI_2MB_ALLOC=y"],
@@ -158,6 +161,8 @@ bob_kernel_module {
"jm/*.h",
"tl/backend/*_jm.c",
"mmu/backend/*_jm.c",
+ "ipa/backend/*_jm.c",
+ "ipa/backend/*_jm.h",
],
},
gpu_has_csf: {
@@ -166,6 +171,9 @@ bob_kernel_module {
"csf/*.c",
"csf/*.h",
"csf/Kbuild",
+ "csf/ipa_control/*.c",
+ "csf/ipa_control/*.h",
+ "csf/ipa_control/Kbuild",
"debug/backend/*_csf.c",
"debug/backend/*_csf.h",
"device/backend/*_csf.c",
@@ -173,6 +181,8 @@ bob_kernel_module {
"gpu/backend/*_csf.h",
"tl/backend/*_csf.c",
"mmu/backend/*_csf.c",
+ "ipa/backend/*_csf.c",
+ "ipa/backend/*_csf.h",
],
},
mali_arbiter_support: {
diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c
index 7c68eb2..6476921 100644
--- a/mali_kbase/context/backend/mali_kbase_context_csf.c
+++ b/mali_kbase/context/backend/mali_kbase_context_csf.c
@@ -28,18 +28,16 @@
#include <context/mali_kbase_context_internal.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase.h>
-#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_dma_fence.h>
#include <mali_kbase_mem_linux.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
-#include <tl/mali_kbase_timeline.h>
-#include <tl/mali_kbase_tracepoints.h>
#ifdef CONFIG_DEBUG_FS
#include <csf/mali_kbase_csf_csg_debugfs.h>
#include <csf/mali_kbase_csf_kcpu_debugfs.h>
#include <csf/mali_kbase_csf_tiler_heap_debugfs.h>
+#include <csf/mali_kbase_csf_cpu_queue_debugfs.h>
#include <mali_kbase_debug_mem_view.h>
#include <mali_kbase_mem_pool_debugfs.h>
@@ -51,6 +49,7 @@ void kbase_context_debugfs_init(struct kbase_context *const kctx)
kbase_csf_queue_group_debugfs_init(kctx);
kbase_csf_kcpu_debugfs_init(kctx);
kbase_csf_tiler_heap_debugfs_init(kctx);
+ kbase_csf_cpu_queue_debugfs_init(kctx);
}
KBASE_EXPORT_SYMBOL(kbase_context_debugfs_init);
@@ -162,11 +161,19 @@ void kbase_destroy_context(struct kbase_context *kctx)
if (WARN_ON(!kbdev))
return;
- /* Ensure the core is powered up for the destroy process
- * A suspend won't happen here, because we're in a syscall
- * from a userspace thread.
+ /* Context termination could happen whilst the system suspend of
+ * the GPU device is ongoing or has completed. A hang has been observed
+ * on the customer side for JM GPUs when context termination is not
+ * blocked until the GPU device has resumed. A similar issue can
+ * potentially occur on CSF GPUs too.
*/
- kbase_pm_context_active(kbdev);
+ while (kbase_pm_context_active_handle_suspend(
+ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+ dev_info(kbdev->dev,
+ "Suspend in progress when destroying context");
+ wait_event(kbdev->pm.resume_wait,
+ !kbase_pm_is_suspending(kbdev));
+ }
kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 0eb4258..a8cefb6 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -212,11 +212,18 @@ void kbase_destroy_context(struct kbase_context *kctx)
if (WARN_ON(!kbdev))
return;
- /* Ensure the core is powered up for the destroy process
- * A suspend won't happen here, because we're in a syscall
- * from a userspace thread.
+ /* Context termination could happen whilst the system suspend of
+ * the GPU device is ongoing or has completed. A hang has been observed
+ * on the customer side when context termination is not blocked until
+ * the GPU device has resumed.
*/
- kbase_pm_context_active(kbdev);
+ while (kbase_pm_context_active_handle_suspend(
+ kbdev, KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
+ dev_info(kbdev->dev,
+ "Suspend in progress when destroying context");
+ wait_event(kbdev->pm.resume_wait,
+ !kbase_pm_is_suspending(kbdev));
+ }
kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 83182f9..5de4c6b 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -28,7 +28,6 @@
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase_mem_linux.h>
-#include <mali_kbase_dma_fence.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_mem_pool_group.h>
#include <tl/mali_kbase_tracepoints.h>
diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild
index 91063c8..d55084a 100644
--- a/mali_kbase/csf/Kbuild
+++ b/mali_kbase/csf/Kbuild
@@ -33,8 +33,11 @@ mali_kbase-y += \
csf/mali_kbase_csf_csg_debugfs.o \
csf/mali_kbase_csf_kcpu_debugfs.o \
csf/mali_kbase_csf_protected_memory.o \
- csf/mali_kbase_csf_tiler_heap_debugfs.o
+ csf/mali_kbase_csf_tiler_heap_debugfs.o \
+ csf/mali_kbase_csf_cpu_queue_debugfs.o
mali_kbase-$(CONFIG_MALI_REAL_HW) += csf/mali_kbase_csf_firmware.o
mali_kbase-$(CONFIG_MALI_NO_MALI) += csf/mali_kbase_csf_firmware_no_mali.o
+
+include $(src)/csf/ipa_control/Kbuild
\ No newline at end of file
diff --git a/mali_kbase/csf/ipa_control/Kbuild b/mali_kbase/csf/ipa_control/Kbuild
new file mode 100644
index 0000000..5d00d1c
--- /dev/null
+++ b/mali_kbase/csf/ipa_control/Kbuild
@@ -0,0 +1,23 @@
+#
+# (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU licence.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+#
+
+mali_kbase-y += \
+ csf/ipa_control/mali_kbase_csf_ipa_control.o
\ No newline at end of file
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
new file mode 100644
index 0000000..66ac377
--- /dev/null
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -0,0 +1,834 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+#include <mali_kbase.h>
+#include "mali_kbase_clk_rate_trace_mgr.h"
+#include "mali_kbase_csf_ipa_control.h"
+
+/*
+ * Status flags from the STATUS register of the IPA Control interface.
+ */
+#define STATUS_COMMAND_ACTIVE ((u32)1 << 0)
+#define STATUS_TIMER_ACTIVE ((u32)1 << 1)
+#define STATUS_AUTO_ACTIVE ((u32)1 << 2)
+#define STATUS_PROTECTED_MODE ((u32)1 << 8)
+#define STATUS_RESET ((u32)1 << 9)
+#define STATUS_TIMER_ENABLED ((u32)1 << 31)
+
+/*
+ * Commands for the COMMAND register of the IPA Control interface.
+ */
+#define COMMAND_NOP ((u32)0)
+#define COMMAND_APPLY ((u32)1)
+#define COMMAND_CLEAR ((u32)2)
+#define COMMAND_SAMPLE ((u32)3)
+#define COMMAND_PROTECTED_ACK ((u32)4)
+#define COMMAND_RESET_ACK ((u32)5)
+
+/**
+ * Default value for the TIMER register of the IPA Control interface,
+ * expressed as number of clock cycles.
+ */
+#define TIMER_DEFAULT_VALUE_CLK_CYCLES ((u32)1000)
+
+/**
+ * Maximum number of loops polling the GPU before we assume the GPU has hung.
+ */
+#define IPA_INACTIVE_MAX_LOOPS ((unsigned int)100000000)
+
+/**
+ * Number of bits used to configure a performance counter in SELECT registers.
+ */
+#define IPA_CONTROL_SELECT_BITS_PER_CNT ((u64)8)
+
+/**
+ * Maximum value of a performance counter.
+ */
+#define MAX_PRFCNT_VALUE (((u64)1 << 48) - 1)
+
+/**
+ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency
+ * listener
+ *
+ * @listener: GPU clock frequency listener.
+ * @kbdev: Pointer to kbase device.
+ */
+struct kbase_ipa_control_listener_data {
+ struct kbase_clk_rate_listener listener;
+ struct kbase_device *kbdev;
+};
+
+static int wait_status(struct kbase_device *kbdev, u32 flags)
+{
+ unsigned int max_loops = IPA_INACTIVE_MAX_LOOPS;
+ u32 status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+
+ /*
+ * Wait for the STATUS register to indicate that flags have been cleared,
+ * in case a transition is pending.
+ */
+ while (--max_loops && (status & flags))
+ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+ if (max_loops == 0) {
+ dev_err(kbdev->dev, "IPA_CONTROL STATUS register stuck");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int apply_select_config(struct kbase_device *kbdev, u64 *select)
+{
+ int ret;
+
+ u32 select_cshw_lo = (u32)(select[KBASE_IPA_CORE_TYPE_CSHW] & U32_MAX);
+ u32 select_cshw_hi =
+ (u32)((select[KBASE_IPA_CORE_TYPE_CSHW] >> 32) & U32_MAX);
+ u32 select_memsys_lo =
+ (u32)(select[KBASE_IPA_CORE_TYPE_MEMSYS] & U32_MAX);
+ u32 select_memsys_hi =
+ (u32)((select[KBASE_IPA_CORE_TYPE_MEMSYS] >> 32) & U32_MAX);
+ u32 select_tiler_lo =
+ (u32)(select[KBASE_IPA_CORE_TYPE_TILER] & U32_MAX);
+ u32 select_tiler_hi =
+ (u32)((select[KBASE_IPA_CORE_TYPE_TILER] >> 32) & U32_MAX);
+ u32 select_shader_lo =
+ (u32)(select[KBASE_IPA_CORE_TYPE_SHADER] & U32_MAX);
+ u32 select_shader_hi =
+ (u32)((select[KBASE_IPA_CORE_TYPE_SHADER] >> 32) & U32_MAX);
+
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_LO), select_cshw_lo);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_CSHW_HI), select_cshw_hi);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_LO),
+ select_memsys_lo);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_MEMSYS_HI),
+ select_memsys_hi);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_LO),
+ select_tiler_lo);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_TILER_HI),
+ select_tiler_hi);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_LO),
+ select_shader_lo);
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(SELECT_SHADER_HI),
+ select_shader_hi);
+
+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+
+ if (!ret)
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY);
+
+ return ret;
+}
+
+static u64 read_value_cnt(struct kbase_device *kbdev, u8 type, int select_idx)
+{
+ u32 value_lo, value_hi;
+
+ switch (type) {
+ case KBASE_IPA_CORE_TYPE_CSHW:
+ value_lo = kbase_reg_read(
+ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_LO(select_idx)));
+ value_hi = kbase_reg_read(
+ kbdev, IPA_CONTROL_REG(VALUE_CSHW_REG_HI(select_idx)));
+ break;
+ case KBASE_IPA_CORE_TYPE_MEMSYS:
+ value_lo = kbase_reg_read(
+ kbdev,
+ IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(select_idx)));
+ value_hi = kbase_reg_read(
+ kbdev,
+ IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(select_idx)));
+ break;
+ case KBASE_IPA_CORE_TYPE_TILER:
+ value_lo = kbase_reg_read(
+ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_LO(select_idx)));
+ value_hi = kbase_reg_read(
+ kbdev, IPA_CONTROL_REG(VALUE_TILER_REG_HI(select_idx)));
+ break;
+ case KBASE_IPA_CORE_TYPE_SHADER:
+ value_lo = kbase_reg_read(
+ kbdev,
+ IPA_CONTROL_REG(VALUE_SHADER_REG_LO(select_idx)));
+ value_hi = kbase_reg_read(
+ kbdev,
+ IPA_CONTROL_REG(VALUE_SHADER_REG_HI(select_idx)));
+ break;
+ default:
+ WARN(1, "Unknown core type: %u\n", type);
+ value_lo = value_hi = 0;
+ break;
+ }
+
+ return (((u64)value_hi << 32) | value_lo);
+}
+
+static void build_select_config(struct kbase_ipa_control *ipa_ctrl,
+ u64 *select_config)
+{
+ size_t i;
+
+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) {
+ size_t j;
+
+ select_config[i] = 0ULL;
+
+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+ struct kbase_ipa_control_prfcnt_config *prfcnt_config =
+ &ipa_ctrl->blocks[i].select[j];
+
+ select_config[i] |=
+ ((u64)prfcnt_config->idx
+ << (IPA_CONTROL_SELECT_BITS_PER_CNT * j));
+ }
+ }
+}
+
+static inline void calc_prfcnt_delta(struct kbase_device *kbdev,
+ struct kbase_ipa_control_prfcnt *prfcnt,
+ bool gpu_ready)
+{
+ u64 delta_value, raw_value;
+
+ if (gpu_ready)
+ raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
+ prfcnt->select_idx);
+ else
+ raw_value = prfcnt->latest_raw_value;
+
+ if (raw_value < prfcnt->latest_raw_value) {
+ delta_value = (MAX_PRFCNT_VALUE - prfcnt->latest_raw_value) +
+ raw_value;
+ } else {
+ delta_value = raw_value - prfcnt->latest_raw_value;
+ }
+
+ delta_value *= prfcnt->scaling_factor;
+
+ if (!WARN_ON(kbdev->csf.ipa_control.cur_gpu_rate == 0))
+ if (prfcnt->gpu_norm)
+ delta_value /= kbdev->csf.ipa_control.cur_gpu_rate;
+
+ prfcnt->latest_raw_value = raw_value;
+
+ /* Accumulate the difference */
+ prfcnt->accumulated_diff += delta_value;
+}
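As a worked illustration of the wrap-around branch above (numbers are purely illustrative): if latest_raw_value were MAX_PRFCNT_VALUE - 10 and the 48-bit counter had wrapped so that raw_value reads 5, the code computes delta_value = (MAX_PRFCNT_VALUE - (MAX_PRFCNT_VALUE - 10)) + 5 = 15, which is then scaled and, if requested, normalized by the current GPU clock rate.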
+
+/**
+ * kbase_ipa_control_rate_change_notify - GPU frequency change callback
+ *
+ * @listener: Clock frequency change listener.
+ * @clk_index: Index of the clock for which the change has occurred.
+ * @clk_rate_hz: Clock frequency (Hz).
+ *
+ * This callback notifies kbase_ipa_control about GPU frequency changes.
+ * Only top-level clock changes are meaningful. A GPU frequency update
+ * affects, in every session, all performance counters that require
+ * GPU normalization.
+ */
+static void
+kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
+ u32 clk_index, u32 clk_rate_hz)
+{
+ if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) {
+ size_t i;
+ unsigned long flags;
+ struct kbase_ipa_control_listener_data *listener_data =
+ container_of(listener,
+ struct kbase_ipa_control_listener_data,
+ listener);
+ struct kbase_device *kbdev = listener_data->kbdev;
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ if (!kbdev->pm.backend.gpu_ready) {
+ dev_err(kbdev->dev,
+ "%s: GPU frequency cannot change while GPU is off",
+ __func__);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return;
+ }
+
+ /* Interrupts are already disabled and interrupt state is also saved */
+ spin_lock(&ipa_ctrl->lock);
+
+ for (i = 0; i < ipa_ctrl->num_active_sessions; i++) {
+ size_t j;
+ struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
+
+ for (j = 0; j < session->num_prfcnts; j++) {
+ struct kbase_ipa_control_prfcnt *prfcnt =
+ &session->prfcnts[j];
+
+ if (prfcnt->gpu_norm)
+ calc_prfcnt_delta(kbdev, prfcnt, true);
+ }
+ }
+
+ ipa_ctrl->cur_gpu_rate = clk_rate_hz;
+ spin_unlock(&ipa_ctrl->lock);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+}
+
+void kbase_ipa_control_init(struct kbase_device *kbdev)
+{
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+ struct kbase_ipa_control_listener_data *listener_data;
+ size_t i, j;
+
+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++) {
+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+ ipa_ctrl->blocks[i].select[j].idx = 0;
+ ipa_ctrl->blocks[i].select[j].refcount = 0;
+ }
+ ipa_ctrl->blocks[i].num_available_counters =
+ KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS;
+ }
+
+ spin_lock_init(&ipa_ctrl->lock);
+ ipa_ctrl->num_active_sessions = 0;
+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+ ipa_ctrl->sessions[i].active = false;
+ }
+
+ listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data),
+ GFP_KERNEL);
+ if (listener_data) {
+ listener_data->listener.notify =
+ kbase_ipa_control_rate_change_notify;
+ listener_data->kbdev = kbdev;
+ ipa_ctrl->rtm_listener_data = listener_data;
+ }
+
+ spin_lock(&clk_rtm->lock);
+ if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP])
+ ipa_ctrl->cur_gpu_rate =
+ clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]->clock_val;
+ if (listener_data)
+ kbase_clk_rate_trace_manager_subscribe_no_lock(
+ clk_rtm, &listener_data->listener);
+ spin_unlock(&clk_rtm->lock);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_init);
+
+void kbase_ipa_control_term(struct kbase_device *kbdev)
+{
+ unsigned long flags;
+ struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+ struct kbase_ipa_control_listener_data *listener_data =
+ ipa_ctrl->rtm_listener_data;
+
+ WARN_ON(ipa_ctrl->num_active_sessions);
+
+ if (listener_data)
+ kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener);
+ kfree(ipa_ctrl->rtm_listener_data);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ if (kbdev->pm.backend.gpu_powered)
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_term);
+
+int kbase_ipa_control_register(
+ struct kbase_device *kbdev,
+ const struct kbase_ipa_control_perf_counter *perf_counters,
+ size_t num_counters, void **client)
+{
+ int ret = 0;
+ size_t i, session_idx, req_counters[KBASE_IPA_CORE_TYPE_NUM];
+ bool already_configured[KBASE_IPA_CONTROL_MAX_COUNTERS];
+ bool new_config = false;
+ struct kbase_ipa_control *ipa_ctrl;
+ struct kbase_ipa_control_session *session = NULL;
+ unsigned long flags;
+
+ if (WARN_ON(kbdev == NULL) || WARN_ON(perf_counters == NULL) ||
+ WARN_ON(client == NULL) ||
+ WARN_ON(num_counters > KBASE_IPA_CONTROL_MAX_COUNTERS)) {
+ dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+ return -EINVAL;
+ }
+
+ kbase_pm_context_active(kbdev);
+
+ ipa_ctrl = &kbdev->csf.ipa_control;
+ spin_lock_irqsave(&ipa_ctrl->lock, flags);
+
+ if (ipa_ctrl->num_active_sessions == KBASE_IPA_CONTROL_MAX_SESSIONS) {
+ dev_err(kbdev->dev, "%s: too many sessions", __func__);
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++)
+ req_counters[i] = 0;
+
+ /*
+ * Count how many counters would need to be configured in order to
+ * satisfy the request. Requested counters which happen to be already
+ * configured can be skipped.
+ */
+ for (i = 0; i < num_counters; i++) {
+ size_t j;
+ enum kbase_ipa_core_type type = perf_counters[i].type;
+ u8 idx = perf_counters[i].idx;
+
+ if ((type >= KBASE_IPA_CORE_TYPE_NUM) ||
+ (idx >= KBASE_IPA_CONTROL_CNT_MAX_IDX)) {
+ dev_err(kbdev->dev,
+ "%s: invalid requested type %u and/or index %u",
+ __func__, type, idx);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+ struct kbase_ipa_control_prfcnt_config *prfcnt_config =
+ &ipa_ctrl->blocks[type].select[j];
+
+ if (prfcnt_config->refcount > 0) {
+ if (prfcnt_config->idx == idx) {
+ already_configured[i] = true;
+ break;
+ }
+ }
+ }
+
+ if (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS) {
+ already_configured[i] = false;
+ req_counters[type]++;
+ new_config = true;
+ }
+ }
+
+ for (i = 0; i < KBASE_IPA_CORE_TYPE_NUM; i++)
+ if (req_counters[i] >
+ ipa_ctrl->blocks[i].num_available_counters) {
+ dev_err(kbdev->dev,
+ "%s: more counters (%zu) than available (%zu) have been requested for type %zu",
+ __func__, req_counters[i],
+ ipa_ctrl->blocks[i].num_available_counters, i);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ /*
+ * The request has been validated.
+ * Firstly, find an available session and then set up the initial state
+ * of the session and update the configuration of performance counters
+ * in the internal state of kbase_ipa_control.
+ */
+ for (session_idx = 0; session_idx < KBASE_IPA_CONTROL_MAX_SESSIONS;
+ session_idx++) {
+ session = &ipa_ctrl->sessions[session_idx];
+ if (!session->active)
+ break;
+ }
+
+ if (!session) {
+ dev_err(kbdev->dev, "%s: wrong or corrupt session state",
+ __func__);
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ for (i = 0; i < num_counters; i++) {
+ struct kbase_ipa_control_prfcnt_config *prfcnt_config;
+ size_t j;
+ u8 type = perf_counters[i].type;
+ u8 idx = perf_counters[i].idx;
+
+ for (j = 0; j < KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS; j++) {
+ prfcnt_config = &ipa_ctrl->blocks[type].select[j];
+
+ if (already_configured[i]) {
+ if ((prfcnt_config->refcount > 0) &&
+ (prfcnt_config->idx == idx)) {
+ break;
+ }
+ } else {
+ if (prfcnt_config->refcount == 0)
+ break;
+ }
+ }
+
+ if (WARN_ON((prfcnt_config->refcount > 0 &&
+ prfcnt_config->idx != idx) ||
+ (j == KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS))) {
+ dev_err(kbdev->dev,
+ "%s: invalid internal state: counter already configured or no counter available to configure",
+ __func__);
+ ret = -EBUSY;
+ goto exit;
+ }
+
+ if (prfcnt_config->refcount == 0) {
+ prfcnt_config->idx = idx;
+ ipa_ctrl->blocks[type].num_available_counters--;
+ }
+
+ session->prfcnts[i].accumulated_diff = 0;
+ session->prfcnts[i].type = type;
+ session->prfcnts[i].select_idx = j;
+ session->prfcnts[i].scaling_factor =
+ perf_counters[i].scaling_factor;
+ session->prfcnts[i].gpu_norm = perf_counters[i].gpu_norm;
+
+ prfcnt_config->refcount++;
+ }
+
+ /*
+ * Apply new configuration, if necessary.
+ * As a temporary solution, make sure that the GPU is on
+ * before applying the new configuration.
+ */
+ if (new_config) {
+ u64 select_config[KBASE_IPA_CORE_TYPE_NUM];
+
+ build_select_config(ipa_ctrl, select_config);
+ ret = apply_select_config(kbdev, select_config);
+ if (ret)
+ dev_err(kbdev->dev,
+ "%s: failed to apply SELECT configuration",
+ __func__);
+ }
+
+ if (!ret) {
+ /* Accumulator registers don't contain any sample if the timer
+ * has not been enabled first. Take a sample manually before
+ * enabling the timer.
+ */
+ if (ipa_ctrl->num_active_sessions == 0) {
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND),
+ COMMAND_SAMPLE);
+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+ if (!ret) {
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
+ TIMER_DEFAULT_VALUE_CLK_CYCLES);
+ } else {
+ dev_err(kbdev->dev,
+ "%s: failed to sample new counters",
+ __func__);
+ }
+ }
+ }
+
+ if (!ret) {
+ session->num_prfcnts = num_counters;
+ session->active = true;
+ ipa_ctrl->num_active_sessions++;
+ *client = session;
+
+ /*
+ * Read current raw value to initialize the session.
+ * This is necessary so that the first query can generate
+ * a correct value by calculating the difference from the
+ * beginning of the session.
+ */
+ for (i = 0; i < session->num_prfcnts; i++) {
+ struct kbase_ipa_control_prfcnt *prfcnt =
+ &session->prfcnts[i];
+ u64 raw_value = read_value_cnt(kbdev, (u8)prfcnt->type,
+ prfcnt->select_idx);
+ prfcnt->latest_raw_value = raw_value;
+ }
+ }
+
+exit:
+ spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
+ kbase_pm_context_idle(kbdev);
+ return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_register);
+
+int kbase_ipa_control_unregister(struct kbase_device *kbdev, const void *client)
+{
+ struct kbase_ipa_control *ipa_ctrl;
+ struct kbase_ipa_control_session *session;
+ int ret = 0;
+ size_t i;
+ unsigned long flags;
+ bool new_config = false, valid_session = false;
+
+ if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL)) {
+ dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+ return -EINVAL;
+ }
+
+ kbase_pm_context_active(kbdev);
+
+ ipa_ctrl = &kbdev->csf.ipa_control;
+ session = (struct kbase_ipa_control_session *)client;
+
+ spin_lock_irqsave(&ipa_ctrl->lock, flags);
+
+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+ if (session == &ipa_ctrl->sessions[i]) {
+ valid_session = true;
+ break;
+ }
+ }
+
+ if (!valid_session) {
+ dev_err(kbdev->dev, "%s: invalid session handle", __func__);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (ipa_ctrl->num_active_sessions == 0) {
+ dev_err(kbdev->dev, "%s: no active sessions found", __func__);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (!session->active) {
+ dev_err(kbdev->dev, "%s: session is already inactive",
+ __func__);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ for (i = 0; i < session->num_prfcnts; i++) {
+ struct kbase_ipa_control_prfcnt_config *prfcnt_config;
+ u8 type = session->prfcnts[i].type;
+ u8 idx = session->prfcnts[i].select_idx;
+
+ prfcnt_config = &ipa_ctrl->blocks[type].select[idx];
+
+ if (!WARN_ON(prfcnt_config->refcount == 0)) {
+ prfcnt_config->refcount--;
+ if (prfcnt_config->refcount == 0) {
+ new_config = true;
+ ipa_ctrl->blocks[type].num_available_counters++;
+ }
+ }
+ }
+
+ if (new_config) {
+ u64 select_config[KBASE_IPA_CORE_TYPE_NUM];
+
+ build_select_config(ipa_ctrl, select_config);
+ ret = apply_select_config(kbdev, select_config);
+ if (ret)
+ dev_err(kbdev->dev,
+ "%s: failed to apply SELECT configuration",
+ __func__);
+ }
+
+ session->num_prfcnts = 0;
+ session->active = false;
+ ipa_ctrl->num_active_sessions--;
+
+exit:
+ spin_unlock_irqrestore(&ipa_ctrl->lock, flags);
+ kbase_pm_context_idle(kbdev);
+ return ret;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_unregister);
+
+int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
+ u64 *values, size_t num_values, u64 *protected_time)
+{
+ struct kbase_ipa_control *ipa_ctrl;
+ struct kbase_ipa_control_session *session;
+ size_t i;
+ unsigned long flags;
+ bool gpu_ready;
+
+ if (WARN_ON(kbdev == NULL) || WARN_ON(client == NULL) ||
+ WARN_ON(values == NULL)) {
+ dev_err(kbdev->dev, "%s: wrong input arguments", __func__);
+ return -EINVAL;
+ }
+
+ ipa_ctrl = &kbdev->csf.ipa_control;
+ session = (struct kbase_ipa_control_session *)client;
+
+ if (WARN_ON(num_values < session->num_prfcnts)) {
+ dev_err(kbdev->dev,
+ "%s: not enough space (%zu) to return all counter values (%zu)",
+ __func__, num_values, session->num_prfcnts);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ gpu_ready = kbdev->pm.backend.gpu_ready;
+
+ for (i = 0; i < session->num_prfcnts; i++) {
+ struct kbase_ipa_control_prfcnt *prfcnt =
+ &session->prfcnts[i];
+
+ calc_prfcnt_delta(kbdev, prfcnt, gpu_ready);
+ /* Return all the accumulated difference */
+ values[i] = prfcnt->accumulated_diff;
+ prfcnt->accumulated_diff = 0;
+ }
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ for (i = session->num_prfcnts; i < num_values; i++)
+ values[i] = 0;
+
+ if (protected_time)
+ *protected_time = 0;
+
+ return 0;
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_query);
+
+void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev)
+{
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+ size_t session_idx;
+ int ret;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* GPU should still be ready for use when this function gets called */
+ WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+ /* Interrupts are already disabled and interrupt state is also saved */
+ spin_lock(&ipa_ctrl->lock);
+
+ /* First disable the automatic sampling through TIMER */
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), 0);
+ ret = wait_status(kbdev, STATUS_TIMER_ENABLED);
+ if (ret) {
+ dev_err(kbdev->dev,
+ "Wait for disabling of IPA control timer failed: %d",
+ ret);
+ }
+
+ /* Now issue the manual SAMPLE command */
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_SAMPLE);
+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+ if (ret) {
+ dev_err(kbdev->dev,
+ "Wait for the completion of manual sample failed: %d",
+ ret);
+ }
+
+ for (session_idx = 0; session_idx < ipa_ctrl->num_active_sessions;
+ session_idx++) {
+ struct kbase_ipa_control_session *session =
+ &ipa_ctrl->sessions[session_idx];
+ size_t i;
+
+ for (i = 0; i < session->num_prfcnts; i++) {
+ struct kbase_ipa_control_prfcnt *prfcnt =
+ &session->prfcnts[i];
+
+ calc_prfcnt_delta(kbdev, prfcnt, true);
+ }
+ }
+
+ spin_unlock(&ipa_ctrl->lock);
+}
+
+void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev)
+{
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+ int ret;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* GPU should have become ready for use when this function gets called */
+ WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+ /* Interrupts are already disabled and interrupt state is also saved */
+ spin_lock(&ipa_ctrl->lock);
+
+ /* Re-issue the APPLY command; this is actually needed only for CSHW */
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_APPLY);
+ ret = wait_status(kbdev, STATUS_COMMAND_ACTIVE);
+ if (ret) {
+ dev_err(kbdev->dev,
+ "Wait for the completion of apply command failed: %d",
+ ret);
+ }
+
+ /* Re-enable the timer for periodic sampling */
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
+ TIMER_DEFAULT_VALUE_CLK_CYCLES);
+
+ spin_unlock(&ipa_ctrl->lock);
+}
+
+void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev)
+{
+ /* A soft reset is treated as a power down */
+ kbase_ipa_control_handle_gpu_power_off(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_pre);
+
+void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev)
+{
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+ int ret;
+ u32 status;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* GPU should have become ready for use when this function gets called */
+ WARN_ON(!kbdev->pm.backend.gpu_ready);
+
+ /* Interrupts are already disabled and interrupt state is also saved */
+ spin_lock(&ipa_ctrl->lock);
+
+ /* Check the status reset bit is set before acknowledging it */
+ status = kbase_reg_read(kbdev, IPA_CONTROL_REG(STATUS));
+ if (status & STATUS_RESET) {
+ /* Acknowledge the reset command */
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(COMMAND), COMMAND_RESET_ACK);
+ ret = wait_status(kbdev, STATUS_RESET);
+ if (ret) {
+ dev_err(kbdev->dev,
+ "Wait for the reset ack command failed: %d",
+ ret);
+ }
+ }
+
+ spin_unlock(&ipa_ctrl->lock);
+
+ kbase_ipa_control_handle_gpu_power_on(kbdev);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_handle_gpu_reset_post);
+
+#if MALI_UNIT_TEST
+void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
+ u32 clk_index, u32 clk_rate_hz)
+{
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+ struct kbase_ipa_control_listener_data *listener_data =
+ ipa_ctrl->rtm_listener_data;
+
+ kbase_ipa_control_rate_change_notify(&listener_data->listener,
+ clk_index, clk_rate_hz);
+}
+KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test);
+#endif
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
new file mode 100644
index 0000000..ef93ff3
--- /dev/null
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.h
@@ -0,0 +1,217 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CSF_IPA_CONTROL_H_
+#define _KBASE_CSF_IPA_CONTROL_H_
+
+#include <mali_kbase.h>
+
+/**
+ * Maximum index accepted to configure an IPA Control performance counter.
+ */
+#define KBASE_IPA_CONTROL_CNT_MAX_IDX ((u8)64 * 3)
+
+/**
+ * struct kbase_ipa_control_perf_counter - Performance counter description
+ *
+ * @scaling_factor: Scaling factor by which the counter's value shall be
+ * multiplied. A scaling factor of 1 corresponds to units
+ * of 1 second if values are normalised by GPU frequency.
+ * @gpu_norm: Indicating whether counter values shall be normalized by
+ * GPU frequency. If true, returned values represent
+ * an interval of time expressed in seconds (when the scaling
+ * factor is set to 1).
+ * @type: Type of counter block for performance counter.
+ * @idx: Index of the performance counter inside the block.
+ * It may be dependent on GPU architecture.
+ * It cannot be greater than KBASE_IPA_CONTROL_CNT_MAX_IDX.
+ *
+ * This structure is used by clients of the IPA Control component to describe
+ * a performance counter that they intend to read. The counter is identified
+ * by block and index. In addition to that, the client also specifies how
+ * values shall be represented. Raw values are a number of GPU cycles;
+ * if normalized, they are divided by GPU frequency and become an interval
+ * of time expressed in seconds, since the GPU frequency is given in Hz.
+ * The client may specify a scaling factor to multiply counter values before
+ * they are divided by frequency, in case the unit of time of 1 second is
+ * too low in resolution. For instance: a scaling factor of 1000 implies
+ * that the returned value is a time expressed in milliseconds; a scaling
+ * factor of 1000 * 1000 implies that the returned value is a time expressed
+ * in microseconds.
+ */
+struct kbase_ipa_control_perf_counter {
+ u64 scaling_factor;
+ bool gpu_norm;
+ enum kbase_ipa_core_type type;
+ u8 idx;
+};
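As an illustration only (not part of the patch), this is roughly how the CSF PM metrics backend elsewhere in this series describes the GPU_ACTIVE counter; the index 4 is the GPU_ACTIVE offset defined in mali_kbase_pm_metrics.c and may differ on other GPU architectures:

    struct kbase_ipa_control_perf_counter desc = {
        /* 1E9 so that the frequency-normalized value is reported in ns */
        .scaling_factor = 1000000000ull,
        /* Divide the scaled value by the current GPU clock rate */
        .gpu_norm = true,
        /* GPU_ACTIVE lives in the CSHW counter block */
        .type = KBASE_IPA_CORE_TYPE_CSHW,
        /* Architecture-dependent counter index (GPU_ACTIVE here) */
        .idx = 4,
    };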
+
+/**
+ * kbase_ipa_control_init - Initialize the IPA Control component
+ *
+ * @kbdev: Pointer to Kbase device.
+ */
+void kbase_ipa_control_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_term - Terminate the IPA Control component
+ *
+ * @kbdev: Pointer to Kbase device.
+ */
+void kbase_ipa_control_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_register - Register a client to the IPA Control component
+ *
+ * @kbdev: Pointer to Kbase device.
+ * @perf_counters: Array of performance counters the client intends to read.
+ * For each counter the client specifies block, index,
+ * scaling factor and whether it must be normalized by GPU
+ * frequency.
+ * @num_counters: Number of performance counters. It cannot exceed the total
+ * number of counters that exist on the IPA Control interface.
+ * @client: Handle to an opaque structure set by IPA Control if
+ * the registration is successful. This handle identifies
+ * a client's session and shall be provided in its future
+ * queries.
+ *
+ * A client needs to subscribe to the IPA Control component by declaring which
+ * performance counters it intends to read, and specifying a scaling factor
+ * and whether normalization is requested for each performance counter.
+ * The function shall configure the IPA Control interface accordingly and start
+ * a session for the client that made the request. A unique handle is returned
+ * if registration is successful in order to identify the client's session
+ * and be used for future queries.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_control_register(
+ struct kbase_device *kbdev,
+ const struct kbase_ipa_control_perf_counter *perf_counters,
+ size_t num_counters, void **client);
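A minimal registration sketch, modelled on the PM metrics client added by this patch and reusing the hypothetical descriptor from the previous sketch; error handling is trimmed:

    void *ipa_client;
    int err;

    err = kbase_ipa_control_register(kbdev, &desc, 1, &ipa_client);
    if (err)
        return err;

    /* ... call kbase_ipa_control_query() while the session is active ... */

    kbase_ipa_control_unregister(kbdev, ipa_client);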
+
+/**
+ * kbase_ipa_control_unregister - Unregister a client from IPA Control
+ *
+ * @kbdev: Pointer to kbase device.
+ * @client: Handle to an opaque structure that identifies the client session
+ * to terminate, as returned by kbase_ipa_control_register.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_control_unregister(struct kbase_device *kbdev,
+ const void *client);
+
+/**
+ * kbase_ipa_control_query - Query performance counters
+ *
+ * @kbdev: Pointer to kbase device.
+ * @client: Handle to an opaque structure that identifies the client
+ * session, as returned by kbase_ipa_control_register.
+ * @values: Array of values queried from performance counters, whose
+ * length depends on the number of counters requested at
+ * the time of registration. Values are scaled and normalized
+ * and represent the difference since the last query.
+ * @num_values: Number of entries in the array of values that has been
+ * passed by the caller. It must be at least equal to the
+ * number of performance counters the client registered itself
+ * to read.
+ * @protected_time: Time spent in protected mode since last query,
+ * expressed in nanoseconds. This pointer may be NULL if the
+ * client doesn't want to know about this.
+ *
+ * A client that has already opened a session by registering itself to read
+ * some performance counters may use this function to query the values of
+ * those counters. The values returned are normalized by GPU frequency if
+ * requested and then multiplied by the scaling factor provided at the time
+ * of registration. Values always represent a difference since the last query.
+ *
+ * Performance counters are not updated while the GPU operates in protected
+ * mode. For this reason, returned values may be unreliable if the GPU has
+ * been in protected mode since the last query. The function returns success
+ * in that case, but it also gives a measure of how much time has been spent
+ * in protected mode.
+ *
+ * Return: 0 on success, negative -errno on error
+ */
+int kbase_ipa_control_query(struct kbase_device *kbdev, const void *client,
+ u64 *values, size_t num_values,
+ u64 *protected_time);
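An illustrative query, based on kbase_pm_get_dvfs_utilisation_calc() in this patch; ipa_client and total_busy_ns are placeholder names:

    u64 gpu_active_ns;
    int err;

    /* One value per registered counter; protected-mode time not required */
    err = kbase_ipa_control_query(kbdev, ipa_client, &gpu_active_ns, 1, NULL);
    if (!err) {
        /* With gpu_norm set and a 1E9 scaling factor this is the busy
         * time, in nanoseconds, accumulated since the previous query.
         */
        total_busy_ns += gpu_active_ns;
    }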
+
+/**
+ * kbase_ipa_control_handle_gpu_power_on - Handle the GPU power on event
+ *
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function is called after the GPU has been powered on and is ready
+ * for use. After the GPU power on, the IPA Control component needs to
+ * ensure that the counters start incrementing again.
+ */
+void kbase_ipa_control_handle_gpu_power_on(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_handle_gpu_power_off - Handle the GPU power off event
+ *
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function is called just before the GPU is powered off when it is still
+ * ready for use.
+ * The IPA Control component needs to be aware of the GPU power off so that
+ * it can handle queries from clients appropriately and return meaningful
+ * values to them.
+ */
+void kbase_ipa_control_handle_gpu_power_off(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_handle_gpu_reset_pre - Handle the pre GPU reset event
+ *
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function is called when the GPU is about to be reset.
+ */
+void kbase_ipa_control_handle_gpu_reset_pre(struct kbase_device *kbdev);
+
+/**
+ * kbase_ipa_control_handle_gpu_reset_post - Handle the post GPU reset event
+ *
+ * @kbdev: Pointer to kbase device.
+ *
+ * This function is called after the GPU has been reset.
+ */
+void kbase_ipa_control_handle_gpu_reset_post(struct kbase_device *kbdev);
+
+#if MALI_UNIT_TEST
+/**
+ * kbase_ipa_control_rate_change_notify_test - Notify GPU rate change
+ * (only for testing)
+ *
+ * @kbdev: Pointer to kbase device.
+ * @clk_index: Index of the clock for which the change has occurred.
+ * @clk_rate_hz: Clock frequency (Hz).
+ *
+ * Notify the IPA Control component about a GPU rate change.
+ */
+void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
+ u32 clk_index, u32 clk_rate_hz);
+#endif /* MALI_UNIT_TEST */
+
+#endif /* _KBASE_CSF_IPA_CONTROL_H_ */
diff --git a/mali_kbase/csf/mali_base_csf_kernel.h b/mali_kbase/csf/mali_base_csf_kernel.h
index 301146c..bc356d1 100644
--- a/mali_kbase/csf/mali_base_csf_kernel.h
+++ b/mali_kbase/csf/mali_base_csf_kernel.h
@@ -416,7 +416,7 @@ struct base_kcpu_command_jit_free_info {
*
* @buffer: Pointer to an array of elements of the type char.
* @size: Number of elements in the @buffer array.
- * @group_handle: Handle to the mapping of command stream group.
+ * @group_handle: Handle to the mapping of CSG.
* @padding: padding to a multiple of 64 bits.
*/
struct base_kcpu_command_group_suspend_info {
@@ -450,7 +450,7 @@ struct base_kcpu_command {
};
/**
- * struct basep_cs_stream_control - Command Stream interface capabilities.
+ * struct basep_cs_stream_control - CSI capabilities.
*
* @features: Features of this stream
* @padding: Padding to a multiple of 64 bits.
@@ -461,7 +461,7 @@ struct basep_cs_stream_control {
};
/**
- * struct basep_cs_group_control - Command Stream Group interface capabilities.
+ * struct basep_cs_group_control - CSG interface capabilities.
*
* @features: Features of this group
* @stream_num: Number of streams in this group
@@ -561,6 +561,8 @@ struct base_gpu_queue_group_error {
* @BASE_CSF_NOTIFICATION_EVENT: Notification with kernel event
* @BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR: Notification with GPU fatal
* error
+ * @BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP: Notification for dumping a CPU
+ * queue
 * @BASE_CSF_NOTIFICATION_COUNT: The number of notification types
*
* This type is used for &struct_base_csf_notification.type.
@@ -568,6 +570,7 @@ struct base_gpu_queue_group_error {
enum base_csf_notification_type {
BASE_CSF_NOTIFICATION_EVENT = 0,
BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP,
BASE_CSF_NOTIFICATION_COUNT
};
@@ -591,6 +594,7 @@ struct base_csf_notification {
u8 padding[7];
struct base_gpu_queue_group_error error;
} csg_error;
+
u8 align[56];
} payload;
};
diff --git a/mali_kbase/csf/mali_gpu_csf_registers.h b/mali_kbase/csf/mali_gpu_csf_registers.h
index 5c03445..281c9c2 100644
--- a/mali_kbase/csf/mali_gpu_csf_registers.h
+++ b/mali_kbase/csf/mali_gpu_csf_registers.h
@@ -91,8 +91,8 @@
#define DB_BLK_DOORBELL 0x0000 /* (WO) Doorbell request */
/* CS_KERNEL_INPUT_BLOCK register offsets */
-#define CS_REQ 0x0000 /* () Command stream request flags */
-#define CS_CONFIG 0x0004 /* () Command stream configuration */
+#define CS_REQ 0x0000 /* () CS request flags */
+#define CS_CONFIG 0x0004 /* () CS configuration */
#define CS_ACK_IRQ_MASK 0x000C /* () Command stream interrupt mask */
#define CS_BASE_LO 0x0010 /* () Base pointer for the ring buffer, low word */
#define CS_BASE_HI 0x0014 /* () Base pointer for the ring buffer, high word */
@@ -107,11 +107,11 @@
#define CS_USER_OUTPUT_HI 0x003C /* () CS user mode input page address, high word */
/* CS_KERNEL_OUTPUT_BLOCK register offsets */
-#define CS_ACK 0x0000 /* () Command stream acknowledge flags */
+#define CS_ACK 0x0000 /* () CS acknowledge flags */
#define CS_STATUS_CMD_PTR_LO 0x0040 /* () Program pointer current value, low word */
#define CS_STATUS_CMD_PTR_HI 0x0044 /* () Program pointer current value, high word */
#define CS_STATUS_WAIT 0x0048 /* () Wait condition status register */
-#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the command stream */
+#define CS_STATUS_REQ_RESOURCE 0x004C /* () Indicates the resources requested by the CS */
#define CS_STATUS_WAIT_SYNC_POINTER_LO 0x0050 /* () Sync object pointer, low word */
#define CS_STATUS_WAIT_SYNC_POINTER_HI 0x0054 /* () Sync object pointer, high word */
#define CS_STATUS_WAIT_SYNC_VALUE 0x0058 /* () Sync object test value */
@@ -136,13 +136,13 @@
/* CS_USER_OUTPUT_BLOCK register offsets */
#define CS_EXTRACT_LO 0x0000 /* () Current extract offset for ring buffer, low word */
#define CS_EXTRACT_HI 0x0004 /* () Current extract offset for ring buffer, high word */
-#define CS_ACTIVE 0x0008 /* () Initial extract offset when the command stream is started */
+#define CS_ACTIVE 0x0008 /* () Initial extract offset when the CS is started */
/* CSG_INPUT_BLOCK register offsets */
#define CSG_REQ 0x0000 /* () CSG request */
#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
#define CSG_DB_REQ 0x0008 /* () Global doorbell request */
-#define CSG_IRQ_ACK 0x000C /* () Command stream IRQ acknowledge */
+#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */
#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */
#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */
#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */
@@ -156,9 +156,9 @@
#define CSG_CONFIG 0x0050 /* () CSG configuration options */
/* CSG_OUTPUT_BLOCK register offsets */
-#define CSG_ACK 0x0000 /* () Command stream group acknowledge flags */
-#define CSG_DB_ACK 0x0008 /* () Command stream kernel doorbell acknowledge flags */
-#define CSG_IRQ_REQ 0x000C /* () Command stream interrupt request flags */
+#define CSG_ACK 0x0000 /* () CSG acknowledge flags */
+#define CSG_DB_ACK 0x0008 /* () CS kernel doorbell acknowledge flags */
+#define CSG_IRQ_REQ 0x000C /* () CS interrupt request flags */
#define CSG_STATUS_EP_CURRENT 0x0010 /* () Endpoint allocation status register */
#define CSG_STATUS_EP_REQ 0x0014 /* () Endpoint request status register */
#define CSG_RESOURCE_DEP 0x001C /* () Current resource dependencies */
@@ -177,12 +177,12 @@
#define GROUP_CONTROL_COUNT 16
/* STREAM_CONTROL_BLOCK register offsets */
-#define STREAM_FEATURES 0x0000 /* () Command Stream interface features */
+#define STREAM_FEATURES 0x0000 /* () CSI features */
#define STREAM_INPUT_VA 0x0004 /* () Address of CS_KERNEL_INPUT_BLOCK */
#define STREAM_OUTPUT_VA 0x0008 /* () Address of CS_KERNEL_OUTPUT_BLOCK */
/* GROUP_CONTROL_BLOCK register offsets */
-#define GROUP_FEATURES 0x0000 /* () Command Stream Group interface features */
+#define GROUP_FEATURES 0x0000 /* () CSG interface features */
#define GROUP_INPUT_VA 0x0004 /* () Address of CSG_INPUT_BLOCK */
#define GROUP_OUTPUT_VA 0x0008 /* () Address of CSG_OUTPUT_BLOCK */
#define GROUP_SUSPEND_SIZE 0x000C /* () Size of CSG suspend buffer */
@@ -231,6 +231,9 @@
#define GLB_DEBUG_FWUTF_RESULT 0x0FE0 /* () Firmware debug test result */
#define GLB_DEBUG_ACK 0x0FFC /* () Global debug acknowledge */
+/* USER register offsets */
+#define LATEST_FLUSH 0x0000 /* () Flush ID of latest clean-and-invalidate operation */
+
/* End register offsets */
/* CS_KERNEL_INPUT_BLOCK register set definitions */
@@ -251,15 +254,6 @@
#define CS_REQ_EXTRACT_EVENT_SET(reg_val, value) \
(((reg_val) & ~CS_REQ_EXTRACT_EVENT_MASK) | (((value) << CS_REQ_EXTRACT_EVENT_SHIFT) & CS_REQ_EXTRACT_EVENT_MASK))
-/* From 10.x.5, CS_REQ_ERROR_MODE is removed but TI2 bitfile upload not finished.
- * Need to remove on GPUCORE-23972
- */
-#define CS_REQ_ERROR_MODE_SHIFT 5
-#define CS_REQ_ERROR_MODE_MASK (0x1 << CS_REQ_ERROR_MODE_SHIFT)
-#define CS_REQ_ERROR_MODE_GET(reg_val) ((reg_val & CS_REQ_ERROR_MODE_MASK) >> CS_REQ_ERROR_MODE_SHIFT)
-#define CS_REQ_ERROR_MODE_SET(reg_val, value) \
- ((reg_val & ~CS_REQ_ERROR_MODE_MASK) | ((value << CS_REQ_ERROR_MODE_SHIFT) & CS_REQ_ERROR_MODE_MASK))
-
#define CS_REQ_IDLE_SYNC_WAIT_SHIFT 8
#define CS_REQ_IDLE_SYNC_WAIT_MASK (0x1 << CS_REQ_IDLE_SYNC_WAIT_SHIFT)
#define CS_REQ_IDLE_SYNC_WAIT_GET(reg_val) (((reg_val)&CS_REQ_IDLE_SYNC_WAIT_MASK) >> CS_REQ_IDLE_SYNC_WAIT_SHIFT)
@@ -1212,6 +1206,26 @@
(((reg_val) & ~GLB_PROGRESS_TIMER_TIMEOUT_MASK) | \
(((value) << GLB_PROGRESS_TIMER_TIMEOUT_SHIFT) & GLB_PROGRESS_TIMER_TIMEOUT_MASK))
+/* GLB_PWROFF_TIMER register */
+#define GLB_PWROFF_TIMER_TIMEOUT_SHIFT 0
+#define GLB_PWROFF_TIMER_TIMEOUT_MASK (0x7FFFFFFF << GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_GET(reg_val) \
+ (((reg_val)&GLB_PWROFF_TIMER_TIMEOUT_MASK) >> GLB_PWROFF_TIMER_TIMEOUT_SHIFT)
+#define GLB_PWROFF_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_PWROFF_TIMER_TIMEOUT_SHIFT) & GLB_PWROFF_TIMER_TIMEOUT_MASK))
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT 31
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_MASK (0x1 << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GET(reg_val) \
+ (((reg_val)&GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) >> GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PWROFF_TIMER_TIMER_SOURCE_MASK) | \
+ (((value) << GLB_PWROFF_TIMER_TIMER_SOURCE_SHIFT) & GLB_PWROFF_TIMER_TIMER_SOURCE_MASK))
+/* GLB_PWROFF_TIMER_TIMER_SOURCE values */
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */
+
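A small sketch of composing a GLB_PWROFF_TIMER value with the accessors above; the timeout count is an arbitrary example and writing the result through the firmware interface is omitted:

    u32 pwroff_reg = 0;

    /* Timeout expressed in ticks of the selected timer source */
    pwroff_reg = GLB_PWROFF_TIMER_TIMEOUT_SET(pwroff_reg, 0x1000);
    /* Count GPU cycles rather than the system timestamp */
    pwroff_reg = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
        pwroff_reg, GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);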
/* GLB_ALLOC_EN register */
#define GLB_ALLOC_EN_MASK_SHIFT 0
#define GLB_ALLOC_EN_MASK_MASK (0xFFFFFFFFFFFFFFFF << GLB_ALLOC_EN_MASK_SHIFT)
@@ -1249,4 +1263,42 @@
(((reg_val) & ~GLB_ACK_CFG_ALLOC_EN_MASK) | (((value) << GLB_ACK_CFG_ALLOC_EN_SHIFT) & GLB_ACK_CFG_ALLOC_EN_MASK))
/* End of GLB_OUTPUT_BLOCK register set definitions */
+/* The following register and fields are for headers before 10.x.7/11.x.4 */
+#define GLB_REQ_IDLE_ENABLE_SHIFT (10)
+#define GLB_REQ_REQ_IDLE_ENABLE (1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_REQ_IDLE_DISABLE (0 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_ENABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_DISABLE_MASK (0x1 << GLB_REQ_IDLE_ENABLE_SHIFT)
+#define GLB_REQ_IDLE_EVENT_SHIFT (26)
+#define GLB_REQ_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_SHIFT (10)
+#define GLB_ACK_ACK_IDLE_ENABLE (1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_ACK_IDLE_DISABLE (0 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_ENABLE_MASK (0x1 << GLB_ACK_IDLE_ENABLE_SHIFT)
+#define GLB_ACK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IDLE_EVENT_MASK (0x1 << GLB_REQ_IDLE_EVENT_SHIFT)
+
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT (26)
+#define GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK (0x1 << GLB_ACK_IRQ_MASK_IDLE_EVENT_SHIFT)
+
+#define GLB_IDLE_TIMER (0x0080)
+/* GLB_IDLE_TIMER register */
+#define GLB_IDLE_TIMER_TIMEOUT_SHIFT (0)
+#define GLB_IDLE_TIMER_TIMEOUT_MASK ((0x7FFFFFFF) << GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_GET(reg_val) (((reg_val)&GLB_IDLE_TIMER_TIMEOUT_MASK) >> GLB_IDLE_TIMER_TIMEOUT_SHIFT)
+#define GLB_IDLE_TIMER_TIMEOUT_SET(reg_val, value) \
+ (((reg_val) & ~GLB_IDLE_TIMER_TIMEOUT_MASK) | \
+ (((value) << GLB_IDLE_TIMER_TIMEOUT_SHIFT) & GLB_IDLE_TIMER_TIMEOUT_MASK))
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT (31)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_MASK ((0x1) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GET(reg_val) \
+ (((reg_val)&GLB_IDLE_TIMER_TIMER_SOURCE_MASK) >> GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT)
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val, value) \
+ (((reg_val) & ~GLB_IDLE_TIMER_TIMER_SOURCE_MASK) | \
+ (((value) << GLB_IDLE_TIMER_TIMER_SOURCE_SHIFT) & GLB_IDLE_TIMER_TIMER_SOURCE_MASK))
+/* GLB_IDLE_TIMER_TIMER_SOURCE values */
+#define GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP 0x0
+#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
+/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */
+
#endif /* _GPU_CSF_REGISTERS_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 83d7513..b75cae3 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -22,7 +22,6 @@
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_fault.h>
-#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include "mali_kbase_csf.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
@@ -31,10 +30,11 @@
#include "mali_gpu_csf_registers.h"
#include "mali_kbase_csf_tiler_heap.h"
#include <mmu/mali_kbase_mmu.h>
-#include <mali_kbase_ctx_sched.h>
+#include "mali_kbase_csf_timeout.h"
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
+#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
/**
* struct kbase_csf_event - CSF event callback.
@@ -438,7 +438,6 @@ static void release_queue(struct kbase_queue *queue)
}
static void oom_event_worker(struct work_struct *data);
-static void fault_event_worker(struct work_struct *data);
int kbase_csf_queue_register(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_register *reg)
@@ -506,7 +505,6 @@ int kbase_csf_queue_register(struct kbase_context *kctx,
INIT_LIST_HEAD(&queue->link);
INIT_LIST_HEAD(&queue->error.link);
INIT_WORK(&queue->oom_event_work, oom_event_worker);
- INIT_WORK(&queue->fault_event_work, fault_event_worker);
list_add(&queue->link, &kctx->csf.queue_list);
region->flags |= KBASE_REG_NO_USER_FREE;
@@ -532,6 +530,8 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
queue = find_queue(kctx, term->buffer_gpu_addr);
if (queue) {
+ unsigned long flags;
+
/* As the GPU queue has been terminated by the
* user space, undo the actions that were performed when the
* queue was registered i.e. remove the queue from the per
@@ -555,10 +555,12 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
}
kbase_gpu_vm_unlock(kctx);
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
/* Remove any pending command queue fatal from
* the per-context list.
*/
list_del_init(&queue->error.link);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
release_queue(queue);
}
@@ -678,30 +680,27 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
}
void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
- struct kbase_queue *queue)
+ int csi_index, int csg_nr)
{
- struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
- struct kbase_queue_group *group = get_bound_queue_group(queue);
struct kbase_csf_cmd_stream_group_info *ginfo;
u32 value;
- int slot;
- if (WARN_ON(!group))
+ if (WARN_ON(csg_nr < 0) ||
+ WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
return;
- slot = kbase_csf_scheduler_group_get_slot(group);
+ ginfo = &kbdev->csf.global_iface.groups[csg_nr];
- if (WARN_ON(slot < 0))
+ if (WARN_ON(csi_index < 0) ||
+ WARN_ON(csi_index >= ginfo->stream_num))
return;
- ginfo = &global_iface->groups[slot];
-
value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
- value ^= (1 << queue->csi_index);
+ value ^= (1 << csi_index);
kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value,
- 1 << queue->csi_index);
+ 1 << csi_index);
- kbase_csf_ring_csg_doorbell(kbdev, slot);
+ kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
}
int kbase_csf_queue_kick(struct kbase_context *kctx,
@@ -758,8 +757,8 @@ static void unbind_stopped_queue(struct kbase_context *kctx,
* @kctx: Address of the kbase context within which the queue was created.
* @queue: Pointer to the queue to be unlinked.
*
- * This function will also send the stop request to firmware for the command
- * stream if the group to which the GPU command queue was bound is scheduled.
+ * This function will also send the stop request to firmware for the CS
+ * if the group to which the GPU command queue was bound is scheduled.
*
* This function would be called when :-
* - queue is being unbound. This would happen when the IO mapping
@@ -827,15 +826,14 @@ static int find_free_group_handle(struct kbase_context *const kctx)
}
/**
- * iface_has_enough_streams() - Check that at least one command stream
- * group supports a given number of streams
+ * iface_has_enough_streams() - Check that at least one CSG supports
+ * a given number of CSs
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
- * @cs_min: Minimum number of command streams required.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @cs_min: Minimum number of CSs required.
*
- * Return: true if at least one command stream group supports the given number
- * of command streams (or more); otherwise false.
+ * Return: true if at least one CSG supports the given number
+ * of CSs (or more); otherwise false.
*/
static bool iface_has_enough_streams(struct kbase_device *const kbdev,
u32 const cs_min)
@@ -942,9 +940,8 @@ phy_alloc_failed:
* create_protected_suspend_buffer() - Create protected-mode suspend buffer
* per queue group
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
- * @s_buf: Pointer to suspend buffer that is attached to queue group
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to suspend buffer that is attached to queue group
*
* Return: 0 if suspend buffer is successfully allocated and reflected to GPU
* MMU page table. Otherwise -ENOMEM.
@@ -1017,7 +1014,6 @@ phy_alloc_failed:
return err;
}
-static void timer_event_worker(struct work_struct *data);
static void protm_event_worker(struct work_struct *data);
static void term_normal_suspend_buffer(struct kbase_context *const kctx,
struct kbase_normal_suspend_buffer *s_buf);
@@ -1108,7 +1104,6 @@ static int create_queue_group(struct kbase_context *const kctx,
INIT_LIST_HEAD(&group->error_fatal.link);
INIT_LIST_HEAD(&group->error_timeout.link);
INIT_LIST_HEAD(&group->error_tiler_oom.link);
- INIT_WORK(&group->timer_event_work, timer_event_worker);
INIT_WORK(&group->protm_event_work, protm_event_worker);
bitmap_zero(group->protm_pending_bitmap,
MAX_SUPPORTED_STREAMS_PER_GROUP);
@@ -1155,7 +1150,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
err = -EINVAL;
} else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
dev_err(kctx->kbdev->dev,
- "No CSG has at least %d streams\n",
+ "No CSG has at least %d CSs\n",
create->in.cs_min);
err = -EINVAL;
} else {
@@ -1217,9 +1212,8 @@ static void term_normal_suspend_buffer(struct kbase_context *const kctx,
* term_protected_suspend_buffer() - Free normal-mode suspend buffer of
* queue group
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
- * @s_buf: Pointer to queue group suspend buffer to be freed
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to queue group suspend buffer to be freed
*/
static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
struct kbase_protected_suspend_buffer *s_buf)
@@ -1247,7 +1241,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
{
struct kbase_context *kctx = group->kctx;
- /* Currently each group supports the same number of streams */
+ /* Currently each group supports the same number of CSs */
u32 max_streams =
kctx->kbdev->csf.global_iface.groups[0].stream_num;
u32 i;
@@ -1303,7 +1297,6 @@ static void term_queue_group(struct kbase_queue_group *group)
static void cancel_queue_group_events(struct kbase_queue_group *group)
{
- cancel_work_sync(&group->timer_event_work);
cancel_work_sync(&group->protm_event_work);
}
@@ -1317,10 +1310,14 @@ void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
group = find_queue_group(kctx, group_handle);
if (group) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
/* Remove any pending group fatal error from the per-context list. */
list_del_init(&group->error_tiler_oom.link);
list_del_init(&group->error_timeout.link);
list_del_init(&group->error_fatal.link);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
term_queue_group(group);
kctx->csf.queue_groups[group_handle] = NULL;
@@ -1358,17 +1355,12 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
return err;
}
-/**
- * kbase_csf_add_fatal_error_to_kctx - Add a fatal error to per-ctx error list.
- *
- * @group: GPU command queue group.
- * @err_payload: Error payload to report.
- */
-static void kbase_csf_add_fatal_error_to_kctx(
- struct kbase_queue_group *const group,
- const struct base_gpu_queue_group_error *const err_payload)
+void kbase_csf_add_fatal_error_to_kctx(
+ struct kbase_queue_group *const group,
+ struct base_gpu_queue_group_error const *const err_payload)
{
struct base_csf_notification error;
+ unsigned long flags;
if (WARN_ON(!group))
return;
@@ -1386,7 +1378,7 @@ static void kbase_csf_add_fatal_error_to_kctx(
}
};
- lockdep_assert_held(&group->kctx->csf.lock);
+ spin_lock_irqsave(&group->kctx->csf.event_lock, flags);
/* If this group has already been in fatal error status,
* subsequent fatal error on this group should never take place.
@@ -1396,6 +1388,8 @@ static void kbase_csf_add_fatal_error_to_kctx(
list_add_tail(&group->error_fatal.link,
&group->kctx->csf.error_list);
}
+
+ spin_unlock_irqrestore(&group->kctx->csf.event_lock, flags);
}
void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
@@ -1404,7 +1398,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
struct list_head evicted_groups;
struct kbase_queue_group *group;
int i;
- bool fatal_error_built = false;
INIT_LIST_HEAD(&evicted_groups);
@@ -1412,10 +1405,6 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups);
while (!list_empty(&evicted_groups)) {
- struct kbase_csf_scheduler *scheduler =
- &kbdev->csf.scheduler;
- unsigned long flags;
-
group = list_first_entry(&evicted_groups,
struct kbase_queue_group, link);
@@ -1423,28 +1412,8 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
kctx->tgid, kctx->id, group->handle);
kbase_csf_term_descheduled_queue_group(group);
list_del_init(&group->link);
-
- kbase_csf_scheduler_spin_lock(kbdev, &flags);
- if ((group == scheduler->active_protm_grp) &&
- group->faulted) {
- const struct base_gpu_queue_group_error err_payload = {
- .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
- .payload = {
- .fatal_group = {
- .status = GPU_EXCEPTION_TYPE_SW_FAULT_0,
- }
- }
- };
-
- kbase_csf_add_fatal_error_to_kctx(group, &err_payload);
- fatal_error_built = true;
- }
- kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
- if (fatal_error_built)
- kbase_event_wakeup(kctx);
-
/* Acting on the queue groups that are pending to be terminated. */
for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
group = kctx->csf.queue_groups[i];
@@ -1458,6 +1427,7 @@ void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
int kbase_csf_ctx_init(struct kbase_context *kctx)
{
+ struct kbase_device *kbdev = kctx->kbdev;
int err = -ENOMEM;
INIT_LIST_HEAD(&kctx->csf.event_callback_list);
@@ -1467,6 +1437,19 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
spin_lock_init(&kctx->csf.event_lock);
kctx->csf.user_reg_vma = NULL;
+ mutex_lock(&kbdev->pm.lock);
+ /* The inode information for /dev/malixx file is not available at the
+ * time of device probe as the inode is created when the device node
+ * is created by udevd (through mknod).
+ */
+ if (kctx->filp) {
+ if (!kbdev->csf.mali_file_inode)
+ kbdev->csf.mali_file_inode = kctx->filp->f_inode;
+
+ /* inode is unique for a file */
+ WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
+ }
+ mutex_unlock(&kbdev->pm.lock);
/* Mark all the cookies as 'free' */
bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
@@ -1600,6 +1583,12 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
mutex_unlock(&kctx->csf.lock);
+ /* Wait for the firmware error work item to also finish as it could
+ * be affecting this outgoing context also. Proper handling would be
+ * added in GPUCORE-25209.
+ */
+ flush_work(&kctx->kbdev->csf.fw_error_work);
+
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
kbase_csf_scheduler_context_term(kctx);
@@ -1654,8 +1643,9 @@ bool kbase_csf_read_error(struct kbase_context *kctx,
{
bool got_event = true;
struct kbase_csf_notification *error_data = NULL;
+ unsigned long flags;
- mutex_lock(&kctx->csf.lock);
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
if (likely(!list_empty(&kctx->csf.error_list))) {
error_data = list_first_entry(&kctx->csf.error_list,
@@ -1666,7 +1656,7 @@ bool kbase_csf_read_error(struct kbase_context *kctx,
got_event = false;
}
- mutex_unlock(&kctx->csf.lock);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
return got_event;
}
@@ -1674,10 +1664,11 @@ bool kbase_csf_read_error(struct kbase_context *kctx,
bool kbase_csf_error_pending(struct kbase_context *kctx)
{
bool event_pended = false;
+ unsigned long flags;
- mutex_lock(&kctx->csf.lock);
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
event_pended = !list_empty(&kctx->csf.error_list);
- mutex_unlock(&kctx->csf.lock);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
return event_pended;
}
@@ -1742,19 +1733,19 @@ void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
/**
* handle_oom_event - Handle the OoM event generated by the firmware for the
- * command stream interface.
+ * CSI.
*
* This function will handle the OoM event request from the firmware for the
- * command stream. It will retrieve the address of heap context and heap's
- * statistics (like number of render passes in-flight) from the command
- * stream's kernel output page and pass them to the tiler heap function
- * to allocate a new chunk.
- * It will also update the command stream's kernel input page with the address
+ * CS. It will retrieve the address of heap context and heap's
+ * statistics (like number of render passes in-flight) from the CS's
+ * kernel output page and pass them to the tiler heap function to allocate a
+ * new chunk.
+ * It will also update the CS's kernel input page with the address
* of a new chunk that was allocated.
*
* @kctx: Pointer to the kbase context in which the tiler heap was initialized.
* @stream: Pointer to the structure containing info provided by the firmware
- * about the command stream interface.
+ * about the CSI.
*
* Return: 0 if successfully handled the request, otherwise a negative error
* code on failure.
@@ -1772,6 +1763,7 @@ static int handle_oom_event(struct kbase_context *const kctx,
const u32 frag_end =
kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END);
u32 renderpasses_in_flight;
+ u32 pending_frag_count;
u64 new_chunk_ptr;
int err;
@@ -1782,9 +1774,10 @@ static int handle_oom_event(struct kbase_context *const kctx,
}
renderpasses_in_flight = vt_start - frag_end;
+ pending_frag_count = vt_end - frag_end;
err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
- gpu_heap_va, renderpasses_in_flight, &new_chunk_ptr);
+ gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
/* It is okay to acknowledge with a NULL chunk (firmware will then wait
* for the fragment jobs to complete and release chunks)
@@ -1824,8 +1817,9 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
} } } };
struct kbase_context *kctx = group->kctx;
+ unsigned long flags;
- lockdep_assert_held(&kctx->csf.lock);
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
/* Ignore this error if the previous one hasn't been reported */
if (!WARN_ON(!list_empty(&group->error_tiler_oom.link))) {
@@ -1834,6 +1828,8 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
&kctx->csf.error_list);
kbase_event_wakeup(kctx);
}
+
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
}
/**
@@ -1841,8 +1837,8 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
*
* @queue: Pointer to queue for which out-of-memory event was received.
*
- * Called with the command-stream front-end locked for the affected GPU
- * virtual address space. Do not call in interrupt context.
+ * Called with the CSF locked for the affected GPU virtual address space.
+ * Do not call in interrupt context.
*
* Handles tiler out-of-memory for a GPU command queue and then clears the
* notification to allow the firmware to report out-of-memory again in future.
@@ -1859,6 +1855,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
int slot_num, err;
struct kbase_csf_cmd_stream_group_info const *ginfo;
struct kbase_csf_cmd_stream_info const *stream;
+ int csi_index = queue->csi_index;
u32 cs_oom_ack, cs_oom_req;
lockdep_assert_held(&kctx->csf.lock);
@@ -1887,7 +1884,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
goto unlock;
ginfo = &kbdev->csf.global_iface.groups[slot_num];
- stream = &ginfo->streams[queue->csi_index];
+ stream = &ginfo->streams[csi_index];
cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) &
CS_ACK_TILER_OOM_MASK;
cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) &
@@ -1918,7 +1915,7 @@ static void kbase_queue_oom_event(struct kbase_queue *const queue)
return;
}
- kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num);
unlock:
kbase_csf_scheduler_unlock(kbdev);
}
@@ -1947,17 +1944,15 @@ static void oom_event_worker(struct work_struct *data)
}
/**
- * timer_event_worker - Timer event handler called from a workqueue.
+ * handle_progress_timer_event - Progress timer timeout event handler.
*
- * @data: Pointer to a work_struct embedded in GPU command queue group data.
+ * @group: Pointer to GPU queue group for which the timeout event is received.
*
* Notify the event notification thread of progress timeout fault
* for the GPU command queue group.
*/
-static void timer_event_worker(struct work_struct *data)
+static void handle_progress_timer_event(struct kbase_queue_group *const group)
{
- struct kbase_queue_group *const group =
- container_of(data, struct kbase_queue_group, timer_event_work);
struct base_csf_notification const
error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
.payload = {
@@ -1968,8 +1963,15 @@ static void timer_event_worker(struct work_struct *data)
BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
} } } };
struct kbase_context *const kctx = group->kctx;
+ unsigned long flags;
- mutex_lock(&kctx->csf.lock);
+ kbase_csf_scheduler_spin_lock_assert_held(kctx->kbdev);
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+ dev_warn(kctx->kbdev->dev,
+ "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
+ kbase_csf_timeout_get(kctx->kbdev));
/* Ignore this error if the previous one hasn't been reported */
if (!WARN_ON(!list_empty(&group->error_timeout.link))) {
@@ -1979,7 +1981,7 @@ static void timer_event_worker(struct work_struct *data)
kbase_event_wakeup(kctx);
}
- mutex_unlock(&kctx->csf.lock);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
}
/**
@@ -2003,13 +2005,13 @@ static void protm_event_worker(struct work_struct *data)
*
* @queue: Pointer to queue for which fault event was received.
* @stream: Pointer to the structure containing info provided by the
- * firmware about the command stream interface.
+ * firmware about the CSI.
*
* Prints meaningful CS fault information.
*
- * Return: 0 on success, otherwise a negative system code.
*/
-static int handle_fault_event(struct kbase_queue const *const queue,
+static void
+handle_fault_event(struct kbase_queue const *const queue,
struct kbase_csf_cmd_stream_info const *const stream)
{
const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
@@ -2025,6 +2027,8 @@ static int handle_fault_event(struct kbase_queue const *const queue,
CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
struct kbase_device *const kbdev = queue->kctx->kbdev;
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
dev_warn(kbdev->dev, "CSI: %d\n"
"CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
"CS_FAULT.EXCEPTION_DATA: 0x%x\n"
@@ -2032,8 +2036,6 @@ static int handle_fault_event(struct kbase_queue const *const queue,
queue->csi_index, cs_fault_exception_type,
kbase_gpu_exception_name(cs_fault_exception_type),
cs_fault_exception_data, cs_fault_info_exception_data);
-
- return -EFAULT;
}
/**
@@ -2068,8 +2070,11 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
}
}
};
+ unsigned long flags;
- lockdep_assert_held(&queue->kctx->csf.lock);
+ kbase_csf_scheduler_spin_lock_assert_held(queue->kctx->kbdev);
+
+ spin_lock_irqsave(&queue->kctx->csf.event_lock, flags);
/* If a queue has already been in fatal error status,
* subsequent fatal error on the queue should never take place.
@@ -2079,6 +2084,8 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
list_add_tail(&queue->error.link, &queue->kctx->csf.error_list);
kbase_event_wakeup(queue->kctx);
}
+
+ spin_unlock_irqrestore(&queue->kctx->csf.event_lock, flags);
}
/**
@@ -2086,17 +2093,17 @@ static void report_queue_fatal_error(struct kbase_queue *const queue,
*
* @queue: Pointer to queue for which fatal event was received.
* @stream: Pointer to the structure containing info provided by the
- * firmware about the command stream interface.
+ * firmware about the CSI.
* @fw_error: Return true if internal firmware fatal is handled
*
* Prints meaningful CS fatal information.
* Report queue fatal error to user space.
*
- * Return: 0 on success otherwise a negative system error.
*/
-static int handle_fatal_event(struct kbase_queue *const queue,
- struct kbase_csf_cmd_stream_info const *const stream,
- bool *fw_error)
+static void
+handle_fatal_event(struct kbase_queue *const queue,
+ struct kbase_csf_cmd_stream_info const *const stream,
+ bool *fw_error)
{
const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
const u64 cs_fatal_info =
@@ -2111,7 +2118,7 @@ static int handle_fatal_event(struct kbase_queue *const queue,
CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info);
struct kbase_device *const kbdev = queue->kctx->kbdev;
- lockdep_assert_held(&queue->kctx->csf.lock);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
dev_warn(kbdev->dev,
"CSG: %d, CSI: %d\n"
@@ -2128,152 +2135,62 @@ static int handle_fatal_event(struct kbase_queue *const queue,
*fw_error = true;
else
report_queue_fatal_error(queue, cs_fatal, cs_fatal_info);
-
- return -EFAULT;
-}
-
-/**
- * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
- *
- * @kbdev: Pointer to kbase device
- *
- * Report group fatal error to user space for all GPU command queue groups
- * in the device, terminate them and reset GPU.
- */
-static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
-{
- int as;
-
- for (as = 0; as < kbdev->nr_hw_address_spaces; as++) {
- struct kbase_context *kctx;
- struct kbase_fault fault = {
- .status = GPU_EXCEPTION_TYPE_SW_FAULT_1,
- };
-
- if (as == MCU_AS_NR)
- continue;
-
- kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as);
- if (!kctx)
- continue;
-
- kbase_csf_ctx_handle_fault(kctx, &fault);
- kbase_ctx_sched_release_ctx_lock(kctx);
- }
-
- if (kbase_prepare_to_reset_gpu(kbdev))
- kbase_reset_gpu(kbdev);
}
/**
- * fault_event_worker - Worker function for CS fault/fatal.
+ * handle_queue_exception_event - Handler for CS fatal/fault exception events.
*
- * @data: Pointer to a work_struct embedded in GPU command queue data.
- *
- * Handle the fault and fatal exception for a GPU command queue and then
- * releases a reference that was added to prevent the queue being destroyed
- * while this work item was pending on a workqueue.
- *
- * Report the fault and fatal exception for a GPU command queue and then
- * clears the corresponding notification fields to allow the firmware to
- * report other faults in future.
- *
- * It may also terminate the GPU command queue group(s) and reset GPU
- * in case internal firmware CS fatal exception occurred.
+ * @queue: Pointer to queue for which fatal/fault event was received.
+ * @cs_req: Value of the CS_REQ register from the CS's input page.
+ * @cs_ack: Value of the CS_ACK register from the CS's output page.
*/
-static void fault_event_worker(struct work_struct *const data)
+static void handle_queue_exception_event(struct kbase_queue *const queue,
+ const u32 cs_req, const u32 cs_ack)
{
- struct kbase_queue *const queue =
- container_of(data, struct kbase_queue, fault_event_work);
-
- struct kbase_context *const kctx = queue->kctx;
- struct kbase_device *const kbdev = kctx->kbdev;
- struct kbase_queue_group *group;
- int slot_num;
struct kbase_csf_cmd_stream_group_info const *ginfo;
struct kbase_csf_cmd_stream_info const *stream;
- u32 cs_ack, cs_req;
- int err = 0;
+ struct kbase_context *const kctx = queue->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_queue_group *group = queue->group;
+ int csi_index = queue->csi_index;
+ int slot_num = group->csg_nr;
bool internal_fw_error = false;
- mutex_lock(&kctx->csf.lock);
- kbase_csf_scheduler_lock(kbdev);
-
- group = get_bound_queue_group(queue);
- if (!group) {
- dev_warn(kbdev->dev, "queue not bound\n");
- goto unlock;
- }
-
- slot_num = kbase_csf_scheduler_group_get_slot(group);
-
- /* The group could have gone off slot before this work item got
- * a chance to execute.
- */
- if (slot_num < 0) {
- dev_warn(kbdev->dev, "invalid slot_num\n");
- goto unlock;
- }
-
- /* If the bound group is on slot yet the kctx is marked with disabled
- * on address-space fault, the group is pending to be killed. So skip
- * the inflight queue exception event operation.
- */
- if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT)) {
- dev_warn(kbdev->dev, "kctx is already disabled on fault\n");
- goto unlock;
- }
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
ginfo = &kbdev->csf.global_iface.groups[slot_num];
- stream = &ginfo->streams[queue->csi_index];
- cs_ack = kbase_csf_firmware_cs_output(stream, CS_ACK);
- cs_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ);
+ stream = &ginfo->streams[csi_index];
if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
- err = handle_fatal_event(queue, stream, &internal_fw_error);
+ handle_fatal_event(queue, stream, &internal_fw_error);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
CS_REQ_FATAL_MASK);
}
if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
- err |= handle_fault_event(queue, stream);
+ handle_fault_event(queue, stream);
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
CS_REQ_FAULT_MASK);
- kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num);
}
- if (err) {
- /* From 10.x.5, CS_REQ_ERROR_MODE is removed but TI2 bitfile
- * upload not finished. Need to remove on GPUCORE-23972
- */
- kbase_csf_firmware_cs_input_mask(stream, CS_REQ, ~cs_ack,
- CS_REQ_ERROR_MODE_MASK);
- dev_dbg(kbdev->dev, "Slot-%d CSI-%d entering error mode\n",
- slot_num, queue->csi_index);
- }
-
-unlock:
- release_queue(queue);
- kbase_csf_scheduler_unlock(kbdev);
- mutex_unlock(&kctx->csf.lock);
-
if (internal_fw_error)
- handle_internal_firmware_fatal(kbdev);
+ queue_work(system_wq, &kbdev->csf.fw_error_work);
}
/**
- * process_cs_interrupts - Process interrupts for a command stream.
+ * process_cs_interrupts - Process interrupts for a CS.
*
* @group: Pointer to GPU command queue group data.
- * @ginfo: The command stream group interface provided by the firmware.
- * @irqreq: CSG's IRQ request bitmask (one bit per stream).
- * @irqack: CSG's IRQ acknowledge bitmask (one bit per stream).
+ * @ginfo: The CSG interface provided by the firmware.
+ * @irqreq: CSG's IRQ request bitmask (one bit per CS).
+ * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
*
* If the interrupt request bitmask differs from the acknowledge bitmask
* then the firmware is notifying the host of an event concerning those
- * streams indicated by bits whose value differs. The actions required
+ * CSs indicated by bits whose value differs. The actions required
* are then determined by examining which notification flags differ between
- * the request and acknowledge registers for the individual stream(s).
+ * the request and acknowledge registers for the individual CS(s).
*/
static void process_cs_interrupts(struct kbase_queue_group *const group,
struct kbase_csf_cmd_stream_group_info const *const ginfo,
@@ -2282,6 +2199,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
struct kbase_device *const kbdev = group->kctx->kbdev;
u32 remaining = irqreq ^ irqack;
bool protm_pend = false;
+ const bool group_suspending =
+ !kbase_csf_scheduler_group_events_enabled(kbdev, group);
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
@@ -2289,6 +2208,8 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
int const i = ffs(remaining) - 1;
struct kbase_queue *const queue = group->bound_queues[i];
+ remaining &= ~(1 << i);
+
/* The queue pointer can be NULL, but if it isn't NULL then it
* cannot disappear since scheduler spinlock is held and before
* freeing a bound queue it has to be first unbound which
@@ -2305,12 +2226,18 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
(cs_ack & CS_ACK_EXCEPTION_MASK)) {
- get_queue(queue);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
- if (!queue_work(wq, &queue->fault_event_work))
- release_queue(queue);
+ handle_queue_exception_event(queue, cs_req,
+ cs_ack);
}
+ /* PROTM_PEND and TILER_OOM can be safely ignored
+ * because they will be raised again if the group
+ * is assigned a CSG slot in future.
+ */
+ if (group_suspending)
+ continue;
+
if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
(cs_ack & CS_ACK_TILER_OOM_MASK))) {
get_queue(queue);
@@ -2337,8 +2264,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
protm_pend = true;
}
}
-
- remaining &= ~(1 << i);
}
if (protm_pend)
@@ -2346,13 +2271,12 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
}
/**
- * process_csg_interrupts - Process interrupts for a command stream group.
+ * process_csg_interrupts - Process interrupts for a CSG.
*
- * @kbdev: Instance of a GPU platform device that implements a command stream
- * front-end interface.
- * @csg_nr: Command stream group number.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @csg_nr: CSG number.
*
- * Handles interrupts for a command stream group and for streams within it.
+ * Handles interrupts for a CSG and for CSs within it.
*
* If the CSG's request register value differs from its acknowledge register
* then the firmware is notifying the host of an event concerning the whole
@@ -2407,40 +2331,42 @@ static void process_csg_interrupts(struct kbase_device *const kbdev,
if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
return;
- if ((req ^ ack) & CSG_REQ_SYNC_UPDATE) {
+ if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
kbase_csf_firmware_csg_input_mask(ginfo,
- CSG_REQ, ack, CSG_REQ_SYNC_UPDATE);
+ CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
kbase_csf_event_signal_cpu_only(group->kctx);
}
- /* IDLE and TILER_OOM can be safely ignored because they will be
- * raised again if the group is assigned a CSG slot in future.
- * TILER_OOM and PROGRESS_TIMER_EVENT may terminate the group.
- */
- if (!kbase_csf_scheduler_group_events_enabled(kbdev, group))
- return;
-
if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_IDLE_MASK);
- set_bit(csg_nr, kbdev->csf.scheduler.csg_slots_idle_mask);
+ set_bit(csg_nr, scheduler->csg_slots_idle_mask);
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack);
dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
group->handle, csg_nr);
+
+ /* Check if the scheduling tick can be advanced */
+ if (kbase_csf_scheduler_all_csgs_idle(kbdev) &&
+ !scheduler->gpu_idle_fw_timer_enabled) {
+ mod_delayed_work(scheduler->wq, &scheduler->tick_work, 0);
+ }
}
if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
- dev_dbg(kbdev->dev, "Timeout notification received for Group %u on slot %d\n",
+ dev_dbg(kbdev->dev,
+ "Timeout notification received for Group %u on slot %d\n",
group->handle, csg_nr);
- queue_work(group->kctx->csf.wq, &group->timer_event_work);
+ handle_progress_timer_event(group);
}
process_cs_interrupts(group, ginfo, irqreq, irqack);
@@ -2465,10 +2391,7 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
if (!kbdev->csf.firmware_reloaded)
kbase_csf_firmware_reload_completed(kbdev);
- else if (kbdev->csf.glb_init_request_pending)
- kbase_pm_update_state(kbdev);
-
- if (global_iface->output) {
+ else if (global_iface->output) {
u32 glb_req, glb_ack;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
@@ -2485,8 +2408,35 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev));
scheduler->active_protm_grp = NULL;
KBASE_KTRACE_ADD(kbdev, SCHEDULER_EXIT_PROTM, NULL, 0u);
+ kbdev->protected_mode = false;
+ }
+
+ /* Handle IDLE Hysteresis notification event */
+ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
+ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_IDLE_EVENT_MASK);
+
+ if (!atomic_read(&scheduler->non_idle_offslot_grps)) {
+ mod_delayed_work(system_highpri_wq,
+ &scheduler->gpu_idle_work, 0);
+ } else {
+ /* Advance the scheduling tick to get
+ * the non-idle suspended groups loaded
+ * soon.
+ */
+ mod_delayed_work(scheduler->wq,
+ &scheduler->tick_work, 0);
+ }
}
+
kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Invoke the MCU state machine as a state transition
+ * might have completed.
+ */
+ kbase_pm_update_state(kbdev);
}
if (!remaining) {
@@ -2545,3 +2495,47 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
return 0;
}
+
+void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
+{
+ if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
+ struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
+
+ kbase_mem_pool_free(
+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
+ false);
+ }
+}
+
+int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
+{
+ struct tagged_addr phys;
+ struct page *page;
+ u32 *addr;
+ int ret;
+
+ kbdev->csf.dummy_user_reg_page = as_tagged(0);
+
+ ret = kbase_mem_pool_alloc_pages(
+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+ false);
+
+ if (ret <= 0)
+ return ret;
+
+ page = as_page(phys);
+ addr = kmap_atomic(page);
+
+ /* Write a special value for the latest flush register inside the
+ * dummy page
+ */
+ addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
+
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
+ DMA_BIDIRECTIONAL);
+ kunmap_atomic(addr);
+
+ kbdev->csf.dummy_user_reg_page = phys;
+
+ return 0;
+}
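A hedged sketch (not part of the patch) of how the new CSF device-level init helpers defined in this file could pair up in an init path; the wrapper name kbasep_example_csf_early_init and the relative ordering of the two calls are assumptions, only the kbase_csf_* calls themselves are taken from this file.

static int kbasep_example_csf_early_init(struct kbase_device *kbdev)
{
	int err = kbase_csf_doorbell_mapping_init(kbdev);

	if (err)
		return err;

	/* Allocates the dummy page and stores POWER_DOWN_LATEST_FLUSH_VALUE
	 * at the LATEST_FLUSH offset, as implemented above.
	 */
	err = kbase_csf_setup_dummy_user_reg_page(kbdev);
	if (err) {
		/* Unwind in reverse order on failure. */
		kbase_csf_doorbell_mapping_term(kbdev);
		return err;
	}

	return 0;
}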
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index c183d0a..44bc131 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -28,11 +28,11 @@
#include "mali_kbase_csf_firmware.h"
#include "mali_kbase_csf_protected_memory.h"
-/* Indicate invalid command stream h/w interface
+/* Indicate invalid CS h/w interface
*/
#define KBASEP_IF_NR_INVALID ((s8)-1)
-/* Indicate invalid command stream group number for a GPU command queue group
+/* Indicate invalid CSG number for a GPU command queue group
*/
#define KBASEP_CSG_NR_INVALID ((s8)-1)
@@ -43,10 +43,10 @@
/* Waiting timeout for global request completion acknowledgment */
#define GLB_REQ_WAIT_TIMEOUT_MS (300) /* 300 milliseconds */
-#define CSG_REQ_EP_CFG (0x1 << CSG_REQ_EP_CFG_SHIFT)
-#define CSG_REQ_SYNC_UPDATE (0x1 << CSG_REQ_SYNC_UPDATE_SHIFT)
#define FIRMWARE_PING_INTERVAL_MS (2000) /* 2 seconds */
+#define FIRMWARE_IDLE_HYSTERESIS_TIME_MS (10) /* Default 10 milliseconds */
+
/**
* enum kbase_csf_event_callback_action - return type for CSF event callbacks.
*
@@ -124,9 +124,9 @@ void kbase_csf_event_wait_remove(struct kbase_context *kctx,
void kbase_csf_event_wait_remove_all(struct kbase_context *kctx);
/**
- * kbase_csf_read_error - Read command stream fatal error
+ * kbase_csf_read_error - Read CS fatal error
*
- * This function takes the command stream fatal error from context's ordered
+ * This function takes the CS fatal error from context's ordered
* error_list, copies its contents to @event_data.
*
* @kctx: The kbase context to read fatal error from
@@ -150,8 +150,8 @@ bool kbase_csf_error_pending(struct kbase_context *kctx);
* kbase_csf_event_signal - Signal a CSF event
*
* This function triggers all the CSF event callbacks that are registered to
- * a given Kbase context, and also signals the thread of userspace driver
- * (front-end), waiting for the CSF event.
+ * a given Kbase context, and also signals the event handling thread of
+ * userspace driver waiting for the CSF event.
*
* @kctx: The kbase context whose CSF event callbacks shall be triggered.
* @notify_gpu: Flag to indicate if CSF firmware should be notified of the
@@ -171,8 +171,7 @@ static inline void kbase_csf_event_signal_cpu_only(struct kbase_context *kctx)
}
/**
- * kbase_csf_ctx_init - Initialize the command-stream front-end for a GPU
- * address space.
+ * kbase_csf_ctx_init - Initialize the CSF interface for a GPU address space.
*
* @kctx: Pointer to the kbase context which is being initialized.
*
@@ -194,8 +193,7 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
struct kbase_fault *fault);
/**
- * kbase_csf_ctx_term - Terminate the command-stream front-end for a GPU
- * address space.
+ * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space.
*
* This function terminates any remaining CSGs and CSs which weren't destroyed
* before context termination.
@@ -280,7 +278,9 @@ void kbase_csf_queue_unbind(struct kbase_queue *queue);
int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick);
-/** Find if given the queue group handle is valid.
+/**
+ * kbase_csf_queue_group_handle_is_valid - Find if the given queue group handle
+ * is valid.
*
* This function is used to determine if the queue group handle is valid.
*
@@ -340,7 +340,6 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group);
* suspended.
* @sus_buf: Pointer to the structure which contains details of the
* user buffer and its kernel pinned pages.
- * @size: The size in bytes for the user provided buffer.
* @group_handle: Handle for the group which uniquely identifies it within
* the context within which it was created.
*
@@ -351,6 +350,16 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
struct kbase_suspend_copy_buffer *sus_buf, u8 group_handle);
/**
+ * kbase_csf_add_fatal_error_to_kctx - Add a fatal error to per-ctx error list.
+ *
+ * @group: GPU command queue group.
+ * @err_payload: Error payload to report.
+ */
+void kbase_csf_add_fatal_error_to_kctx(
+ struct kbase_queue_group *const group,
+ struct base_gpu_queue_group_error const *const err_payload);
+
+/**
* kbase_csf_interrupt - Handle interrupts issued by CSF firmware.
*
* @kbdev: The kbase device to handle an IRQ for
@@ -359,55 +368,89 @@ int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val);
/**
- * kbase_csf_doorbell_mapping_init - Initialize the bitmap of Hw doorbell pages
- * used to track their availability.
+ * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitates
+ * the update of userspace mapping of HW
+ * doorbell page.
+ *
+ * The function creates a file and allocates a dummy page to facilitate the
+ * update of the userspace mapping to point to the dummy page instead of the
+ * real HW doorbell page after a queue group is suspended.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: 0 on success, or negative on failure.
*/
int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev);
+/**
+ * kbase_csf_doorbell_mapping_term - Free the dummy page & close the file used
+ * to update the userspace mapping of HW doorbell page
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev);
/**
- * kbase_csf_ring_csg_doorbell - ring the doorbell for a command stream group
- * interface.
+ * kbase_csf_setup_dummy_user_reg_page - Setup the dummy page that is accessed
+ * instead of the User register page after
+ * the GPU power down.
+ *
+ * The function allocates a dummy page which is used to replace the User
+ * register page in the userspace mapping after the power down of GPU.
+ * On the power up of GPU, the mapping is updated to point to the real
+ * User register page. The mapping is used to allow access to LATEST_FLUSH
+ * register from userspace.
*
- * The function kicks a notification on the command stream group interface to
- * firmware.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
- * @slot: Index of command stream group interface for ringing the door-bell.
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_free_dummy_user_reg_page - Free the dummy page that was
+ *                                      used to replace the User register page
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
+ *
+ * The function kicks a notification on the CSG interface to firmware.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @slot: Index of CSG interface for ringing the door-bell.
*/
void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot);
/**
- * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of command
- * stream group interfaces.
+ * kbase_csf_ring_csg_slots_doorbell - ring the doorbell for a set of CSG
+ * interfaces.
*
- * The function kicks a notification on a set of command stream group
- * interfaces to firmware.
+ * The function kicks a notification on a set of CSG interfaces to firmware.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @slot_bitmap: bitmap for the given slots, slot-0 on bit-0, etc.
*/
void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
u32 slot_bitmap);
/**
- * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a queue
+ * kbase_csf_ring_cs_kernel_doorbell - ring the kernel doorbell for a CSI
+ * assigned to a GPU queue
*
- * The function kicks a notification to the firmware for the command stream
- * interface to which the queue is bound.
+ * The function sends a doorbell interrupt notification to the firmware for
+ * a CSI assigned to a GPU queue.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
- * @queue: Pointer to the queue for ringing the door-bell.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @csi_index: ID of the CSI assigned to the GPU queue.
+ * @csg_nr: Index of the CSG slot assigned to the queue
+ * group to which the GPU queue is bound.
*/
void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
- struct kbase_queue *queue);
+ int csi_index, int csg_nr);
/**
* kbase_csf_ring_cs_user_doorbell - ring the user doorbell allocated for a
@@ -416,8 +459,7 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
* The function kicks a notification to the firmware on the doorbell assigned
* to the queue.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @queue: Pointer to the queue for ringing the door-bell.
*/
void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
@@ -427,9 +469,8 @@ void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
* kbase_csf_active_queue_groups_reset - Reset the state of all active GPU
* command queue groups associated with the context.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
- * @kctx: The kbase context.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @kctx: The kbase context.
*
* This function will iterate through all the active/scheduled GPU command
* queue groups associated with the context, deschedule and mark them as
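To make the revised kernel-doorbell signature above concrete, here is a hedged usage sketch mirroring how mali_kbase_csf.c calls it; example_kick_queue is a hypothetical name, and the caller is assumed to hold the scheduler lock so that the group's CSG slot number stays valid while the doorbell is rung.

static void example_kick_queue(struct kbase_device *kbdev,
			       struct kbase_queue *queue,
			       struct kbase_queue_group *group)
{
	int slot = kbase_csf_scheduler_group_get_slot(group);

	/* The group may have been descheduled; only resident groups have a
	 * valid CSG slot on which the doorbell can be rung.
	 */
	if (slot < 0)
		return;

	kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, slot);
}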
diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
new file mode 100644
index 0000000..3acceeb
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
@@ -0,0 +1,193 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_csf_cpu_queue_debugfs.h"
+#include <mali_kbase.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
+ struct base_csf_notification *req)
+{
+ if (atomic_cmpxchg(&kctx->csf.cpu_queue.dump_req_status,
+ BASE_CSF_CPU_QUEUE_DUMP_ISSUED,
+ BASE_CSF_CPU_QUEUE_DUMP_PENDING) !=
+ BASE_CSF_CPU_QUEUE_DUMP_ISSUED) {
+ return false;
+ }
+
+ req->type = BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP;
+ return true;
+}
+
+/**
+ * kbasep_csf_cpu_queue_debugfs_show() - Print per-context cpu queue information
+ *
+ * @file: The seq_file for printing to
+ * @data: The debugfs dentry private data, a pointer to kbase_context
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_csf_cpu_queue_debugfs_show(struct seq_file *file, void *data)
+{
+ struct kbase_context *kctx = file->private;
+
+ mutex_lock(&kctx->csf.lock);
+ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
+ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE) {
+ seq_printf(file, "Dump request already started! (try again)\n");
+ mutex_unlock(&kctx->csf.lock);
+ return -EBUSY;
+ }
+
+ atomic_set(&kctx->csf.cpu_queue.dump_req_status, BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
+ init_completion(&kctx->csf.cpu_queue.dump_cmp);
+ kbase_event_wakeup(kctx);
+ mutex_unlock(&kctx->csf.lock);
+
+ seq_printf(file, "CPU Queues table (version:v%u):\n", MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION);
+
+ wait_for_completion_timeout(&kctx->csf.cpu_queue.dump_cmp,
+ msecs_to_jiffies(3000));
+
+ mutex_lock(&kctx->csf.lock);
+ if (kctx->csf.cpu_queue.buffer) {
+ WARN_ON(atomic_read(&kctx->csf.cpu_queue.dump_req_status) !=
+ BASE_CSF_CPU_QUEUE_DUMP_PENDING);
+
+ seq_printf(file, "%s\n", kctx->csf.cpu_queue.buffer);
+
+ kfree(kctx->csf.cpu_queue.buffer);
+ kctx->csf.cpu_queue.buffer = NULL;
+ kctx->csf.cpu_queue.buffer_size = 0;
+ }
+ else
+ seq_printf(file, "Dump error! (time out)\n");
+
+ atomic_set(&kctx->csf.cpu_queue.dump_req_status,
+ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
+
+ mutex_unlock(&kctx->csf.lock);
+ return 0;
+}
+
+static int kbasep_csf_cpu_queue_debugfs_open(struct inode *in, struct file *file)
+{
+ return single_open(file, kbasep_csf_cpu_queue_debugfs_show, in->i_private);
+}
+
+static const struct file_operations kbasep_csf_cpu_queue_debugfs_fops = {
+ .open = kbasep_csf_cpu_queue_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx)
+{
+ struct dentry *file;
+
+ if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
+ return;
+
+ file = debugfs_create_file("cpu_queue", 0444, kctx->kctx_dentry,
+ kctx, &kbasep_csf_cpu_queue_debugfs_fops);
+
+ if (IS_ERR_OR_NULL(file)) {
+ dev_warn(kctx->kbdev->dev,
+ "Unable to create cpu queue debugfs entry");
+ }
+
+ kctx->csf.cpu_queue.buffer = NULL;
+ kctx->csf.cpu_queue.buffer_size = 0;
+ atomic_set(&kctx->csf.cpu_queue.dump_req_status,
+ BASE_CSF_CPU_QUEUE_DUMP_COMPLETE);
+}
+
+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
+ u64 buffer, size_t buf_size)
+{
+ int err = 0;
+
+ size_t alloc_size = buf_size;
+ char *dump_buffer;
+
+ if (!buffer || !alloc_size)
+ goto done;
+
+ alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1);
+ dump_buffer = kzalloc(alloc_size, GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(dump_buffer)) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ WARN_ON(kctx->csf.cpu_queue.buffer != NULL);
+
+ err = copy_from_user(dump_buffer,
+ u64_to_user_ptr(buffer),
+ buf_size);
+ if (err) {
+ kfree(dump_buffer);
+ err = -EFAULT;
+ goto done;
+ }
+
+ mutex_lock(&kctx->csf.lock);
+
+ if (kctx->csf.cpu_queue.buffer)
+ kfree(kctx->csf.cpu_queue.buffer);
+
+ if (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
+ BASE_CSF_CPU_QUEUE_DUMP_PENDING) {
+ kctx->csf.cpu_queue.buffer = dump_buffer;
+ kctx->csf.cpu_queue.buffer_size = buf_size;
+ complete_all(&kctx->csf.cpu_queue.dump_cmp);
+ } else {
+ kfree(dump_buffer);
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+done:
+ return err;
+}
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx)
+{
+}
+
+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
+ struct base_csf_notification *req)
+{
+ return false;
+}
+
+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
+ u64 buffer, size_t buf_size)
+{
+ return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.h b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.h
new file mode 100644
index 0000000..71309bf
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.h
@@ -0,0 +1,91 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_
+#define _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_
+
+#include <asm/atomic.h>
+#include <linux/types.h>
+
+#include "mali_kbase.h"
+
+/* Forward declaration */
+struct base_csf_notification;
+
+#define MALI_CSF_CPU_QUEUE_DEBUGFS_VERSION 0
+
+/* CPU queue dump status */
+/* Dumping is done or no dumping is in progress. */
+#define BASE_CSF_CPU_QUEUE_DUMP_COMPLETE 0
+/* Dumping request is pending. */
+#define BASE_CSF_CPU_QUEUE_DUMP_PENDING 1
+/* Dumping request is issued to Userspace */
+#define BASE_CSF_CPU_QUEUE_DUMP_ISSUED 2
+
+
+/**
+ * kbase_csf_cpu_queue_debugfs_init() - Create a debugfs entry for per context cpu queue(s)
+ *
+ * @kctx: The kbase_context for which to create the debugfs entry
+ */
+void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_cpu_queue_read_dump_req - Read cpu queue dump request event
+ *
+ * @kctx: The kbase_context to which the dumped cpu queue belongs
+ * @req: Notification with cpu queue dump request.
+ *
+ * @return: true if a dump was requested, otherwise false.
+ */
+bool kbase_csf_cpu_queue_read_dump_req(struct kbase_context *kctx,
+ struct base_csf_notification *req);
+
+/**
+ * kbase_csf_cpu_queue_dump_needed - Check the requirement for cpu queue dump
+ *
+ * @kctx: The kbase_context to which the dumped cpu queue belongs
+ *
+ * @return: true if a cpu queue dump is needed, otherwise false.
+ */
+static inline bool kbase_csf_cpu_queue_dump_needed(struct kbase_context *kctx)
+{
+#ifdef CONFIG_DEBUG_FS
+ return (atomic_read(&kctx->csf.cpu_queue.dump_req_status) ==
+ BASE_CSF_CPU_QUEUE_DUMP_ISSUED);
+#else
+ return false;
+#endif
+}
+
+/**
+ * kbase_csf_cpu_queue_dump - dump buffer containing cpu queue information to debugfs
+ *
+ * @kctx: The kbase_context to which the dumped cpu queue belongs
+ * @buffer: Buffer containing the cpu queue information.
+ * @buf_size: Buffer size.
+ *
+ * @return: 0 on success, otherwise a negative error code.
+ */
+int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
+ u64 buffer, size_t buf_size);
+#endif /* _KBASE_CSF_CPU_QUEUE_DEBUGFS_H_ */
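A hedged sketch (not part of the patch) of how the two helpers declared above are expected to combine when reporting a CPU-queue dump request to userspace; example_fill_dump_notification is a hypothetical name, with the usual mali_kbase headers assumed.

static bool example_fill_dump_notification(struct kbase_context *kctx,
					   struct base_csf_notification *req)
{
	/* Cheap check first: proceed only if the debugfs show path has
	 * actually issued a dump request.
	 */
	if (!kbase_csf_cpu_queue_dump_needed(kctx))
		return false;

	/* Moves the dump status from ISSUED to PENDING and fills in
	 * req->type with BASE_CSF_NOTIFICATION_CPU_QUEUE_DUMP on success.
	 */
	return kbase_csf_cpu_queue_read_dump_req(kctx, req);
}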
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index fd8329b..e54c724 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -428,6 +428,61 @@ DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops,
&kbasep_csf_debugfs_scheduling_timer_kick_set,
"%llu\n");
+/**
+ * kbase_csf_debugfs_scheduler_suspend_get() - get whether the scheduler is suspended.
+ *
+ * @data: The debugfs dentry private data, a pointer to kbase_device
+ * @val: The debugfs output value, boolean: 1 suspended, 0 otherwise
+ *
+ * Return: 0
+ */
+static int kbase_csf_debugfs_scheduler_suspend_get(
+ void *data, u64 *val)
+{
+ struct kbase_device *kbdev = data;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ kbase_csf_scheduler_lock(kbdev);
+ *val = (scheduler->state == SCHED_SUSPENDED);
+ kbase_csf_scheduler_unlock(kbdev);
+
+ return 0;
+}
+
+/**
+ * kbase_csf_debugfs_scheduler_suspend_set() - suspend or resume the scheduler.
+ *
+ * @data: The debugfs dentry private data, a pointer to kbase_device
+ * @val: The debugfs input value, boolean: 1 suspend, 0 otherwise
+ *
+ * Return: Negative value if already in requested state, 0 otherwise.
+ */
+static int kbase_csf_debugfs_scheduler_suspend_set(
+ void *data, u64 val)
+{
+ struct kbase_device *kbdev = data;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ enum kbase_csf_scheduler_state state;
+
+ kbase_csf_scheduler_lock(kbdev);
+ state = scheduler->state;
+ kbase_csf_scheduler_unlock(kbdev);
+
+ if (val && (state != SCHED_SUSPENDED))
+ kbase_csf_scheduler_pm_suspend(kbdev);
+ else if (!val && (state == SCHED_SUSPENDED))
+ kbase_csf_scheduler_pm_resume(kbdev);
+ else
+ return -1;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduler_suspend_fops,
+ &kbase_csf_debugfs_scheduler_suspend_get,
+ &kbase_csf_debugfs_scheduler_suspend_set,
+ "%llu\n");
+
void kbase_csf_debugfs_init(struct kbase_device *kbdev)
{
debugfs_create_file("active_groups", 0444,
@@ -440,6 +495,9 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
debugfs_create_file("scheduling_timer_kick", 0200,
kbdev->mali_debugfs_directory, kbdev,
&kbasep_csf_debugfs_scheduling_timer_kick_fops);
+ debugfs_create_file("scheduler_suspend", 0644,
+ kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_csf_debugfs_scheduler_suspend_fops);
kbase_csf_tl_reader_debugfs_init(kbdev);
kbase_csf_firmware_trace_buffer_debugfs_init(kbdev);
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index 3829572..3939238 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -20,7 +20,7 @@
*
*/
-/* Definitions (types, defines, etcs) common to the command stream frontend.
+/* Definitions (types, defines, etcs) common to the CSF.
* They are placed here to allow the hierarchy of header files to work.
*/
@@ -86,17 +86,17 @@ enum kbase_csf_reset_gpu_state {
*
* @KBASE_CSF_GROUP_INACTIVE: Group is inactive and won't be
* considered by scheduler for running on
- * command stream group slot.
+ * CSG slot.
* @KBASE_CSF_GROUP_RUNNABLE: Group is in the list of runnable groups
* and is subjected to time-slice based
* scheduling. A start request would be
* sent (or already has been sent) if the
- * group is assigned the command stream
+ * group is assigned the CS
 * group slot for the first time.
- * @KBASE_CSF_GROUP_IDLE: Group is currently on a command stream
- * group slot but all the command streams
- * bound to the group have become either
- * idle or waiting on sync object.
+ * @KBASE_CSF_GROUP_IDLE: Group is currently on a CSG slot
+ * but all the CSs bound to the group have
+ * become either idle or waiting on sync
+ * object.
* Group could be evicted from the slot on
* the next tick if there are no spare
* slots left after scheduling non-idle
@@ -110,12 +110,11 @@ enum kbase_csf_reset_gpu_state {
* KBASE_CSF_GROUP_SUSPENDED_ON_IDLE or
* KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC
* state.
- * @KBASE_CSF_GROUP_SUSPENDED: Group was evicted from the command
- * stream group slot and is not running but
- * is still in the list of runnable groups
- * and subjected to time-slice based
- * scheduling. A resume request would be
- * sent when a command stream group slot is
+ * @KBASE_CSF_GROUP_SUSPENDED: Group was evicted from the CSG slot
+ * and is not running but is still in the
+ * list of runnable groups and subjected
+ * to time-slice based scheduling. A resume
+ * request would be sent when a CSG slot is
* re-assigned to the group and once the
* resume is complete group would be moved
* back to the RUNNABLE state.
@@ -128,8 +127,8 @@ enum kbase_csf_reset_gpu_state {
* bound to the group is kicked it would be
* moved to the SUSPENDED state.
* @KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC: Same as GROUP_SUSPENDED_ON_IDLE
- * except that at least one command
- * stream bound to this group was
+ * except that at least one CS
+ * bound to this group was
* waiting for synchronization object
* before the suspension.
* @KBASE_CSF_GROUP_FAULT_EVICTED: Group is evicted from the scheduler due
@@ -185,10 +184,10 @@ enum kbase_csf_csg_slot_state {
* enum kbase_csf_scheduler_state - state of the scheduler operational phases.
*
* @SCHED_BUSY: The scheduler is busy performing on tick schedule
- * operations, the state of command stream group slots
+ * operations, the state of CSG slots
* can't be changed.
* @SCHED_INACTIVE: The scheduler is inactive, it is allowed to modify the
- * state of command stream group slots by in-cycle
+ * state of CSG slots by in-cycle
* priority scheduling.
* @SCHED_SUSPENDED: The scheduler is in low-power mode with scheduling
* operations suspended and is not holding the power
@@ -240,33 +239,31 @@ struct kbase_csf_notification {
* @refcount: Reference count, stands for the number of times the queue
* has been referenced. The reference is taken when it is
* created, when it is bound to the group and also when the
- * @oom_event_work or @fault_event_work work item is queued
+ * @oom_event_work work item is queued
* for it.
* @group: Pointer to the group to which this queue is bound.
- * @queue_reg: Pointer to the VA region allocated for command
- * stream buffer.
+ * @queue_reg: Pointer to the VA region allocated for CS buffer.
* @oom_event_work: Work item corresponding to the out of memory event for
* chunked tiler heap being used for this queue.
- * @fault_event_work: Work item corresponding to the firmware fault event.
- * @base_addr: Base address of the command stream buffer.
- * @size: Size of the command stream buffer.
+ * @base_addr: Base address of the CS buffer.
+ * @size: Size of the CS buffer.
* @priority: Priority of this queue within the group.
* @bind_state: Bind state of the queue.
- * @csi_index: The ID of the assigned command stream hardware interface.
- * @enabled: Indicating whether the command stream is running, or not.
- * @status_wait: Value of CS_STATUS_WAIT register of the command stream will
- * be kept when the command stream gets blocked by sync wait.
+ * @csi_index: The ID of the assigned CS hardware interface.
+ * @enabled: Indicating whether the CS is running or not.
+ * @status_wait: Value of CS_STATUS_WAIT register of the CS will
+ * be kept when the CS gets blocked by sync wait.
* CS_STATUS_WAIT provides information on conditions queue is
* blocking on. This is set when the group, to which queue is
* bound, is suspended after getting blocked, i.e. in
* KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC state.
- * @sync_ptr: Value of CS_STATUS_WAIT_SYNC_POINTER register of the command
- * stream will be kept when the command stream gets blocked by
+ * @sync_ptr: Value of CS_STATUS_WAIT_SYNC_POINTER register of the CS
+ * will be kept when the CS gets blocked by
* sync wait. CS_STATUS_WAIT_SYNC_POINTER contains the address
* of synchronization object being waited on.
* Valid only when @status_wait is set.
- * @sync_value: Value of CS_STATUS_WAIT_SYNC_VALUE register of the command
- * stream will be kept when the command stream gets blocked by
+ * @sync_value: Value of CS_STATUS_WAIT_SYNC_VALUE register of the CS
+ * will be kept when the CS gets blocked by
* sync wait. CS_STATUS_WAIT_SYNC_VALUE contains the value
* tested against the synchronization object.
* Valid only when @status_wait is set.
@@ -285,7 +282,6 @@ struct kbase_queue {
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
struct work_struct oom_event_work;
- struct work_struct fault_event_work;
u64 base_addr;
u32 size;
u8 priority;
@@ -335,9 +331,9 @@ struct kbase_protected_suspend_buffer {
* buffer. Protected-mode suspend buffer that is
* used for group context switch.
* @handle: Handle which identifies this queue group.
- * @csg_nr: Number/index of the command stream group to
- * which this queue group is mapped; KBASEP_CSG_NR_INVALID
- * indicates that the queue group is not scheduled.
+ * @csg_nr: Number/index of the CSG to which this queue group is
+ * mapped; KBASEP_CSG_NR_INVALID indicates that the queue
+ * group is not scheduled.
* @priority: Priority of the queue group, 0 being the highest,
* BASE_QUEUE_GROUP_PRIORITY_COUNT - 1 being the lowest.
* @tiler_max: Maximum number of tiler endpoints the group is allowed
@@ -355,9 +351,6 @@ struct kbase_protected_suspend_buffer {
* to be scheduled, if the group is runnable/suspended.
* If the group is idle or waiting for CQS, it would be a
* link to the list of idle/blocked groups list.
- * @timer_event_work: Work item corresponding to the event generated when a task
- * started by a queue in this group takes too long to execute
- * on an endpoint.
* @run_state: Current state of the queue group.
* @prepared_seq_num: Indicates the position of queue group in the list of
* prepared groups to be scheduled.
@@ -369,7 +362,7 @@ struct kbase_protected_suspend_buffer {
* group.
* @protm_event_work: Work item corresponding to the protected mode entry
* event for this queue.
- * @protm_pending_bitmap: Bit array to keep a track of command streams that
+ * @protm_pending_bitmap: Bit array to keep a track of CSs that
* have pending protected mode entry requests.
* @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be
* returned to userspace if such an error has occurred.
@@ -396,7 +389,6 @@ struct kbase_queue_group {
struct list_head link;
struct list_head link_to_schedule;
- struct work_struct timer_event_work;
enum kbase_csf_group_state run_state;
u32 prepared_seq_num;
bool faulted;
@@ -443,6 +435,22 @@ struct kbase_csf_kcpu_queue_context {
};
/**
+ * struct kbase_csf_cpu_queue_context - Object representing the cpu queue
+ * information.
+ *
+ * @buffer: Buffer containing CPU queue information provided by userspace.
+ * @buffer_size: The size of @buffer.
+ * @dump_req_status: Indicates the current status of the CPU queue dump request.
+ * @dump_cmp: Completion event for the CPU queue dump.
+ */
+struct kbase_csf_cpu_queue_context {
+ char *buffer;
+ size_t buffer_size;
+ atomic_t dump_req_status;
+ struct completion dump_cmp;
+};
+
+/**
* struct kbase_csf_heap_context_allocator - Allocator of heap contexts
*
* Heap context structures are allocated by the kernel for use by the firmware.
@@ -472,9 +480,9 @@ struct kbase_csf_heap_context_allocator {
* struct kbase_csf_tiler_heap_context - Object representing the tiler heaps
* context for a GPU address space.
*
- * This contains all of the command-stream front-end state relating to chunked
- * tiler heaps for one @kbase_context. It is not the same as a heap context
- * structure allocated by the kernel for use by the firmware.
+ * This contains all of the CSF state relating to chunked tiler heaps for one
+ * @kbase_context. It is not the same as a heap context structure allocated by
+ * the kernel for use by the firmware.
*
* @lock: Lock preventing concurrent access to the tiler heaps.
* @list: List of tiler heaps.
@@ -500,7 +508,7 @@ struct kbase_csf_tiler_heap_context {
* @num_idle_wait_grps: Length of the @idle_wait_groups list.
* @sync_update_wq: Dedicated workqueue to process work items corresponding
* to the sync_update events by sync_set/sync_add
- * instruction execution on command streams bound to groups
+ * instruction execution on CSs bound to groups
* of @idle_wait_groups list.
* @sync_update_work: work item to process the sync_update events by
* sync_set / sync_add instruction execution on command
@@ -519,8 +527,7 @@ struct kbase_csf_scheduler_context {
};
/**
- * struct kbase_csf_context - Object representing command-stream front-end
- * for a GPU address space.
+ * struct kbase_csf_context - Object representing CSF for a GPU address space.
*
* @event_pages_head: A list of pages allocated for the event memory used by
* the synchronization objects. A separate list would help
@@ -534,7 +541,7 @@ struct kbase_csf_scheduler_context {
* deferred manner of a pair of User mode input/output pages
* & a hardware doorbell page.
* The pages are allocated when a GPU command queue is
- * bound to a command stream group in kbase_csf_queue_bind.
+ * bound to a CSG in kbase_csf_queue_bind.
* This helps returning unique handles to Userspace from
* kbase_csf_queue_bind and later retrieving the pointer to
* queue in the mmap handler.
@@ -550,7 +557,8 @@ struct kbase_csf_scheduler_context {
* userspace mapping created for them on bind operation
* hasn't been removed.
* @kcpu_queues: Kernel CPU command queues.
- * @event_lock: Lock protecting access to @event_callback_list
+ * @event_lock: Lock protecting access to @event_callback_list and
+ * @error_list.
* @event_callback_list: List of callbacks which are registered to serve CSF
* events.
* @tiler_heaps: Chunked tiler memory heaps.
@@ -563,10 +571,12 @@ struct kbase_csf_scheduler_context {
* of the USER register page. Currently used only for sanity
* checking.
* @sched: Object representing the scheduler's context
- * @error_list: List for command stream fatal errors in this context.
+ * @error_list: List for CS fatal errors in this context.
* Link of fatal error is
* &struct_kbase_csf_notification.link.
- * @lock needs to be held to access to this list.
+ * @event_lock needs to be held to access this list.
+ * @cpu_queue: CPU queue information. Only available when DEBUG_FS
+ * is enabled.
*/
struct kbase_csf_context {
struct list_head event_pages_head;
@@ -585,6 +595,9 @@ struct kbase_csf_context {
struct vm_area_struct *user_reg_vma;
struct kbase_csf_scheduler_context sched;
struct list_head error_list;
+#ifdef CONFIG_DEBUG_FS
+ struct kbase_csf_cpu_queue_context cpu_queue;
+#endif
};
/**
@@ -604,12 +617,12 @@ struct kbase_csf_reset_gpu {
/**
* struct kbase_csf_csg_slot - Object containing members for tracking the state
- * of command stream group slots.
+ * of CSG slots.
* @resident_group: pointer to the queue group that is resident on the
- * command stream group slot.
+ * CSG slot.
* @state: state of the slot as per enum kbase_csf_csg_slot_state.
* @trigger_jiffies: value of jiffies when change in slot state is recorded.
- * @priority: dynamic priority assigned to command stream group slot.
+ * @priority: dynamic priority assigned to CSG slot.
*/
struct kbase_csf_csg_slot {
struct kbase_queue_group *resident_group;
@@ -620,8 +633,7 @@ struct kbase_csf_csg_slot {
/**
* struct kbase_csf_scheduler - Object representing the scheduler used for
- * command-stream front-end for an instance of
- * GPU platform device.
+ * CSF for an instance of GPU platform device.
* @lock: Lock to serialize the scheduler operations and
* access to the data members.
* @interrupt_lock: Lock to protect members accessed by interrupt
@@ -632,24 +644,24 @@ struct kbase_csf_csg_slot {
* @doorbell_inuse_bitmap: Bitmap of hardware doorbell pages keeping track of
* which pages are currently available for assignment
* to clients.
- * @csg_inuse_bitmap: Bitmap to keep a track of command stream group slots
+ * @csg_inuse_bitmap: Bitmap to keep a track of CSG slots
* that are currently in use.
- * @csg_slots: The array for tracking the state of command stream
+ * @csg_slots: The array for tracking the state of CS
* group slots.
* @runnable_kctxs: List of Kbase contexts that have runnable command
* queue groups.
* @groups_to_schedule: List of runnable queue groups prepared on every
- * scheduler tick. The dynamic priority of the command
- * stream group slot assigned to a group will depend
- * upon the position of group in the list.
+ * scheduler tick. The dynamic priority of the CSG
+ * slot assigned to a group will depend upon the
+ * position of group in the list.
* @ngrp_to_schedule: Number of groups in the @groups_to_schedule list,
* incremented when a group is added to the list, used
* to record the position of group in the list.
* @num_active_address_spaces: Number of GPU address space slots that would get
* used to program the groups in @groups_to_schedule
- * list on all the available command stream group
+ * list on all the available CSG
* slots.
- * @num_csg_slots_for_tick: Number of command stream group slots that can be
+ * @num_csg_slots_for_tick: Number of CSG slots that can be
* active in the given tick/tock. This depends on the
* value of @num_active_address_spaces.
* @idle_groups_to_schedule: List of runnable queue groups, in which all GPU
@@ -663,7 +675,7 @@ struct kbase_csf_csg_slot {
* @csgs_events_enable_mask: Use for temporary masking off asynchronous events
* from firmware (such as OoM events) before a group
* is suspended.
- * @csg_slots_idle_mask: Bit array for storing the mask of command stream
+ * @csg_slots_idle_mask: Bit array for storing the mask of CS
* group slots for which idle notification was
* received.
* @csg_slots_prio_update: Bit array for tracking slots that have an on-slot
@@ -683,27 +695,38 @@ struct kbase_csf_csg_slot {
* @tock_work: Work item that would perform the schedule on tock
* operation to implement the asynchronous scheduling.
* @ping_work: Work item that would ping the firmware at regular
- * intervals, only if there is a single active command
- * stream group slot, to check if firmware is alive
- * and would initiate a reset if the ping request
- * isn't acknowledged.
+ * intervals, only if there is a single active CSG
+ * slot, to check if firmware is alive and would
+ * initiate a reset if the ping request isn't
+ * acknowledged.
* @top_ctx: Pointer to the Kbase context corresponding to the
* @top_grp.
* @top_grp: Pointer to queue group inside @groups_to_schedule
* list that was assigned the highest slot priority.
* @head_slot_priority: The dynamic slot priority to be used for the
* queue group at the head of @groups_to_schedule
- * list. Once the queue group is assigned a command
- * stream group slot, it is removed from the list and
- * priority is decremented.
+ * list. Once the queue group is assigned a CSG slot,
+ * it is removed from the list and priority is
+ * decremented.
* @tock_pending_request: A "tock" request is pending: a group that is not
* currently on the GPU demands to be scheduled.
* @active_protm_grp: Indicates if firmware has been permitted to let GPU
* enter protected mode with the given group. On exit
* from protected mode the pointer is reset to NULL.
+ * @gpu_idle_fw_timer_enabled: Whether the CSF scheduler has activated the
+ * firmware idle hysteresis timer for preparing a
+ * GPU suspend on idle.
* @gpu_idle_work: Work item for facilitating the scheduler to bring
* the GPU to a low-power mode on becoming idle.
- * @non_idle_suspended_grps: Count of suspended queue groups not idle.
+ * @non_idle_offslot_grps: Count of off-slot non-idle groups. Reset during
+ * the scheduler active phase in a tick. It then
+ * tracks the count of non-idle groups across all the
+ * other phases.
+ * @non_idle_scanout_grps: Count of the non-idle groups in the scan-out
+ * list at the scheduling prepare stage.
+ * @apply_async_protm: Signals the internal scheduling apply stage to
+ * act with special handling for entering
+ * protected mode asynchronously.
* @pm_active_count: Count indicating if the scheduler is owning a power
* management reference count. Reference is taken when
* the count becomes 1 and is dropped when the count
@@ -739,8 +762,11 @@ struct kbase_csf_scheduler {
u8 head_slot_priority;
bool tock_pending_request;
struct kbase_queue_group *active_protm_grp;
+ bool gpu_idle_fw_timer_enabled;
struct delayed_work gpu_idle_work;
- atomic_t non_idle_suspended_grps;
+ atomic_t non_idle_offslot_grps;
+ u32 non_idle_scanout_grps;
+ bool apply_async_protm;
u32 pm_active_count;
};
@@ -758,8 +784,154 @@ struct kbase_csf_scheduler {
GLB_PROGRESS_TIMER_TIMEOUT_SCALE)
/**
- * struct kbase_csf - Object representing command-stream front-end for an
- * instance of GPU platform device.
+ * Number of GPU cycles per unit of the global poweroff timeout.
+ */
+#define GLB_PWROFF_TIMER_TIMEOUT_SCALE ((u64)1024)
+
+/**
+ * Minimum number of GPU cycles for which shader cores must be idle before they
+ * are powered off.
+ * Value chosen is equivalent to the hysteresis delay used in the shader cores
+ * state machine of JM GPUs, which is ~800 micro seconds. It is assumed the GPU
+ * is usually clocked at ~500 MHZ.
+ */
+#define DEFAULT_GLB_PWROFF_TIMER_TIMEOUT ((u64)800 * 500)
+
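Taken together, the two constants above imply a concrete register value; enable_shader_poweroff_timer() later in this patch programs the GLB_PWROFF_TIMER timeout field with the timeout divided by the scale:

    DEFAULT_GLB_PWROFF_TIMER_TIMEOUT = 800 * 500 = 400,000 GPU cycles (~800 us at 500 MHz)
    GLB_PWROFF_TIMER timeout field   = 400,000 / 1024 = 390 (integer division)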
+/**
+ * Maximum number of sessions that can be managed by the IPA Control component.
+ */
+#if MALI_UNIT_TEST
+#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)8)
+#else
+#define KBASE_IPA_CONTROL_MAX_SESSIONS ((size_t)2)
+#endif
+
+/**
+ * enum kbase_ipa_core_type - Type of counter block for performance counters
+ *
+ * @KBASE_IPA_CORE_TYPE_CSHW: CS Hardware counters.
+ * @KBASE_IPA_CORE_TYPE_MEMSYS: Memory System counters.
+ * @KBASE_IPA_CORE_TYPE_TILER: Tiler counters.
+ * @KBASE_IPA_CORE_TYPE_SHADER: Shader Core counters.
+ * @KBASE_IPA_CORE_TYPE_NUM: Number of core types.
+ */
+enum kbase_ipa_core_type {
+ KBASE_IPA_CORE_TYPE_CSHW = 0,
+ KBASE_IPA_CORE_TYPE_MEMSYS,
+ KBASE_IPA_CORE_TYPE_TILER,
+ KBASE_IPA_CORE_TYPE_SHADER,
+ KBASE_IPA_CORE_TYPE_NUM
+};
+
+/**
+ * Number of configurable counters per type of block on the IPA Control
+ * interface.
+ */
+#define KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS ((size_t)8)
+
+/**
+ * Total number of configurable counters existing on the IPA Control interface.
+ */
+#define KBASE_IPA_CONTROL_MAX_COUNTERS \
+ ((size_t)KBASE_IPA_CORE_TYPE_NUM * KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS)
+
+/**
+ * struct kbase_ipa_control_prfcnt - Session for a single performance counter
+ *
+ * @latest_raw_value: Latest raw value read from the counter.
+ * @scaling_factor: Factor raw value shall be multiplied by.
+ * @accumulated_diff: Partial sum of scaled and normalized values from
+ * previous samples. This represent all the values
+ * that were read before the latest raw value.
+ * @type: Type of counter block for performance counter.
+ * @select_idx: Index of the performance counter as configured on
+ * the IPA Control interface.
+ * @gpu_norm: Indicating whether values shall be normalized by
+ * GPU frequency. If true, returned values represent
+ * an interval of time expressed in seconds (when the
+ * scaling factor is set to 1).
+ */
+struct kbase_ipa_control_prfcnt {
+ u64 latest_raw_value;
+ u64 scaling_factor;
+ u64 accumulated_diff;
+ enum kbase_ipa_core_type type;
+ u8 select_idx;
+ bool gpu_norm;
+};
+
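To make the roles of @latest_raw_value, @scaling_factor, @accumulated_diff and @gpu_norm concrete, a plausible per-sample accumulation step is sketched below. This is only an illustration under the assumption that samples are folded in this way; the real logic lives in mali_kbase_csf_ipa_control.c (not part of this hunk) and the helper name is hypothetical.

/* Hypothetical illustration, not the driver's implementation. */
static u64 example_sample_prfcnt(struct kbase_ipa_control_prfcnt *prfcnt,
				 u64 raw_value, u32 gpu_rate)
{
	u64 delta = raw_value - prfcnt->latest_raw_value;
	u64 scaled = delta * prfcnt->scaling_factor;

	/* With gpu_norm set, normalize by the GPU frequency so that a
	 * scaling factor of 1 yields a time interval in seconds.
	 */
	if (prfcnt->gpu_norm && gpu_rate)
		scaled = div_u64(scaled, gpu_rate);

	prfcnt->accumulated_diff += scaled;
	prfcnt->latest_raw_value = raw_value;

	return prfcnt->accumulated_diff;
}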
+/**
+ * struct kbase_ipa_control_session - Session for an IPA Control client
+ *
+ * @prfcnts: Sessions for individual performance counters.
+ * @num_prfcnts: Number of performance counters.
+ * @active: Status of the session.
+ */
+struct kbase_ipa_control_session {
+ struct kbase_ipa_control_prfcnt prfcnts[KBASE_IPA_CONTROL_MAX_COUNTERS];
+ size_t num_prfcnts;
+ bool active;
+};
+
+/**
+ * struct kbase_ipa_control_prfcnt_config - Performance counter configuration
+ *
+ * @idx: Index of the performance counter inside the block, as specified
+ * in the GPU architecture.
+ * @refcount: Number of client sessions bound to this counter.
+ *
+ * This structure represents one configurable performance counter of
+ * the IPA Control interface. The entry may be mapped to a specific counter
+ * by one or more client sessions. The counter is considered to be unused
+ * if it isn't part of any client session.
+ */
+struct kbase_ipa_control_prfcnt_config {
+ u8 idx;
+ u8 refcount;
+};
+
+/**
+ * struct kbase_ipa_control_prfcnt_block - Block of performance counters
+ *
+ * @select: Current performance counter configuration.
+ * @num_available_counters: Number of counters that are not already configured.
+ *
+ */
+struct kbase_ipa_control_prfcnt_block {
+ struct kbase_ipa_control_prfcnt_config
+ select[KBASE_IPA_CONTROL_NUM_BLOCK_COUNTERS];
+ size_t num_available_counters;
+};
+
+/**
+ * struct kbase_ipa_control - Manager of the IPA Control interface.
+ *
+ * @blocks: Current configuration of performance counters
+ * for the IPA Control interface.
+ * @sessions: State of client sessions, storing information
+ * like performance counters the client subscribed to
+ * and latest value read from each counter.
+ * @lock: Spinlock to serialize access by concurrent clients.
+ * @rtm_listener_data: Private data for allocating a GPU frequency change
+ * listener.
+ * @num_active_sessions: Number of sessions opened by clients.
+ * @cur_gpu_rate: Current GPU top-level operating frequency, in Hz.
+ */
+struct kbase_ipa_control {
+ struct kbase_ipa_control_prfcnt_block blocks[KBASE_IPA_CORE_TYPE_NUM];
+ struct kbase_ipa_control_session
+ sessions[KBASE_IPA_CONTROL_MAX_SESSIONS];
+ spinlock_t lock;
+ void *rtm_listener_data;
+ size_t num_active_sessions;
+ u32 cur_gpu_rate;
+};
+
+/**
+ * struct kbase_csf - Object representing CSF for an instance of GPU
+ * platform device.
*
* @mcu_mmu: MMU page tables for the MCU firmware
* @firmware_interfaces: List of interfaces defined in the firmware image
@@ -794,6 +966,17 @@ struct kbase_csf_scheduler {
* of the real Hw doorbell page for the active GPU
* command queues after they are stopped or after the
* GPU is powered down.
+ * @dummy_user_reg_page: Address of the dummy page that is mapped in place
+ * of the real User register page just before the GPU
+ * is powered down. The User register page is mapped
+ * in the address space of every process that created
+ * a Base context, to enable access to the LATEST_FLUSH
+ * register from userspace.
+ * @mali_file_inode: Pointer to the inode corresponding to mali device
+ * file. This is needed in order to switch to the
+ * @dummy_user_reg_page on GPU power down.
+ * All instances of the mali device file will point to
+ * the same inode.
* @reg_lock: Lock to serialize the MCU firmware related actions
* that affect all contexts such as allocation of
* regions from shared interface area, assignment of
@@ -806,7 +989,7 @@ struct kbase_csf_scheduler {
* @global_iface: The result of parsing the global interface
* structure set up by the firmware, including the
* CSGs, CSs, and their properties
- * @scheduler: The command stream scheduler instance.
+ * @scheduler: The CS scheduler instance.
* @reset: Contain members required for GPU reset handling.
* @progress_timeout: Maximum number of GPU clock cycles without forward
* progress to allow, for all tasks running on
@@ -825,6 +1008,15 @@ struct kbase_csf_scheduler {
* @glb_init_request_pending: Flag to indicate that Global requests have been
* sent to the FW after MCU was re-enabled and their
* acknowledgement is pending.
+ * @fw_error_work: Work item for handling the firmware internal error
+ * fatal event.
+ * @ipa_control: IPA Control component manager.
+ * @gpu_idle_hysteresis_ms: Sysfs attribute for the idle hysteresis time
+ * window in units of ms. The firmware does not use it
+ * directly.
+ * @gpu_idle_dur_count: The counterpart of the hysteresis time window, in
+ * the format required by the interface, ready to be
+ * used directly in the firmware.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -838,6 +1030,8 @@ struct kbase_csf_device {
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
+ struct tagged_addr dummy_user_reg_page;
+ struct inode *mali_file_inode;
struct mutex reg_lock;
wait_queue_head_t event_wait;
bool interrupt_received;
@@ -851,6 +1045,10 @@ struct kbase_csf_device {
bool firmware_reload_needed;
struct work_struct firmware_reload_work;
bool glb_init_request_pending;
+ struct work_struct fw_error_work;
+ struct kbase_ipa_control ipa_control;
+ u32 gpu_idle_hysteresis_ms;
+ u32 gpu_idle_dur_count;
};
/**
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 6203e18..e2067d2 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -25,22 +25,26 @@
#include "mali_kbase_csf_trace_buffer.h"
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
-#include <mali_kbase_reset_gpu.h>
+#include "mali_kbase_reset_gpu.h"
+#include "mali_kbase_ctx_sched.h"
#include "mali_kbase_csf_scheduler.h"
#include "device/mali_kbase_device.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "tl/mali_kbase_timeline_priv.h"
#include "mali_kbase_csf_tl_reader.h"
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <linux/mman.h>
#include <linux/string.h>
+#include <linux/mutex.h>
#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
#include <linux/set_memory.h>
#endif
#include <mmu/mali_kbase_mmu.h>
+#include <asm/arch_timer.h>
#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
@@ -100,8 +104,9 @@ MODULE_PARM_DESC(fw_debug,
#define CSF_FIRMWARE_BOOT_TIMEOUT_MS (500)
#define CSF_MAX_FW_STOP_LOOPS (100000)
-#define CSF_GLB_REQ_CFG_MASK \
- (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK)
+#define CSF_GLB_REQ_CFG_MASK \
+ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
+ GLB_REQ_CFG_PWROFF_TIMER_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
@@ -671,8 +676,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
out:
if (allocated_pages) {
if (protected_mode) {
- kbase_csf_protected_memory_free(kbdev,
- interface->pma, num_pages);
+ kbase_csf_protected_memory_free(kbdev, pma, num_pages);
} else {
kbase_mem_pool_free_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
@@ -917,7 +921,7 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev,
if (ginfo->stream_num < MIN_SUPPORTED_STREAMS_PER_GROUP ||
ginfo->stream_num > MAX_SUPPORTED_STREAMS_PER_GROUP) {
- dev_err(kbdev->dev, "CSG with %u streams out of range %u-%u",
+ dev_err(kbdev->dev, "CSG with %u CSs out of range %u-%u",
ginfo->stream_num,
MIN_SUPPORTED_STREAMS_PER_GROUP,
MAX_SUPPORTED_STREAMS_PER_GROUP);
@@ -928,7 +932,7 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev,
if (ginfo->stream_num * ginfo->stream_stride > group_stride) {
dev_err(kbdev->dev,
- "group stride of 0x%x exceeded by %u streams with stride 0x%x",
+ "group stride of 0x%x exceeded by %u CSs with stride 0x%x",
group_stride, ginfo->stream_num,
ginfo->stream_stride);
return -EINVAL;
@@ -948,7 +952,7 @@ static int parse_cmd_stream_group_info(struct kbase_device *kbdev,
err = parse_cmd_stream_info(kbdev, &ginfo->streams[sid],
stream_base);
if (err < 0) {
- /* caller will free the memory for streams array */
+ /* caller will free the memory for CSs array */
return err;
}
}
@@ -1215,6 +1219,54 @@ u32 kbase_csf_firmware_global_output(
return val;
}
+/**
+ * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
+ *
+ * @kbdev: Pointer to kbase device
+ *
+ * Report group fatal error to user space for all GPU command queue groups
+ * in the device, terminate them and reset GPU.
+ */
+static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
+{
+ int as;
+
+ for (as = 0; as < kbdev->nr_hw_address_spaces; as++) {
+ struct kbase_context *kctx;
+ struct kbase_fault fault = {
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1,
+ };
+
+ if (as == MCU_AS_NR)
+ continue;
+
+ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as);
+ if (!kctx)
+ continue;
+
+ kbase_csf_ctx_handle_fault(kctx, &fault);
+ kbase_ctx_sched_release_ctx_lock(kctx);
+ }
+
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+}
+
+/**
+ * firmware_error_worker - Worker function for handling firmware internal error
+ *
+ * @data: Pointer to a work_struct embedded in kbase device.
+ *
+ * Handle the CS internal firmware error
+ */
+static void firmware_error_worker(struct work_struct *const data)
+{
+ struct kbase_device *const kbdev =
+ container_of(data, struct kbase_device, csf.fw_error_work);
+
+ handle_internal_firmware_fatal(kbdev);
+}
+
static bool global_request_complete(struct kbase_device *const kbdev,
u32 const req_mask)
{
@@ -1283,6 +1335,26 @@ static void enable_endpoints_global(
set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK);
}
+static void enable_shader_poweroff_timer(
+ struct kbase_device *const kbdev,
+ const struct kbase_csf_global_iface *const global_iface)
+{
+ u32 pwroff_reg = 0;
+
+ pwroff_reg = GLB_PWROFF_TIMER_TIMEOUT_SET(
+ pwroff_reg, DEFAULT_GLB_PWROFF_TIMER_TIMEOUT /
+ GLB_PWROFF_TIMER_TIMEOUT_SCALE);
+
+ pwroff_reg = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
+ pwroff_reg, GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+ kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER,
+ pwroff_reg);
+ set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK);
+
+ dev_dbg(kbdev->dev, "GLB_PWROFF_TIMER set to 0x%.8x\n", pwroff_reg);
+}
+
static void set_timeout_global(
const struct kbase_csf_global_iface *const global_iface,
u64 const timeout)
@@ -1309,13 +1381,15 @@ static void set_coherency_mode(struct kbase_device *const kbdev)
protected_mode_coherency);
}
-static void global_init(struct kbase_device *const kbdev, u32 req_mask)
+static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
- u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
- GLB_ACK_IRQ_MASK_PING_MASK |
- GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK;
+ u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
+ GLB_ACK_IRQ_MASK_PING_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
+ GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -1326,9 +1400,9 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
/* Set the cohereny mode for protected mode execution */
set_coherency_mode(kbdev);
- /* Enable endpoints on all present shader cores */
- enable_endpoints_global(global_iface,
- kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+ /* Update shader core allocation enable mask */
+ enable_endpoints_global(global_iface, core_mask);
+ enable_shader_poweroff_timer(kbdev, global_iface);
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
@@ -1344,8 +1418,7 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
/**
* global_init_on_boot - Sends a global request to control various features.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface
*
* Currently only the request to enable endpoints and timeout for GPU progress
* timer is sent.
@@ -1354,19 +1427,25 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
*/
static int global_init_on_boot(struct kbase_device *const kbdev)
{
- u32 const req_mask = CSF_GLB_REQ_CFG_MASK;
+ unsigned long flags;
+ u64 core_mask;
- global_init(kbdev, req_mask);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ core_mask = kbase_pm_ca_get_core_mask(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- return wait_for_global_request(kbdev, req_mask);
+ global_init(kbdev, core_mask);
+
+ return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK);
}
-void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev)
+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
+ u64 core_mask)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->csf.glb_init_request_pending = true;
- global_init(kbdev, CSF_GLB_REQ_CFG_MASK);
+ global_init(kbdev, core_mask);
}
bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
@@ -1380,6 +1459,26 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
return !kbdev->csf.glb_init_request_pending;
}
+void kbase_csf_firmware_update_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ enable_endpoints_global(&kbdev->csf.global_iface, new_core_mask);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_core_mask_updated(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK);
+}
+
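The two functions above form an asynchronous request/acknowledge pair: kbase_csf_firmware_update_core_mask() kicks the GLB_REQ update and rings the doorbell, and kbase_csf_firmware_core_mask_updated() reports whether the firmware has acknowledged it. A minimal sketch of the expected call pattern follows; the wrapper function is hypothetical (the real caller is the PM backend, which is outside this hunk).

/* Hypothetical caller; both calls require kbdev->hwaccess_lock to be held. */
static void example_apply_core_mask(struct kbase_device *kbdev, u64 new_mask)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Request the new shader core allocation enable mask. */
	kbase_csf_firmware_update_core_mask(kbdev, new_mask);

	/* Completion would be polled later, e.g. from a PM state machine step. */
	if (kbase_csf_firmware_core_mask_updated(kbdev))
		dev_dbg(kbdev->dev, "core mask change acknowledged by firmware");
}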
/**
* This helper function will reload the firmware image and re-enable the MCU.
* It is supposed to be called after MCU(GPU) has been reset.
@@ -1447,6 +1546,68 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
}
+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms)
+{
+#define HYSTERESIS_VAL_UNIT_SHIFT (10)
+ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
+ u64 freq = arch_timer_get_cntfrq();
+ u64 dur_val = dur_ms;
+ u32 cnt_val_u32, reg_val_u32;
+ bool src_system_timestamp = freq > 0;
+
+ if (!src_system_timestamp) {
+ /* Get the cycle_counter source alternative */
+ spin_lock(&kbdev->pm.clk_rtm.lock);
+ if (kbdev->pm.clk_rtm.clks[0])
+ freq = kbdev->pm.clk_rtm.clks[0]->clock_val;
+ else
+ dev_warn(kbdev->dev, "No GPU clock, unexpected intregration issue!");
+ spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+ dev_info(kbdev->dev, "Can't get the timestamp frequency, "
+ "use cycle counter format with firmware idle hysteresis!");
+ }
+
+ /* Formula: dur_val = ((dur_ms / 1000) * freq_HZ) >> 10 */
+ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
+ dur_val = div_u64(dur_val, 1000);
+
+ /* Interface limits the value field to S32_MAX */
+ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+
+ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32);
+ /* add the source flag */
+ if (src_system_timestamp)
+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+ else
+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+ return reg_val_u32;
+}
+
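As a worked example of the conversion above, with an illustrative 10 ms hysteresis and a 100 MHz SYSTEM_TIMESTAMP frequency:

    dur_val = (10 * 100,000,000) >> 10 = 976,562
    dur_val / 1000                     = 976

so GLB_IDLE_TIMER would be programmed with a timeout count of 976 plus the SYSTEM_TIMESTAMP source flag, i.e. roughly dur_ms * freq / (1024 * 1000).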
+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
+{
+ return kbdev->csf.gpu_idle_hysteresis_ms;
+}
+
+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
+{
+ unsigned long flags;
+ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
+ hysteresis_val);
+
+ return hysteresis_val;
+}
+
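Since @gpu_idle_hysteresis_ms is documented as a sysfs attribute, the setter above is presumably driven from a sysfs store handler. A minimal sketch of such a handler follows; the attribute name, its registration and the exact plumbing are assumptions, as the sysfs side is not part of this hunk.

/* Hypothetical sysfs store handler; not part of this patch. */
static ssize_t idle_hysteresis_time_store(struct device *dev,
					  struct device_attribute *attr,
					  const char *buf, size_t count)
{
	struct kbase_device *kbdev = dev_get_drvdata(dev);
	u32 dur_ms;

	if (kstrtou32(buf, 0, &dur_ms))
		return -EINVAL;

	kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur_ms);

	return count;
}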
int kbase_csf_firmware_init(struct kbase_device *kbdev)
{
const struct firmware *firmware;
@@ -1479,9 +1640,14 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
+ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
mutex_init(&kbdev->csf.reg_lock);
+ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+ kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(kbdev,
+ FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
+
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to setup the rb tree for managing shared interface segment\n");
@@ -1588,6 +1754,10 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
if (ret != 0)
goto error;
+ ret = kbase_csf_setup_dummy_user_reg_page(kbdev);
+ if (ret != 0)
+ goto error;
+
ret = kbase_csf_timeout_init(kbdev);
if (ret != 0)
goto error;
@@ -1618,6 +1788,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
unsigned long flags;
int ret = 0;
+ cancel_work_sync(&kbdev->csf.fw_error_work);
+
while (kbase_reset_gpu_is_active(kbdev) && !ret)
ret = kbase_reset_gpu_wait(kbdev);
@@ -1644,6 +1816,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
kbase_csf_scheduler_term(kbdev);
+ kbase_csf_free_dummy_user_reg_page(kbdev);
+
kbase_csf_doorbell_mapping_term(kbdev);
free_global_iface(kbdev);
@@ -1699,6 +1873,49 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
kbase_mcu_shared_interface_region_tracker_term(kbdev);
}
+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ const u32 glb_req =
+ kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ /* The scheduler is assumed to only call the enable when its internal
+ * state indicates that the idle timer has previously been disabled. So
+ * on entry the expected field values are:
+ * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
+ * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or transitioning from 1 to 0
+ */
+
+ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
+ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
+
+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+ kbdev->csf.gpu_idle_dur_count);
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+ GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
+
+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+ kbdev->csf.gpu_idle_dur_count);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+ GLB_REQ_REQ_IDLE_DISABLE,
+ GLB_REQ_IDLE_DISABLE_MASK);
+ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer");
+
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
{
const struct kbase_csf_global_iface *const global_iface =
@@ -1742,6 +1959,7 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
unsigned long flags;
unsigned int value;
+ int err;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
value = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
@@ -1752,7 +1970,14 @@ void kbase_csf_enter_protected_mode(struct kbase_device *kbdev)
kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
- wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+ err = wait_for_global_request(kbdev, GLB_REQ_PROTM_ENTER_MASK);
+
+ if (!err) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->protected_mode = true;
+ kbase_ipa_protection_mode_switch_event(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
}
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
@@ -1772,20 +1997,19 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
}
/**
- * copy_grp_and_stm - Copy command stream and/or group data
+ * copy_grp_and_stm - Copy CS and/or group data
*
- * @iface: Global command stream front-end interface provided by
- * the firmware.
+ * @iface: Global CSF interface provided by the firmware.
* @group_data: Pointer where to store all the group data
* (sequentially).
* @max_group_num: The maximum number of groups to be read. Can be 0, in
* which case group_data is unused.
- * @stream_data: Pointer where to store all the stream data
+ * @stream_data: Pointer where to store all the CS data
* (sequentially).
- * @max_total_stream_num: The maximum number of streams to be read.
+ * @max_total_stream_num: The maximum number of CSs to be read.
* Can be 0, in which case stream_data is unused.
*
- * Return: Total number of command streams, summed across all groups.
+ * Return: Total number of CSs, summed across all groups.
*/
static u32 copy_grp_and_stm(
const struct kbase_csf_global_iface * const iface,
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index 470f178..3a05062 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -71,12 +71,12 @@
/* All implementations of the host interface with major version 0 must comply
* with these restrictions:
*/
-/* GLB_GROUP_NUM: At least 3 command stream groups, but no more than 31 */
+/* GLB_GROUP_NUM: At least 3 CSGs, but no more than 31 */
#define MIN_SUPPORTED_CSGS 3
#define MAX_SUPPORTED_CSGS 31
-/* GROUP_STREAM_NUM: At least 8 command streams per CSG, but no more than 32 */
+/* GROUP_STREAM_NUM: At least 8 CSs per CSG, but no more than 32 */
#define MIN_SUPPORTED_STREAMS_PER_GROUP 8
-/* Maximum command streams per csg. */
+/* Maximum CSs per csg. */
#define MAX_SUPPORTED_STREAMS_PER_GROUP 32
struct kbase_device;
@@ -111,16 +111,15 @@ struct kbase_csf_trace_buffers {
};
/**
- * struct kbase_csf_cmd_stream_info - Command stream interface provided by the
- * firmware.
+ * struct kbase_csf_cmd_stream_info - CSI provided by the firmware.
*
* @kbdev: Address of the instance of a GPU platform device that implements
* this interface.
- * @features: Bit field of command stream features (e.g. which types of jobs
+ * @features: Bit field of CS features (e.g. which types of jobs
* are supported). Bits 7:0 specify the number of work registers(-1).
* Bits 11:8 specify the number of scoreboard entries(-1).
- * @input: Address of command stream interface input page.
- * @output: Address of command stream interface output page.
+ * @input: Address of CSI input page.
+ * @output: Address of CSI output page.
*/
struct kbase_csf_cmd_stream_info {
struct kbase_device *kbdev;
@@ -130,9 +129,9 @@ struct kbase_csf_cmd_stream_info {
};
/**
- * kbase_csf_firmware_cs_input() - Set a word in a command stream's input page
+ * kbase_csf_firmware_cs_input() - Set a word in a CS's input page
*
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
* @offset: Offset of the word to be written, in bytes.
* @value: Value to be written.
*/
@@ -140,22 +139,20 @@ void kbase_csf_firmware_cs_input(
const struct kbase_csf_cmd_stream_info *info, u32 offset, u32 value);
/**
- * kbase_csf_firmware_cs_input_read() - Read a word in a command stream's input
- * page
+ * kbase_csf_firmware_cs_input_read() - Read a word in a CS's input page
*
- * Return: Value of the word read from the command stream's input page.
+ * Return: Value of the word read from the CS's input page.
*
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_cs_input_read(
const struct kbase_csf_cmd_stream_info *const info, const u32 offset);
/**
- * kbase_csf_firmware_cs_input_mask() - Set part of a word in a command stream's
- * input page
+ * kbase_csf_firmware_cs_input_mask() - Set part of a word in a CS's input page
*
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
* @offset: Offset of the word to be modified, in bytes.
* @value: Value to be written.
* @mask: Bitmask with the bits to be modified set.
@@ -165,19 +162,18 @@ void kbase_csf_firmware_cs_input_mask(
u32 value, u32 mask);
/**
- * kbase_csf_firmware_cs_output() - Read a word in a command stream's output
- * page
+ * kbase_csf_firmware_cs_output() - Read a word in a CS's output page
*
- * Return: Value of the word read from the command stream's output page.
+ * Return: Value of the word read from the CS's output page.
*
- * @info: Command stream interface provided by the firmware.
+ * @info: CSI provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_cs_output(
const struct kbase_csf_cmd_stream_info *info, u32 offset);
/**
- * struct kbase_csf_cmd_stream_group_info - Command stream group interface
- * provided by the firmware.
+ * struct kbase_csf_cmd_stream_group_info - CSG interface provided by the
+ * firmware.
*
* @kbdev: Address of the instance of a GPU platform device that implements
* this interface.
@@ -185,14 +181,13 @@ u32 kbase_csf_firmware_cs_output(
* be ignored.
* @input: Address of global interface input page.
* @output: Address of global interface output page.
- * @suspend_size: Size in bytes for normal suspend buffer for the command
- * stream group.
+ * @suspend_size: Size in bytes for normal suspend buffer for the CSG
* @protm_suspend_size: Size in bytes for protected mode suspend buffer
- * for the command stream group.
- * @stream_num: Number of command streams in the command stream group.
+ * for the CSG.
+ * @stream_num: Number of CSs in the CSG.
* @stream_stride: Stride in bytes in JASID0 virtual address between
- * command stream capability structures.
- * @streams: Address of an array of command stream capability structures.
+ * CS capability structures.
+ * @streams: Address of an array of CS capability structures.
*/
struct kbase_csf_cmd_stream_group_info {
struct kbase_device *kbdev;
@@ -207,10 +202,9 @@ struct kbase_csf_cmd_stream_group_info {
};
/**
- * kbase_csf_firmware_csg_input() - Set a word in a command stream group's
- * input page
+ * kbase_csf_firmware_csg_input() - Set a word in a CSG's input page
*
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be written, in bytes.
* @value: Value to be written.
*/
@@ -219,22 +213,21 @@ void kbase_csf_firmware_csg_input(
u32 value);
/**
- * kbase_csf_firmware_csg_input_read() - Read a word in a command stream group's
- * input page
+ * kbase_csf_firmware_csg_input_read() - Read a word in a CSG's input page
*
- * Return: Value of the word read from the command stream group's input page.
+ * Return: Value of the word read from the CSG's input page.
*
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_csg_input_read(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
/**
- * kbase_csf_firmware_csg_input_mask() - Set part of a word in a command stream
- * group's input page
+ * kbase_csf_firmware_csg_input_mask() - Set part of a word in a CSG's
+ * input page
*
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be modified, in bytes.
* @value: Value to be written.
* @mask: Bitmask with the bits to be modified set.
@@ -244,19 +237,18 @@ void kbase_csf_firmware_csg_input_mask(
u32 value, u32 mask);
/**
- * kbase_csf_firmware_csg_output()- Read a word in a command stream group's
- * output page
+ * kbase_csf_firmware_csg_output()- Read a word in a CSG's output page
*
- * Return: Value of the word read from the command stream group's output page.
+ * Return: Value of the word read from the CSG's output page.
*
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_csg_output(
const struct kbase_csf_cmd_stream_group_info *info, u32 offset);
/**
- * struct kbase_csf_global_iface - Global command stream front-end interface
+ * struct kbase_csf_global_iface - Global CSF interface
* provided by the firmware.
*
* @kbdev: Address of the instance of a GPU platform device that implements
@@ -268,11 +260,11 @@ u32 kbase_csf_firmware_csg_output(
* be suspended). Reserved bits should be 0, and should be ignored.
* @input: Address of global interface input page.
* @output: Address of global interface output page.
- * @group_num: Number of command stream groups supported.
+ * @group_num: Number of CSGs supported.
* @group_stride: Stride in bytes in JASID0 virtual address between
- * command stream group capability structures.
+ * CSG capability structures.
* @prfcnt_size: Performance counters size.
- * @groups: Address of an array of command stream group capability structures.
+ * @groups: Address of an array of CSG capability structures.
*/
struct kbase_csf_global_iface {
struct kbase_device *kbdev;
@@ -289,7 +281,7 @@ struct kbase_csf_global_iface {
/**
* kbase_csf_firmware_global_input() - Set a word in the global input page
*
- * @iface: Command stream front-end interface provided by the firmware.
+ * @iface: CSF interface provided by the firmware.
* @offset: Offset of the word to be written, in bytes.
* @value: Value to be written.
*/
@@ -300,7 +292,7 @@ void kbase_csf_firmware_global_input(
* kbase_csf_firmware_global_input_mask() - Set part of a word in the global
* input page
*
- * @iface: Command stream front-end interface provided by the firmware.
+ * @iface: CSF interface provided by the firmware.
* @offset: Offset of the word to be modified, in bytes.
* @value: Value to be written.
* @mask: Bitmask with the bits to be modified set.
@@ -314,7 +306,7 @@ void kbase_csf_firmware_global_input_mask(
*
* Return: Value of the word read from the global input page.
*
- * @info: Command stream group interface provided by the firmware.
+ * @info: CSG interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_global_input_read(
@@ -325,7 +317,7 @@ u32 kbase_csf_firmware_global_input_read(
*
* Return: Value of the word read from the global output page.
*
- * @iface: Command stream front-end interface provided by the firmware.
+ * @iface: CSF interface provided by the firmware.
* @offset: Offset of the word to be read, in bytes.
*/
u32 kbase_csf_firmware_global_output(
@@ -405,8 +397,7 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev);
*
* The function sends the ping request to firmware to confirm it is alive.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Return: 0 on success, or negative on failure.
*/
@@ -415,8 +406,7 @@ int kbase_csf_firmware_ping(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_set_timeout - Set a hardware endpoint progress timeout.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @timeout: The maximum number of GPU cycles that is allowed to elapse
* without forward progress before the driver terminates a GPU
* command queue group.
@@ -433,8 +423,7 @@ int kbase_csf_firmware_set_timeout(struct kbase_device *kbdev, u64 timeout);
* enter protected mode and wait for its
* completion.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_enter_protected_mode(struct kbase_device *kbdev);
@@ -454,16 +443,14 @@ static inline bool kbase_csf_firmware_mcu_halted(struct kbase_device *kbdev)
* into a known internal state for warm
* boot later.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev);
/**
* kbase_csf_firmware_enable_mcu - Send the command to enable MCU
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
{
@@ -477,8 +464,7 @@ static inline void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
/**
* kbase_csf_firmware_disable_mcu - Send the command to disable MCU
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
{
@@ -489,8 +475,7 @@ static inline void kbase_csf_firmware_disable_mcu(struct kbase_device *kbdev)
* kbase_csf_firmware_disable_mcu_wait - Wait for the MCU to reach disabled
* status.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev);
@@ -499,8 +484,7 @@ void kbase_csf_firmware_disable_mcu_wait(struct kbase_device *kbdev);
* cold boot case firmware image would be
* reloaded from filesystem into memory.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev);
@@ -508,8 +492,7 @@ void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev);
* kbase_csf_firmware_reload_completed - The reboot of MCU firmware has
* completed.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev);
@@ -517,10 +500,11 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev);
* kbase_csf_firmware_global_reinit - Send the Global configuration requests
* after the reboot of MCU firmware.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @core_mask: Mask of the enabled shader cores.
*/
-void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev);
+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
+ u64 core_mask);
/**
* kbase_csf_firmware_global_reinit_complete - Check the Global configuration
@@ -529,34 +513,50 @@ void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev);
*
* Return: true if the Global configuration requests completed otherwise false.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev);
/**
+ * kbase_csf_firmware_update_core_mask - Send the Global configuration request
+ * to update the mask of enabled shader cores.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @new_core_mask: New mask of the enabled cores.
+ */
+void kbase_csf_firmware_update_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask);
+
+/**
+ * kbase_csf_firmware_core_mask_updated - Check whether the Global configuration
+ * request that was sent to update the
+ * mask of enabled shader cores has completed.
+ *
+ * Return: true if the Global configuration request to update the mask of
+ * enabled shader cores has completed, otherwise false.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+bool kbase_csf_firmware_core_mask_updated(struct kbase_device *kbdev);
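Taken together, the two declarations above form a request/acknowledge pair: the new mask is posted to the firmware's global interface and completion is then polled via the CFG_ALLOC_EN acknowledgement. A minimal sketch of that calling pattern follows; the helper names are hypothetical, and the only assumption carried over from the kernel-doc is that both calls expect kbdev->hwaccess_lock to be held.

/* Hypothetical sketch, not part of the patch: post a new core mask and
 * later poll for the firmware acknowledgement, taking hwaccess_lock
 * around each call as the declarations above require.
 */
static void example_request_core_mask(struct kbase_device *kbdev, u64 mask)
{
	unsigned long flags;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_csf_firmware_update_core_mask(kbdev, mask);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}

static bool example_core_mask_update_done(struct kbase_device *kbdev)
{
	unsigned long flags;
	bool done;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	done = kbase_csf_firmware_core_mask_updated(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return done;
}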
+
+/**
* Request the global control block of CSF interface capabilities
*
- * Return: Total number of command streams, summed across all groups.
+ * Return: Total number of CSs, summed across all groups.
*
* @kbdev: Kbase device.
* @group_data: Pointer where to store all the group data
* (sequentially).
* @max_group_num: The maximum number of groups to be read.
* Can be 0, in which case group_data is unused.
- * @stream_data: Pointer where to store all the stream data
+ * @stream_data: Pointer where to store all the CS data
* (sequentially).
- * @max_total_stream_num: The maximum number of streams to be read.
+ * @max_total_stream_num: The maximum number of CSs to be read.
* Can be 0, in which case stream_data is unused.
* @glb_version: Where to store the global interface version.
- * Bits 31:16 hold the major version number and
- * 15:0 hold the minor version number.
- * A higher minor version is backwards-compatible
- * with a lower minor version for the same major
- * version.
* @features: Where to store a bit mask of features (e.g.
* whether certain types of job can be suspended).
- * @group_num: Where to store the number of command stream groups
+ * @group_num: Where to store the number of CSGs
* supported.
* @prfcnt_size: Where to store the size of CSF performance counters,
* in bytes. Bits 31:16 hold the size of firmware
@@ -660,4 +660,57 @@ static inline long kbase_csf_timeout_in_jiffies(const unsigned int msecs)
#endif
}
+/**
+ * kbase_csf_firmware_enable_gpu_idle_timer() - Activate the idle hysteresis
+ * monitoring operation
+ *
+ * Program the firmware interface with its configured hysteresis count value
+ * and enable the firmware to act on it. The caller is assumed to hold
+ * kbdev->csf.scheduler.interrupt_lock.
+ *
+ * @kbdev: Kbase device structure
+ */
+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_disable_gpu_idle_timer() - Disable the idle time
+ * hysteresis monitoring operation
+ *
+ * Program the firmware interface to disable the idle hysteresis timer. The
+ * caller is assumed to hold kbdev->csf.scheduler.interrupt_lock.
+ *
+ * @kbdev: Kbase device structure
+ */
+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_get_gpu_idle_hysteresis_time - Get the firmware GPU idle
+ * detection hysteresis duration
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Return: the internally recorded hysteresis (nominal) value.
+ */
+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_set_gpu_idle_hysteresis_time - Set the firmware GPU idle
+ * detection hysteresis duration
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @dur: The duration value (unit: milliseconds) for configuring the
+ * hysteresis field for GPU idle detection
+ *
+ * The supplied value will be recorded internally without any change, but the
+ * actual field value will be subject to hysteresis source frequency scaling
+ * and maximum value limiting. The default source is the SYSTEM_TIMESTAMP
+ * counter; if the platform cannot supply it, the GPU CYCLE_COUNTER source
+ * is used as an alternative. Bit 31 of the returned value is the source
+ * configuration flag, and it is set to '1' when the CYCLE_COUNTER
+ * alternative source is used.
+ *
+ * Return: the actual internally configured hysteresis field value.
+ */
+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur);
+
#endif
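As a usage illustration for the pair of functions declared above (a sketch only, not part of the patch): the setter takes the nominal duration in milliseconds and returns the scaled field value, with bit 31 flagging the CYCLE_COUNTER fallback source, so a hypothetical caller could record the outcome like this:

/* Hypothetical sketch: program a 10 ms idle hysteresis and report the nominal
 * value, the scaled field value and the selected timer source, based on the
 * bit-31 flag documented above.
 */
static void example_set_idle_hysteresis(struct kbase_device *kbdev)
{
	u32 field = kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, 10);

	dev_dbg(kbdev->dev, "idle hysteresis: %u ms nominal, field 0x%08x, %s source",
		kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev), field,
		(field & BIT(31)) ? "CYCLE_COUNTER" : "SYSTEM_TIMESTAMP");
}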
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index b77980e..1891778 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -26,19 +26,23 @@
#include "mali_kbase_csf_timeout.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_reset_gpu.h"
+#include "mali_kbase_ctx_sched.h"
#include "device/mali_kbase_device.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
#include "mali_kbase_csf_scheduler.h"
#include "mmu/mali_kbase_mmu.h"
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <linux/mman.h>
#include <linux/string.h>
+#include <linux/mutex.h>
#if (KERNEL_VERSION(4, 13, 0) <= LINUX_VERSION_CODE)
#include <linux/set_memory.h>
#endif
+#include <asm/arch_timer.h>
#ifdef CONFIG_MALI_DEBUG
/* Makes Driver wait indefinitely for an acknowledgment for the different
@@ -56,7 +60,7 @@ MODULE_PARM_DESC(fw_debug,
#define DUMMY_FW_PAGE_SIZE SZ_4K
/**
- * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware streams
+ * struct dummy_firmware_csi - Represents a dummy interface for MCU firmware CSs
*
* @cs_kernel_input: CS kernel input memory region
* @cs_kernel_output: CS kernel output memory region
@@ -67,7 +71,7 @@ struct dummy_firmware_csi {
};
/**
- * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware stream groups
+ * struct dummy_firmware_csg - Represents a dummy interface for MCU firmware CSGs
*
* @csg_input: CSG kernel input memory region
* @csg_output: CSG kernel output memory region
@@ -95,8 +99,9 @@ struct dummy_firmware_interface {
struct list_head node;
} dummy_firmware_interface;
-#define CSF_GLB_REQ_CFG_MASK \
- (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK)
+#define CSF_GLB_REQ_CFG_MASK \
+ (GLB_REQ_CFG_ALLOC_EN_MASK | GLB_REQ_CFG_PROGRESS_TIMER_MASK | \
+ GLB_REQ_CFG_PWROFF_TIMER_MASK)
static inline u32 input_page_read(const u32 *const input, const u32 offset)
{
@@ -416,6 +421,54 @@ u32 kbase_csf_firmware_global_output(
return val;
}
+/**
+ * handle_internal_firmware_fatal - Handler for CS internal firmware fault.
+ *
+ * @kbdev: Pointer to kbase device
+ *
+ * Report a group fatal error to user space for all GPU command queue groups
+ * in the device, terminate them and reset the GPU.
+ */
+static void handle_internal_firmware_fatal(struct kbase_device *const kbdev)
+{
+ int as;
+
+ for (as = 0; as < kbdev->nr_hw_address_spaces; as++) {
+ struct kbase_context *kctx;
+ struct kbase_fault fault = {
+ .status = GPU_EXCEPTION_TYPE_SW_FAULT_1,
+ };
+
+ if (as == MCU_AS_NR)
+ continue;
+
+ kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as);
+ if (!kctx)
+ continue;
+
+ kbase_csf_ctx_handle_fault(kctx, &fault);
+ kbase_ctx_sched_release_ctx_lock(kctx);
+ }
+
+ if (kbase_prepare_to_reset_gpu(kbdev))
+ kbase_reset_gpu(kbdev);
+}
+
+/**
+ * firmware_error_worker - Worker function for handling firmware internal error
+ *
+ * @data: Pointer to a work_struct embedded in kbase device.
+ *
+ * Handle the CS internal firmware error
+ */
+static void firmware_error_worker(struct work_struct *const data)
+{
+ struct kbase_device *const kbdev =
+ container_of(data, struct kbase_device, csf.fw_error_work);
+
+ handle_internal_firmware_fatal(kbdev);
+}
+
static bool global_request_complete(struct kbase_device *const kbdev,
u32 const req_mask)
{
@@ -464,7 +517,7 @@ static void set_global_request(
{
u32 glb_req;
- lockdep_assert_held(&global_iface->kbdev->csf.reg_lock);
+ kbase_csf_scheduler_spin_lock_assert_held(global_iface->kbdev);
glb_req = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
glb_req ^= req_mask;
@@ -484,6 +537,23 @@ static void enable_endpoints_global(
set_global_request(global_iface, GLB_REQ_CFG_ALLOC_EN_MASK);
}
+static void enable_shader_poweroff_timer(
+ const struct kbase_csf_global_iface *const global_iface)
+{
+ u32 pwroff_reg = 0;
+
+ pwroff_reg = GLB_PWROFF_TIMER_TIMEOUT_SET(
+ pwroff_reg, DEFAULT_GLB_PWROFF_TIMER_TIMEOUT /
+ GLB_PWROFF_TIMER_TIMEOUT_SCALE);
+
+ pwroff_reg = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
+ pwroff_reg, GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+ kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER,
+ pwroff_reg);
+ set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK);
+}
+
static void set_timeout_global(
const struct kbase_csf_global_iface *const global_iface,
u64 const timeout)
@@ -494,13 +564,15 @@ static void set_timeout_global(
set_global_request(global_iface, GLB_REQ_CFG_PROGRESS_TIMER_MASK);
}
-static void global_init(struct kbase_device *const kbdev, u32 req_mask)
+static void global_init(struct kbase_device *const kbdev, u64 core_mask)
{
- u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
- GLB_ACK_IRQ_MASK_PING_MASK |
- GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
- GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK;
+ u32 const ack_irq_mask = GLB_ACK_IRQ_MASK_CFG_ALLOC_EN_MASK |
+ GLB_ACK_IRQ_MASK_PING_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PROGRESS_TIMER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_ENTER_MASK |
+ GLB_ACK_IRQ_MASK_PROTM_EXIT_MASK |
+ GLB_ACK_IRQ_MASK_CFG_PWROFF_TIMER_MASK |
+ GLB_ACK_IRQ_MASK_IDLE_EVENT_MASK;
const struct kbase_csf_global_iface *const global_iface =
&kbdev->csf.global_iface;
@@ -508,9 +580,9 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- /* Enable endpoints on all present shader cores */
- enable_endpoints_global(global_iface,
- kbase_pm_get_present_cores(kbdev, KBASE_PM_CORE_SHADER));
+ /* Update shader core allocation enable mask */
+ enable_endpoints_global(global_iface, core_mask);
+ enable_shader_poweroff_timer(global_iface);
set_timeout_global(global_iface, kbase_csf_timeout_get(kbdev));
@@ -526,8 +598,7 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
/**
* global_init_on_boot - Sends a global request to control various features.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Currently only the request to enable endpoints and cycle counter is sent.
*
@@ -535,19 +606,25 @@ static void global_init(struct kbase_device *const kbdev, u32 req_mask)
*/
static int global_init_on_boot(struct kbase_device *const kbdev)
{
- u32 const req_mask = CSF_GLB_REQ_CFG_MASK;
+ unsigned long flags;
+ u64 core_mask;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ core_mask = kbase_pm_ca_get_core_mask(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
- global_init(kbdev, req_mask);
+ global_init(kbdev, core_mask);
- return wait_for_global_request(kbdev, req_mask);
+ return wait_for_global_request(kbdev, CSF_GLB_REQ_CFG_MASK);
}
-void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev)
+void kbase_csf_firmware_global_reinit(struct kbase_device *kbdev,
+ u64 core_mask)
{
lockdep_assert_held(&kbdev->hwaccess_lock);
kbdev->csf.glb_init_request_pending = true;
- global_init(kbdev, CSF_GLB_REQ_CFG_MASK);
+ global_init(kbdev, core_mask);
}
bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
@@ -561,6 +638,26 @@ bool kbase_csf_firmware_global_reinit_complete(struct kbase_device *kbdev)
return !kbdev->csf.glb_init_request_pending;
}
+void kbase_csf_firmware_update_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask)
+{
+ unsigned long flags;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ enable_endpoints_global(&kbdev->csf.global_iface, new_core_mask);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+bool kbase_csf_firmware_core_mask_updated(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ return global_request_complete(kbdev, GLB_REQ_CFG_ALLOC_EN_MASK);
+}
+
static void kbase_csf_firmware_reload_worker(struct work_struct *work)
{
struct kbase_device *kbdev = container_of(work, struct kbase_device,
@@ -604,6 +701,68 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
}
+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms)
+{
+#define HYSTERESIS_VAL_UNIT_SHIFT (10)
+ /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
+ u64 freq = arch_timer_get_cntfrq();
+ u64 dur_val = dur_ms;
+ u32 cnt_val_u32, reg_val_u32;
+ bool src_system_timestamp = freq > 0;
+
+ if (!src_system_timestamp) {
+ /* Get the cycle_counter source alternative */
+ spin_lock(&kbdev->pm.clk_rtm.lock);
+ if (kbdev->pm.clk_rtm.clks[0])
+ freq = kbdev->pm.clk_rtm.clks[0]->clock_val;
+ else
+ dev_warn(kbdev->dev, "No GPU clock, unexpected integration issue!");
+ spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+ dev_info(kbdev->dev, "Can't get the timestamp frequency, "
+ "using the cycle counter format for firmware idle hysteresis!");
+ }
+
+ /* Formula for dur_val = (((dur_ms / 1000) * freq_HZ) >> 10) */
+ dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
+ dur_val = div_u64(dur_val, 1000);
+
+ /* Interface limits the value field to S32_MAX */
+ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+
+ reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32);
+ /* add the source flag */
+ if (src_system_timestamp)
+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+ GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
+ else
+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
+ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER);
+
+ return reg_val_u32;
+}
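As a worked example of the conversion above (illustrative numbers only): with the SYSTEM_TIMESTAMP source at a typical arch-timer frequency of 19.2 MHz and dur_ms = 10, dur_val = ((10 * 19200000) >> 10) / 1000 = 187, so the timeout field is programmed with 187 units of 1024 source ticks (about 9.97 ms) and the TIMER_SOURCE field selects SYSTEM_TIMESTAMP.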
+
+u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
+{
+ return kbdev->csf.gpu_idle_hysteresis_ms;
+}
+
+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
+{
+ unsigned long flags;
+ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ kbdev->csf.gpu_idle_hysteresis_ms = dur;
+ kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
+ hysteresis_val);
+
+ return hysteresis_val;
+}
+
int kbase_csf_firmware_init(struct kbase_device *kbdev)
{
int ret;
@@ -629,9 +788,14 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
INIT_WORK(&kbdev->csf.firmware_reload_work,
kbase_csf_firmware_reload_worker);
+ INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
mutex_init(&kbdev->csf.reg_lock);
+ kbdev->csf.gpu_idle_hysteresis_ms = FIRMWARE_IDLE_HYSTERESIS_TIME_MS;
+ kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(kbdev,
+ FIRMWARE_IDLE_HYSTERESIS_TIME_MS);
+
ret = kbase_mcu_shared_interface_region_tracker_init(kbdev);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to setup the rb tree for managing shared interface segment\n");
@@ -659,6 +823,10 @@ int kbase_csf_firmware_init(struct kbase_device *kbdev)
if (ret != 0)
goto error;
+ ret = kbase_csf_setup_dummy_user_reg_page(kbdev);
+ if (ret != 0)
+ goto error;
+
ret = kbase_csf_scheduler_init(kbdev);
if (ret != 0)
goto error;
@@ -680,6 +848,8 @@ error:
void kbase_csf_firmware_term(struct kbase_device *kbdev)
{
+ cancel_work_sync(&kbdev->csf.fw_error_work);
+
kbase_csf_timeout_term(kbdev);
/* NO_MALI: Don't stop firmware or unload MMU tables */
@@ -688,6 +858,8 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
kbase_csf_scheduler_term(kbdev);
+ kbase_csf_free_dummy_user_reg_page(kbdev);
+
kbase_csf_doorbell_mapping_term(kbdev);
free_global_iface(kbdev);
@@ -721,6 +893,50 @@ void kbase_csf_firmware_term(struct kbase_device *kbdev)
kbase_mcu_shared_interface_region_tracker_term(kbdev);
}
+void kbase_csf_firmware_enable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ u32 glb_req;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ /* The scheduler is assumed to only call the enable when its internal
+ * state indicates that the idle timer has previously been disabled. So
+ * on entry the expected field values are:
+ * 1. GLOBAL_INPUT_BLOCK.GLB_REQ.IDLE_ENABLE: 0
+ * 2. GLOBAL_OUTPUT_BLOCK.GLB_ACK.IDLE_ENABLE: 0, or transitioning from 1 to 0
+ */
+
+ glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+ if (glb_req & GLB_REQ_IDLE_ENABLE_MASK)
+ dev_err(kbdev->dev, "Incoherent scheduler state on REQ_IDLE_ENABLE!");
+
+ kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
+ kbdev->csf.gpu_idle_dur_count);
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+ GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK);
+
+ dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
+ kbdev->csf.gpu_idle_dur_count);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+void kbase_csf_firmware_disable_gpu_idle_timer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+ GLB_REQ_REQ_IDLE_DISABLE,
+ GLB_REQ_IDLE_DISABLE_MASK);
+
+ dev_dbg(kbdev->dev, "Sending request to disable gpu idle timer");
+
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
int kbase_csf_firmware_ping(struct kbase_device *const kbdev)
{
const struct kbase_csf_global_iface *const global_iface =
@@ -794,9 +1010,9 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
}
/**
- * copy_grp_and_stm - Copy command stream and/or group data
+ * copy_grp_and_stm - Copy CS and/or group data
*
- * @iface: Global command stream front-end interface provided by
+ * @iface: Global CSF interface provided by
* the firmware.
* @group_data: Pointer where to store all the group data
* (sequentially).
@@ -807,7 +1023,7 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
* @max_total_stream_num: The maximum number of streams to be read.
* Can be 0, in which case stream_data is unused.
*
- * Return: Total number of command streams, summed across all groups.
+ * Return: Total number of CSs, summed across all groups.
*/
static u32 copy_grp_and_stm(
const struct kbase_csf_global_iface * const iface,
diff --git a/mali_kbase/csf/mali_kbase_csf_ioctl.h b/mali_kbase/csf/mali_kbase_csf_ioctl.h
index e9bb8d2..d7db345 100644
--- a/mali_kbase/csf/mali_kbase_csf_ioctl.h
+++ b/mali_kbase/csf/mali_kbase_csf_ioctl.h
@@ -46,8 +46,6 @@ struct kbase_ioctl_version_check {
__u16 minor;
};
-#define KBASE_IOCTL_VERSION_CHECK \
- _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
#define KBASE_IOCTL_VERSION_CHECK_RESERVED \
_IOWR(KBASE_IOCTL_TYPE, 0, struct kbase_ioctl_version_check)
@@ -92,7 +90,7 @@ struct kbase_ioctl_cs_queue_kick {
* @group_handle: Handle of the group to which the queue should be bound
* @csi_index: Index of the CSF interface the queue should be bound to
* @padding: Currently unused, must be zero
- * @mmap_handle: Handle to be used for creating the mapping of command stream
+ * @mmap_handle: Handle to be used for creating the mapping of CS
* input/output pages
*
* @in: Input parameters
@@ -134,7 +132,7 @@ struct kbase_ioctl_cs_queue_terminate {
* @tiler_mask: Mask of tiler endpoints the group is allowed to use.
* @fragment_mask: Mask of fragment endpoints the group is allowed to use.
* @compute_mask: Mask of compute endpoints the group is allowed to use.
- * @cs_min: Minimum number of command streams required.
+ * @cs_min: Minimum number of CSs required.
* @priority: Queue group's priority within a process.
* @tiler_max: Maximum number of tiler endpoints the group is allowed
* to use.
@@ -293,22 +291,19 @@ struct kbase_ioctl_cs_tiler_heap_term {
*
* @max_group_num: The maximum number of groups to be read. Can be 0, in
* which case groups_ptr is unused.
- * @max_total_stream_num: The maximum number of streams to be read. Can be 0, in
+ * @max_total_stream_num: The maximum number of CSs to be read. Can be 0, in
* which case streams_ptr is unused.
* @groups_ptr: Pointer where to store all the group data (sequentially).
- * @streams_ptr: Pointer where to store all the stream data (sequentially).
- * @glb_version: Global interface version. Bits 31:16 hold the major
- * version number and 15:0 hold the minor version number.
- * A higher minor version is backwards-compatible with a
- * lower minor version for the same major version.
+ * @streams_ptr: Pointer where to store all the CS data (sequentially).
+ * @glb_version: Global interface version.
* @features: Bit mask of features (e.g. whether certain types of job
* can be suspended).
- * @group_num: Number of command stream groups supported.
+ * @group_num: Number of CSGs supported.
* @prfcnt_size: Size of CSF performance counters, in bytes. Bits 31:16
* hold the size of firmware performance counter data
* and 15:0 hold the size of hardware performance counter
* data.
- * @total_stream_num: Total number of command streams, summed across all groups.
+ * @total_stream_num: Total number of CSs, summed across all groups.
* @padding: Will be zeroed.
*
* @in: Input parameters
@@ -335,6 +330,18 @@ union kbase_ioctl_cs_get_glb_iface {
#define KBASE_IOCTL_CS_GET_GLB_IFACE \
_IOWR(KBASE_IOCTL_TYPE, 51, union kbase_ioctl_cs_get_glb_iface)
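+
+/**
+ * struct kbase_ioctl_cs_cpu_queue_info - Buffer descriptor for the CPU queue dump ioctl
+ * @buffer: Address of the user buffer for the CPU queue dump
+ * @size: Size of the user buffer, in bytes
+ */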
+struct kbase_ioctl_cs_cpu_queue_info {
+ __u64 buffer;
+ __u64 size;
+};
+
+#define KBASE_IOCTL_VERSION_CHECK \
+ _IOWR(KBASE_IOCTL_TYPE, 52, struct kbase_ioctl_version_check)
+
+#define KBASE_IOCTL_CS_CPU_QUEUE_DUMP \
+ _IOW(KBASE_IOCTL_TYPE, 53, struct kbase_ioctl_cs_cpu_queue_info)
+
+
/***************
* test ioctls *
***************/
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index e1263d5..b9c2597 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -228,15 +228,6 @@ static int kbase_kcpu_jit_allocate_process(
/* Now start the allocation loop */
for (i = 0, info = alloc_info->info; i < count; i++, info++) {
- if (kctx->jit_alloc[info->id]) {
- /* The JIT ID is duplicated in this command. Roll back
- * previous allocations and fail.
- */
- dev_warn(kctx->kbdev->dev, "JIT ID is duplicated\n");
- ret = -EINVAL;
- goto fail;
- }
-
/* Create a JIT allocation */
reg = kbase_jit_allocate(kctx, info, true);
if (!reg) {
@@ -368,6 +359,18 @@ static int kbase_kcpu_jit_allocate_prepare(
goto out_free;
}
+ /* Search for duplicate JIT ids */
+ for (i = 0; i < (count - 1); i++) {
+ u32 j;
+
+ for (j = (i + 1); j < count; j++) {
+ if (info[i].id == info[j].id) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+ }
+ }
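The pairwise scan above is O(count^2); since the IDs are 8-bit values, the same check could also be expressed with a bitmap. A sketch of that alternative follows as a design note only — it is not what the patch does, and it assumes info and count have the same meaning as in the prepare function above:

/* Alternative sketch: flag duplicate 8-bit JIT IDs with a bitmap rather
 * than the nested loops added by the patch.
 */
#include <linux/bitmap.h>

static bool example_has_duplicate_jit_ids(const struct base_jit_alloc_info *info,
					  u32 count)
{
	DECLARE_BITMAP(seen, 256) = { 0 };
	u32 i;

	for (i = 0; i < count; i++)
		if (__test_and_set_bit(info[i].id, seen))
			return true;

	return false;
}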
+
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_ALLOC;
list_add_tail(&current_command->info.jit_alloc.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
@@ -432,38 +435,54 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
queue_work(kctx->csf.kcpu_queues.wq, &blocked_queue->work);
}
-static int kbase_kcpu_jit_free_process(struct kbase_context *kctx,
- struct kbase_kcpu_command *const cmd)
+static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
+ struct kbase_kcpu_command *const cmd)
{
- struct kbase_kcpu_command_jit_free_info *const free_info =
- &cmd->info.jit_free;
- u8 *ids = free_info->ids;
- u32 count = free_info->count;
+ struct kbase_kcpu_command_jit_free_info const *const free_info =
+ &cmd->info.jit_free;
+ u8 const *const ids = free_info->ids;
+ u32 const count = free_info->count;
u32 i;
+ int rc = 0;
+ struct kbase_context *kctx = queue->kctx;
if (WARN_ON(!ids))
return -EINVAL;
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
- for (i = 0; i < count; i++, ids++) {
- if ((*ids == 0) || (kctx->jit_alloc[*ids] == NULL)) {
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
+ queue->kctx->kbdev, queue);
+
+ for (i = 0; i < count; i++) {
+ u64 pages_used = 0;
+ int item_err = 0;
+
+ if (!kctx->jit_alloc[ids[i]]) {
dev_warn(kctx->kbdev->dev, "invalid JIT free ID\n");
+ rc = -EINVAL;
+ item_err = rc;
} else {
- /* If the ID is valid but the allocation request
- * failed, still succeed this command but don't
- * try and free the allocation.
+ struct kbase_va_region *const reg = kctx->jit_alloc[ids[i]];
+
+ /*
+ * If the ID is valid but the allocation request failed, still
+ * succeed this command but don't try and free the allocation.
*/
- if (kctx->jit_alloc[*ids] !=
- KBASE_RESERVED_REG_JIT_ALLOC)
- kbase_jit_free(kctx, kctx->jit_alloc[*ids]);
+ if (reg != KBASE_RESERVED_REG_JIT_ALLOC) {
+ pages_used = reg->gpu_alloc->nents;
+ kbase_jit_free(kctx, reg);
+ }
- kctx->jit_alloc[*ids] = NULL;
+ kctx->jit_alloc[ids[i]] = NULL;
}
+
+ KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(
+ queue->kctx->kbdev, queue, item_err, pages_used);
}
/* Free the list of ids */
- kfree(free_info->ids);
+ kfree(ids);
/**
* Remove this command from the jit_cmds_head list and retry pending
@@ -472,7 +491,7 @@ static int kbase_kcpu_jit_free_process(struct kbase_context *kctx,
list_del(&cmd->info.jit_free.node);
kbase_kcpu_jit_retry_pending_allocs(kctx);
- return 0;
+ return rc;
}
static int kbase_kcpu_jit_free_prepare(
@@ -520,6 +539,18 @@ static int kbase_kcpu_jit_free_prepare(
}
}
+ /* Search for duplicate JIT ids */
+ for (i = 0; i < (count - 1); i++) {
+ u32 j;
+
+ for (j = (i + 1); j < count; j++) {
+ if (ids[i] == ids[j]) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+ }
+ }
+
current_command->type = BASE_KCPU_COMMAND_TYPE_JIT_FREE;
list_add_tail(&current_command->info.jit_free.node,
&kctx->csf.kcpu_queues.jit_cmds_head);
@@ -642,13 +673,9 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
- if (WARN_ON(!cqs_wait->nr_objs))
- return -EINVAL;
-
if (WARN_ON(!cqs_wait->objs))
return -EINVAL;
-
/* Skip the CQS waits that have already been signaled when processing */
for (i = find_first_zero_bit(cqs_wait->signaled, cqs_wait->nr_objs); i < cqs_wait->nr_objs; i++) {
if (!test_bit(i, cqs_wait->signaled)) {
@@ -663,7 +690,9 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
queue->command_started = true;
}
- if (WARN_ON(!evt)) {
+ if (!evt) {
+ dev_warn(kbdev->dev,
+ "Sync memory %llx already freed", cqs_wait->objs[i].addr);
queue->has_error = true;
return -EINVAL;
}
@@ -676,7 +705,10 @@ static int kbase_kcpu_cqs_wait_process(struct kbase_device *kbdev,
queue->has_error = true;
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END(
- kbdev, queue);
+ kbdev, queue,
+ queue->has_error ?
+ evt[BASEP_EVENT_ERR_INDEX] :
+ 0);
queue->command_started = false;
}
@@ -702,7 +734,10 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
- if (cqs_wait_info->nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+ return -EINVAL;
+
+ if (!nr_objs)
return -EINVAL;
objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
@@ -719,6 +754,7 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
if (kbase_csf_event_wait_add(queue->kctx,
event_cqs_callback, queue)) {
kfree(objs);
+ queue->cqs_wait_count--;
return -ENOMEM;
}
}
@@ -731,8 +767,15 @@ static int kbase_kcpu_cqs_wait_prepare(struct kbase_kcpu_command_queue *queue,
current_command->info.cqs_wait.signaled = kcalloc(BITS_TO_LONGS(nr_objs),
sizeof(*current_command->info.cqs_wait.signaled), GFP_KERNEL);
- if (!current_command->info.cqs_wait.signaled)
+ if (!current_command->info.cqs_wait.signaled) {
+ if (--queue->cqs_wait_count == 0) {
+ kbase_csf_event_wait_remove(queue->kctx,
+ event_cqs_callback, queue);
+ }
+
+ kfree(objs);
return -ENOMEM;
+ }
return 0;
}
@@ -745,17 +788,24 @@ static void kbase_kcpu_cqs_set_process(struct kbase_device *kbdev,
lockdep_assert_held(&queue->kctx->csf.kcpu_queues.lock);
- WARN_ON(!cqs_set->nr_objs);
- WARN_ON(!cqs_set->objs);
+ if (WARN_ON(!cqs_set->objs))
+ return;
for (i = 0; i < cqs_set->nr_objs; i++) {
struct kbase_vmap_struct *mapping;
- u32 *evt = (u32 *)kbase_phy_alloc_mapping_get(queue->kctx,
- cqs_set->objs[i].addr, &mapping);
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue);
- if (WARN_ON(!evt))
+ u32 *evt;
+
+ evt = (u32 *)kbase_phy_alloc_mapping_get(
+ queue->kctx, cqs_set->objs[i].addr, &mapping);
+
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET(kbdev, queue,
+ evt ? 0 : 1);
+
+ if (!evt) {
+ dev_warn(kbdev->dev,
+ "Sync memory %llx already freed", cqs_set->objs[i].addr);
queue->has_error = true;
- else {
+ } else {
if (cqs_set->propagate_flags & (1 << i))
evt[BASEP_EVENT_ERR_INDEX] = queue->has_error;
else
@@ -783,7 +833,10 @@ static int kbase_kcpu_cqs_set_prepare(
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
- if (cqs_set_info->nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+ if (nr_objs > BASEP_KCPU_CQS_MAX_NUM_OBJS)
+ return -EINVAL;
+
+ if (!nr_objs)
return -EINVAL;
objs = kcalloc(nr_objs, sizeof(*objs), GFP_KERNEL);
@@ -1096,7 +1149,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue,
const struct kbase_kcpu_command_jit_alloc_info *jit_alloc,
- bool alloc_success)
+ int alloc_status)
{
u8 i;
@@ -1108,8 +1161,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
u64 gpu_alloc_addr = 0;
u64 mmu_flags = 0;
- if (alloc_success && !WARN_ON(!reg) &&
- !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) {
+ if ((alloc_status == 0) && !WARN_ON(!reg) &&
+ !WARN_ON(reg == KBASE_RESERVED_REG_JIT_ALLOC)) {
#ifdef CONFIG_MALI_VECTOR_DUMP
struct tagged_addr phy = {0};
#endif /* CONFIG_MALI_VECTOR_DUMP */
@@ -1123,7 +1176,7 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
#endif /* CONFIG_MALI_VECTOR_DUMP */
}
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
- kbdev, queue, gpu_alloc_addr, mmu_flags);
+ kbdev, queue, alloc_status, gpu_alloc_addr, mmu_flags);
}
}
@@ -1135,30 +1188,6 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
kbdev, queue);
}
-static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_INFO(
- struct kbase_device *kbdev,
- const struct kbase_kcpu_command_queue *queue,
- const struct kbase_kcpu_command_jit_free_info *jit_free)
-{
- u8 i;
-
- KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END(
- kbdev, queue);
- for (i = 0; i < jit_free->count; i++) {
- const u8 id = jit_free->ids[i];
- u64 pages_used = 0;
-
- if (id != 0) {
- const struct kbase_va_region *reg =
- queue->kctx->jit_alloc[id];
- if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC))
- pages_used = reg->gpu_alloc->nents;
- }
- KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END(
- kbdev, queue, pages_used);
- }
-}
-
static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(
struct kbase_device *kbdev,
const struct kbase_kcpu_command_queue *queue)
@@ -1189,10 +1218,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
queue->command_started = true;
}
-#ifdef CONFIG_SYNC_FILE
status = 0;
-
-
+#ifdef CONFIG_SYNC_FILE
if (ignore_waits) {
kbase_kcpu_fence_wait_cancel(queue,
&cmd->info.fence);
@@ -1208,11 +1235,14 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
#else
dev_warn(kbdev->dev,
"unexpected fence wait command found\n");
+
+ status = -EINVAL;
+ queue->has_error = true;
#endif
if (process_next) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END(
- kbdev, queue);
+ kbdev, queue, status < 0 ? status : 0);
queue->command_started = false;
}
break;
@@ -1220,16 +1250,24 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_START(
kbdev, queue);
+ status = 0;
+
#ifdef CONFIG_SYNC_FILE
- kbase_kcpu_fence_signal_process(queue,
- &cmd->info.fence);
+ status = kbase_kcpu_fence_signal_process(
+ queue, &cmd->info.fence);
+
+ if (status < 0)
+ queue->has_error = true;
#else
dev_warn(kbdev->dev,
"unexpected fence signal command found\n");
+
+ status = -EINVAL;
+ queue->has_error = true;
#endif
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END(
- kbdev, queue);
+ kbdev, queue, status);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
status = kbase_kcpu_cqs_wait_process(kbdev, queue,
@@ -1252,48 +1290,77 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
kbase_kcpu_cqs_set_process(kbdev, queue,
&cmd->info.cqs_set);
- /* CQS sets are only traced before execution */
break;
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
/* Clear the queue's error state */
queue->has_error = false;
+
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER(
+ kbdev, queue);
break;
- case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
+ case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT: {
+ struct kbase_ctx_ext_res_meta *meta = NULL;
+
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START(
kbdev, queue);
kbase_gpu_vm_lock(queue->kctx);
- kbase_sticky_resource_acquire(queue->kctx,
- cmd->info.import.gpu_va);
+ meta = kbase_sticky_resource_acquire(
+ queue->kctx, cmd->info.import.gpu_va);
kbase_gpu_vm_unlock(queue->kctx);
+ if (meta == NULL) {
+ queue->has_error = true;
+ dev_warn(kbdev->dev,
+ "failed to map an external resource\n");
+ }
+
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END(
- kbdev, queue);
+ kbdev, queue, meta ? 0 : 1);
break;
- case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT:
+ }
+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT: {
+ bool ret;
+
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START(
kbdev, queue);
kbase_gpu_vm_lock(queue->kctx);
- kbase_sticky_resource_release(queue->kctx, NULL,
- cmd->info.import.gpu_va);
+ ret = kbase_sticky_resource_release(
+ queue->kctx, NULL, cmd->info.import.gpu_va);
kbase_gpu_vm_unlock(queue->kctx);
+ if (ret == false) {
+ queue->has_error = true;
+ dev_warn(kbdev->dev,
+ "failed to release the reference: resource not found\n");
+ }
+
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END(
- kbdev, queue);
+ kbdev, queue, ret ? 0 : 1);
break;
- case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE:
+ }
+ case BASE_KCPU_COMMAND_TYPE_UNMAP_IMPORT_FORCE: {
+ bool ret;
+
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START(
kbdev, queue);
kbase_gpu_vm_lock(queue->kctx);
- kbase_sticky_resource_release_force(queue->kctx, NULL,
- cmd->info.import.gpu_va);
+ ret = kbase_sticky_resource_release_force(
+ queue->kctx, NULL, cmd->info.import.gpu_va);
kbase_gpu_vm_unlock(queue->kctx);
+ if (ret == false) {
+ queue->has_error = true;
+ dev_warn(kbdev->dev,
+ "failed to release the reference: resource not found\n");
+ }
+
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END(
- kbdev, queue);
+ kbdev, queue, ret ? 0 : 1);
break;
+ }
case BASE_KCPU_COMMAND_TYPE_JIT_ALLOC:
{
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START(
@@ -1307,7 +1374,8 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
queue->has_error = true;
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_INFO(
- kbdev, queue, &cmd->info.jit_alloc, (status == 0));
+ kbdev, queue, &cmd->info.jit_alloc,
+ status);
kbase_kcpu_jit_allocate_finish(queue, cmd);
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_END(
@@ -1319,10 +1387,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_START(
kbdev, queue);
- KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_INFO(
- kbdev, queue, &cmd->info.jit_free);
-
- status = kbase_kcpu_jit_free_process(queue->kctx, cmd);
+ status = kbase_kcpu_jit_free_process(queue, cmd);
if (status)
queue->has_error = true;
@@ -1330,6 +1395,9 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
kbdev, queue);
break;
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START(
+ kbdev, queue);
+
status = kbase_csf_queue_group_suspend_process(
queue->kctx,
cmd->info.suspend_buf_copy.sus_buf,
@@ -1337,6 +1405,9 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
if (status)
queue->has_error = true;
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END(
+ kbdev, queue, status);
+
kfree(cmd->info.suspend_buf_copy.sus_buf->pages);
kfree(cmd->info.suspend_buf_copy.sus_buf);
break;
@@ -1390,11 +1461,13 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
{
const struct base_cqs_wait *waits = cmd->info.cqs_wait.objs;
+ u32 inherit_err_flags = cmd->info.cqs_wait.inherit_err_flags;
unsigned int i;
for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT(
- kbdev, queue, waits[i].addr, waits[i].val);
+ kbdev, queue, waits[i].addr, waits[i].val,
+ inherit_err_flags & ((u32)1 << i));
}
break;
}
@@ -1410,7 +1483,8 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
break;
}
case BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER:
- /* No implemented tracepoint */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER(kbdev,
+ queue);
break;
case BASE_KCPU_COMMAND_TYPE_MAP_IMPORT:
KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT(
@@ -1435,11 +1509,11 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
&cmd->info.jit_alloc.info[i];
KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
- kbdev, queue,
- info->gpu_alloc_addr, info->va_pages,
- info->commit_pages, info->extent, info->id,
- info->bin_id, info->max_allocations,
- info->flags, info->usage_id);
+ kbdev, queue, info->gpu_alloc_addr,
+ info->va_pages, info->commit_pages,
+ info->extension, info->id, info->bin_id,
+ info->max_allocations, info->flags,
+ info->usage_id);
}
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC(
kbdev, queue);
@@ -1460,7 +1534,9 @@ static void KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_COMMAND(
break;
}
case BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND:
- /* No implemented tracepoint */
+ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND(
+ kbdev, queue, cmd->info.suspend_buf_copy.sus_buf,
+ cmd->info.suspend_buf_copy.group_handle);
break;
}
}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index 45c76af..6c4c2d2 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -159,7 +159,7 @@ struct kbase_suspend_copy_buffer {
*
* @sus_buf: Pointer to the structure which contains details of the
* user buffer and its kernel pinned pages.
- * @group_handle: Handle to the mapping of command stream group.
+ * @group_handle: Handle to the mapping of CSG.
*/
struct kbase_kcpu_command_group_suspend_info {
struct kbase_suspend_copy_buffer *sus_buf;
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index f1a318d..4a88ffc 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -28,6 +28,7 @@
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_regs_history_debugfs.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
/* Waiting timeout for GPU reset to complete */
#define GPU_RESET_TIMEOUT_MS (5000) /* 5 seconds */
@@ -177,6 +178,11 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
kbase_csf_dump_firmware_trace_buffer(kbdev);
}
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->protected_mode = false;
+ kbase_ipa_control_handle_gpu_reset_pre(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
/* Reset the GPU */
err = kbase_pm_init_hw(kbdev, 0);
@@ -188,6 +194,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev,
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_ctx_sched_restore_all_as(kbdev);
+ kbase_ipa_control_handle_gpu_reset_post(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 0352d63..a78088a 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -25,7 +25,6 @@
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_as_fault_debugfs.h>
-#include <mali_kbase_bits.h>
#include "mali_kbase_csf.h"
#include "../tl/mali_kbase_tracepoints.h"
#include "backend/gpu/mali_kbase_pm_internal.h"
@@ -76,10 +75,10 @@
*/
#define CSF_SCHEDULER_TIME_TOCK_JIFFIES 1 /* 1 jiffies-time */
-/* Command stream suspended and is idle (empty ring buffer) */
+/* CS suspended and is idle (empty ring buffer) */
#define CS_IDLE_FLAG (1 << 0)
-/* Command stream suspended and is wait for a CQS condition */
+/* CS suspended and is waiting for a CQS condition */
#define CS_WAIT_SYNC_FLAG (1 << 1)
/* This is to avoid the immediate power down of GPU when there are no groups
@@ -88,6 +87,14 @@
*/
#define GPU_IDLE_POWEROFF_HYSTERESIS_DELAY msecs_to_jiffies((u32)10)
+/* This is a workaround used until MIDHARC-3065 is available, to avoid some
+ * corner cases where a hang is possible for GPU queues that happen to be
+ * executing deferred commands when suspended.
+ * MIDHARC-3065 is scheduled for headers 10.x.7/11.x.4.
+ */
+
+#define USE_PRE_MIDHARC_3065_WORKAROUND (1)
+
static int scheduler_group_schedule(struct kbase_queue_group *group);
static void remove_group_from_idle_wait(struct kbase_queue_group *const group);
static
@@ -176,7 +183,7 @@ static void assign_user_doorbell_to_queue(struct kbase_device *kbdev,
mutex_lock(&kbdev->csf.reg_lock);
/* If bind operation for the queue hasn't completed yet, then the
- * the command stream interface can't be programmed for the queue
+ * the CSI can't be programmed for the queue
* (even in stopped state) and so the doorbell also can't be assigned
* to it.
*/
@@ -225,8 +232,7 @@ static u32 get_nr_active_csgs(struct kbase_device *kbdev)
/**
* csgs_active - returns true if any of CSG slots are in use
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Return: the interface is actively engaged flag.
*/
@@ -238,7 +244,7 @@ bool csgs_active(struct kbase_device *kbdev)
nr_active_csgs = get_nr_active_csgs(kbdev);
mutex_unlock(&kbdev->csf.scheduler.lock);
- /* Right now if any of the command stream group interfaces are in use
+ /* Right now if any of the CSG interfaces are in use
* then we need to assume that there is some work pending.
* In future when we have IDLE notifications from firmware implemented
* then we would have a better idea of the pending work.
@@ -250,8 +256,7 @@ bool csgs_active(struct kbase_device *kbdev)
* csg_slot_in_use - returns true if a queue group has been programmed on a
* given CSG slot.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @slot: Index/number of the CSG slot in question.
*
* Return: the interface is actively engaged flag.
@@ -314,6 +319,69 @@ static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
return kbdev->csf.scheduler.timer_enabled;
}
+static void enable_gpu_idle_fw_timer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (scheduler->gpu_idle_fw_timer_enabled)
+ return;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+
+ /* Update the timer_enabled flag requires holding interrupt_lock */
+ scheduler->gpu_idle_fw_timer_enabled = true;
+
+ /* Only send the enable when the workaround is not in use, i.e. before
+ * MIDHARC-3065 is supported. This avoids some corner cases where a
+ * hang is possible for GPU queues that happen to be executing some
+ * deferred commands when suspended.
+ */
+#if (!USE_PRE_MIDHARC_3065_WORKAROUND)
+ kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
+#endif
+
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
+static void disable_gpu_idle_fw_timer_locked(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+ lockdep_assert_held(&scheduler->interrupt_lock);
+
+ /* Update of the timer_enabled flag requires holding interrupt_lock */
+ if (scheduler->gpu_idle_fw_timer_enabled) {
+ scheduler->gpu_idle_fw_timer_enabled = false;
+
+ /* Disable can always be sent, even when the build is configured
+ * to use USE_PRE_MIDHARC_3065_WORKAROUND. There is no adverse
+ * side effect from disabling an item that was never enabled via the
+ * firmware interface call - it simply has no effect (i.e. a NOP).
+ */
+
+ kbase_csf_firmware_disable_gpu_idle_timer(kbdev);
+ }
+}
+
+static void disable_gpu_idle_fw_timer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ unsigned long flags;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (!scheduler->gpu_idle_fw_timer_enabled)
+ return;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+ disable_gpu_idle_fw_timer_locked(kbdev);
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+}
+
static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
@@ -321,7 +389,7 @@ static void scheduler_wakeup(struct kbase_device *kbdev, bool kick)
lockdep_assert_held(&scheduler->lock);
if (scheduler->state == SCHED_SUSPENDED) {
- dev_info(kbdev->dev, "Re-activating the Scheduler");
+ dev_dbg(kbdev->dev, "Re-activating the Scheduler");
kbase_csf_scheduler_pm_active(kbdev);
scheduler->state = SCHED_INACTIVE;
@@ -374,7 +442,7 @@ static void update_idle_suspended_group_state(struct kbase_queue_group *group)
return;
}
- atomic_inc(&scheduler->non_idle_suspended_grps);
+ atomic_inc(&scheduler->non_idle_offslot_grps);
}
int kbase_csf_scheduler_group_get_slot_locked(struct kbase_queue_group *group)
@@ -456,6 +524,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_csf_cmd_stream_info *stream;
+ int csi_index = queue->csi_index;
long remaining =
kbase_csf_timeout_in_jiffies(CSF_STATE_WAIT_TIMEOUT_MS);
@@ -465,7 +534,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
lockdep_assert_held(&kbdev->csf.scheduler.lock);
ginfo = &global_iface->groups[group->csg_nr];
- stream = &ginfo->streams[queue->csi_index];
+ stream = &ginfo->streams[csi_index];
if (CS_REQ_STATE_GET(kbase_csf_firmware_cs_input_read(stream, CS_REQ)) ==
CS_REQ_STATE_START) {
@@ -476,7 +545,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
if (!remaining) {
dev_warn(kbdev->dev, "Timed out waiting for queue to start on csi %d bound to group %d on slot %d",
- queue->csi_index, group->handle, group->csg_nr);
+ csi_index, group->handle, group->csg_nr);
if (kbase_prepare_to_reset_gpu(kbdev))
kbase_reset_gpu(kbdev);
@@ -492,7 +561,7 @@ static int halt_stream_sync(struct kbase_queue *queue)
CS_REQ_STATE_MASK);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_STOP_REQUESTED, group, queue, 0u);
- kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr);
/* Timed wait */
remaining = wait_event_timeout(kbdev->csf.event_wait,
@@ -536,7 +605,7 @@ static bool can_halt_stream(struct kbase_device *kbdev,
* @queue: Pointer to the GPU queue to stop.
*
* This function handles stopping gpu queues for groups that are either not on
- * a command stream group slot or are on the slot but undergoing transition to
+ * a CSG slot or are on the slot but undergoing transition to
* resume or suspend states.
* It waits until the queue group is scheduled on a slot and starts running,
* which is needed as groups that were suspended may need to resume all queues
@@ -628,14 +697,14 @@ retry:
slot = kbase_csf_scheduler_group_get_slot(group);
/* If the group is still on slot and slot is in running state
- * then explicitly stop the command stream interface of the
+ * then explicitly stop the CSI of the
* queue. Otherwise there are different cases to consider
*
* - If the queue group was already undergoing transition to
* resume/start state when this function was entered then it
- * would not have disabled the command stream interface of the
+ * would not have disabled the CSI of the
* queue being stopped and the previous wait would have ended
- * once the slot was in a running state with command stream
+ * once the slot was in a running state with CS
* interface still enabled.
* Now the group is going through another transition either
* to a suspend state or to a resume state (it could have
@@ -643,17 +712,17 @@ retry:
* In both scenarios need to wait again for the group to
* come on a slot and that slot to reach the running state,
* as that would guarantee that firmware will observe the
- * command stream interface as disabled.
+ * CSI as disabled.
*
* - If the queue group was either off the slot or was
* undergoing transition to suspend state on entering this
* function, then the group would have been resumed with the
- * queue's command stream interface in disabled state.
+ * queue's CSI in disabled state.
* So now if the group is undergoing another transition
* (after the resume) then just need to wait for the state
- * bits in the ACK register of command stream interface to be
+ * bits in the ACK register of CSI to be
* set to STOP value. It is expected that firmware will
- * process the stop/disable request of the command stream
+ * process the stop/disable request of the CS
* interface after resuming the group before it processes
* another state change request of the group.
*/
@@ -785,6 +854,7 @@ static void program_cs(struct kbase_device *kbdev,
struct kbase_queue_group *group = queue->group;
struct kbase_csf_cmd_stream_group_info *ginfo;
struct kbase_csf_cmd_stream_info *stream;
+ int csi_index = queue->csi_index;
u64 user_input;
u64 user_output;
@@ -798,8 +868,8 @@ static void program_cs(struct kbase_device *kbdev,
ginfo = &kbdev->csf.global_iface.groups[group->csg_nr];
- if (WARN_ON(queue->csi_index < 0) ||
- WARN_ON(queue->csi_index >= ginfo->stream_num))
+ if (WARN_ON(csi_index < 0) ||
+ WARN_ON(csi_index >= ginfo->stream_num))
return;
assign_user_doorbell_to_queue(kbdev, queue);
@@ -811,7 +881,7 @@ static void program_cs(struct kbase_device *kbdev,
if (queue->enabled && queue_group_suspended_locked(group))
program_cs_extract_init(queue);
- stream = &ginfo->streams[queue->csi_index];
+ stream = &ginfo->streams[csi_index];
kbase_csf_firmware_cs_input(stream, CS_BASE_LO,
queue->base_addr & 0xFFFFFFFF);
@@ -839,8 +909,8 @@ static void program_cs(struct kbase_device *kbdev,
kbase_csf_firmware_cs_input(stream, CS_ACK_IRQ_MASK, ~((u32)0));
/*
- * Enable the CSG idle notification once the stream's ringbuffer
- * becomes empty or the stream becomes sync_idle, waiting sync update
+ * Enable the CSG idle notification once the CS's ringbuffer
+ * becomes empty or the CS becomes sync_idle, waiting sync update
* or protected mode switch.
*/
kbase_csf_firmware_cs_input_mask(stream, CS_REQ,
@@ -854,7 +924,7 @@ static void program_cs(struct kbase_device *kbdev,
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_START, group, queue, queue->enabled);
- kbase_csf_ring_cs_kernel_doorbell(kbdev, queue);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, group->csg_nr);
update_hw_active(queue, true);
}
@@ -1085,7 +1155,6 @@ static void suspend_csg_slot(struct kbase_queue_group *group)
*/
static bool evaluate_sync_update(struct kbase_queue *queue)
{
- enum kbase_csf_group_state run_state;
struct kbase_vmap_struct *mapping;
bool updated = false;
u32 *sync_ptr;
@@ -1094,12 +1163,6 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
if (WARN_ON(!queue))
return false;
- run_state = queue->group->run_state;
-
- if (WARN_ON((run_state != KBASE_CSF_GROUP_IDLE) &&
- (run_state != KBASE_CSF_GROUP_SUSPENDED_ON_WAIT_SYNC)))
- return false;
-
lockdep_assert_held(&queue->kctx->kbdev->csf.scheduler.lock);
sync_ptr = kbase_phy_alloc_mapping_get(queue->kctx, queue->sync_ptr,
@@ -1138,7 +1201,7 @@ static bool evaluate_sync_update(struct kbase_queue *queue)
/**
* save_slot_cs() - Save the state for blocked GPU command queue.
*
- * @ginfo: Pointer to the command stream group interface used by the group
+ * @ginfo: Pointer to the CSG interface used by the group
* the queue is bound to.
* @queue: Pointer to the GPU command queue.
*
@@ -1158,8 +1221,6 @@ bool save_slot_cs(struct kbase_csf_cmd_stream_group_info const *const ginfo,
u32 status = kbase_csf_firmware_cs_output(stream, CS_STATUS_WAIT);
bool is_waiting = false;
- WARN_ON(queue->group->run_state != KBASE_CSF_GROUP_IDLE);
-
if (CS_STATUS_WAIT_SYNC_WAIT_GET(status)) {
queue->status_wait = status;
queue->sync_ptr = kbase_csf_firmware_cs_output(stream,
@@ -1345,7 +1406,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
scheduler->total_runnable_grps--;
if (!scheduler->total_runnable_grps &&
scheduler->state != SCHED_SUSPENDED) {
- dev_dbg(kctx->kbdev->dev, "Scheduler idle as no runnable groups");
+ dev_dbg(kctx->kbdev->dev, "Scheduler idle as there are no runnable groups");
mod_delayed_work(system_wq, &scheduler->gpu_idle_work,
GPU_IDLE_POWEROFF_HYSTERESIS_DELAY);
}
@@ -1396,6 +1457,45 @@ static void deschedule_idle_wait_group(struct kbase_csf_scheduler *scheduler,
insert_group_to_idle_wait(group);
}
+static void update_offslot_non_idle_cnt_for_onslot_grp(struct kbase_queue_group *group)
+{
+ struct kbase_device *kbdev = group->kctx->kbdev;
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ WARN_ON(group->csg_nr < 0);
+
+ if (group->prepared_seq_num < scheduler->non_idle_scanout_grps)
+ atomic_dec(&scheduler->non_idle_offslot_grps);
+}
+
+static void update_offslot_non_idle_cnt_on_grp_suspend(
+ struct kbase_queue_group *group)
+{
+ struct kbase_device *kbdev = group->kctx->kbdev;
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (scheduler->state == SCHED_BUSY || scheduler->apply_async_protm) {
+ /* active phase or asynchronous entry into protected mode */
+ if (group->prepared_seq_num >=
+ scheduler->non_idle_scanout_grps) {
+ /* At scanout, it was tagged as on-slot idle */
+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED)
+ atomic_inc(&scheduler->non_idle_offslot_grps);
+ } else {
+ if (group->run_state != KBASE_CSF_GROUP_SUSPENDED)
+ atomic_dec(&scheduler->non_idle_offslot_grps);
+ }
+ } else {
+ /* async phases */
+ if (group->run_state == KBASE_CSF_GROUP_SUSPENDED)
+ atomic_inc(&scheduler->non_idle_offslot_grps);
+ }
+}
+
static bool confirm_cs_idle(struct kbase_queue *queue)
{
u64 *input_addr = (u64 *)queue->user_io_addr;
@@ -1425,47 +1525,55 @@ static void save_csg_slot(struct kbase_queue_group *group)
if (!WARN_ON((state != CSG_ACK_STATE_SUSPEND) &&
(state != CSG_ACK_STATE_TERMINATE))) {
int i;
+ bool sync_wait = false;
+ bool idle = true;
#ifdef CONFIG_MALI_NO_MALI
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++)
update_hw_active(group->bound_queues[i], false);
#endif
- if (group->run_state == KBASE_CSF_GROUP_IDLE) {
- bool sync_wait = false;
- bool idle = true;
-
- /* Loop through all bound CSs & save their context */
- for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
- struct kbase_queue *const queue =
- group->bound_queues[i];
-
- if (queue && queue->enabled) {
- if (save_slot_cs(ginfo, queue))
- sync_wait = true;
- else if (idle)
- idle = confirm_cs_idle(queue);
- }
+ /* The CSG idle status on suspension will be simplified when
+ * MIDHARC-3065 is applied. Until then it has to be evaluated
+ * per-CS on a best-effort basis (i.e. if a CS is executing some
+ * deferred instructions, the host has no way to be sure the CS
+ * is actually idle). The best-effort approach is considered
+ * sufficient for the tests from base, where the CS commands are
+ * generally simple ones.
+ * With the inclusion of support for MIDHARC-3064, the CSG
+ * run-state assertion previously in this function and its
+ * utility helpers has to be dropped, because the suspension
+ * actions can be triggered by the idle_worker thread
+ * asynchronously to the scheduler ticks.
+ */
+ for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
+ struct kbase_queue *const queue =
+ group->bound_queues[i];
+
+ if (queue && queue->enabled) {
+ if (save_slot_cs(ginfo, queue))
+ sync_wait = true;
+ else if (idle)
+ idle = confirm_cs_idle(queue);
}
+ }
- /* Take the suspended group out of the runnable_groups
- * list of the context and move it to the
- * idle_wait_groups list.
- */
- if (sync_wait && idle)
- deschedule_idle_wait_group(scheduler, group);
- else if (idle) {
- group->run_state =
- KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
- dev_dbg(kbdev->dev, "Group-%d suspended: idle\n",
- group->handle);
- } else {
- group->run_state = KBASE_CSF_GROUP_SUSPENDED;
- atomic_inc(&scheduler->non_idle_suspended_grps);
- }
+ /* Take the suspended group out of the runnable_groups
+ * list of the context and move it to the
+ * idle_wait_groups list.
+ */
+ if (sync_wait && idle)
+ deschedule_idle_wait_group(scheduler, group);
+ else if (idle) {
+ group->run_state =
+ KBASE_CSF_GROUP_SUSPENDED_ON_IDLE;
+ dev_dbg(kbdev->dev, "Group-%d suspended: idle\n",
+ group->handle);
} else {
group->run_state = KBASE_CSF_GROUP_SUSPENDED;
- atomic_inc(&scheduler->non_idle_suspended_grps);
}
+
+ update_offslot_non_idle_cnt_on_grp_suspend(group);
}
}
@@ -1567,6 +1675,9 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ /* Update consumes a group from scanout */
+ update_offslot_non_idle_cnt_for_onslot_grp(group);
+
if (csg_slot->priority == prio)
return;
@@ -1578,9 +1689,9 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
- csg_req ^= CSG_REQ_EP_CFG;
+ csg_req ^= CSG_REQ_EP_CFG_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
- CSG_REQ_EP_CFG);
+ CSG_REQ_EP_CFG_MASK);
spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_slot->priority = prio;
@@ -1704,16 +1815,13 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
- csg_req ^= CSG_REQ_EP_CFG;
+ csg_req ^= CSG_REQ_EP_CFG_MASK;
kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, csg_req,
- CSG_REQ_EP_CFG);
+ CSG_REQ_EP_CFG_MASK);
/* Set state to START/RESUME */
if (queue_group_suspended_locked(group)) {
state = CSG_REQ_STATE_RESUME;
- if (group->run_state == KBASE_CSF_GROUP_SUSPENDED)
- atomic_dec(
- &kbdev->csf.scheduler.non_idle_suspended_grps);
} else {
WARN_ON(group->run_state != KBASE_CSF_GROUP_RUNNABLE);
state = CSG_REQ_STATE_START;
@@ -1741,6 +1849,9 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
(state & (CSG_REQ_STATE_MASK >> CS_REQ_STATE_SHIFT)));
kbase_csf_ring_csg_doorbell(kbdev, slot);
+
+ /* Programming a slot consumes a group from scanout */
+ update_offslot_non_idle_cnt_for_onslot_grp(group);
}
static void remove_scheduled_group(struct kbase_device *kbdev,
@@ -1760,7 +1871,8 @@ static void remove_scheduled_group(struct kbase_device *kbdev,
group->kctx->csf.sched.ngrp_to_schedule--;
}
-static void sched_evict_group(struct kbase_queue_group *group, bool fault)
+static void sched_evict_group(struct kbase_queue_group *group, bool fault,
+ bool update_non_idle_offslot_grps_cnt)
{
struct kbase_context *kctx = group->kctx;
struct kbase_device *kbdev = kctx->kbdev;
@@ -1771,8 +1883,10 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault)
if (queue_group_scheduled_locked(group)) {
u32 i;
- if (group->run_state == KBASE_CSF_GROUP_SUSPENDED)
- atomic_dec(&scheduler->non_idle_suspended_grps);
+ if (update_non_idle_offslot_grps_cnt &&
+ (group->run_state == KBASE_CSF_GROUP_SUSPENDED ||
+ group->run_state == KBASE_CSF_GROUP_RUNNABLE))
+ atomic_dec(&scheduler->non_idle_offslot_grps);
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) {
if (group->bound_queues[i])
@@ -1845,7 +1959,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
bool reset = kbase_reset_gpu_is_active(kbdev);
if (!kbasep_csf_scheduler_group_is_on_slot_locked(group)) {
- sched_evict_group(group, false);
+ sched_evict_group(group, false, true);
} else if (reset || saved_state == SCHED_INACTIVE || force) {
bool as_faulty;
@@ -1854,7 +1968,7 @@ void kbase_csf_scheduler_group_deschedule(struct kbase_queue_group *group)
 /* Treat the CSG as having been terminated */
as_faulty = cleanup_csg_slot(group);
/* remove from the scheduler list */
- sched_evict_group(group, as_faulty);
+ sched_evict_group(group, as_faulty, false);
}
/* waiting scheduler state to change */
@@ -1908,6 +2022,8 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
} else if (!queue_group_scheduled_locked(group)) {
insert_group_to_runnable(&kbdev->csf.scheduler, group,
KBASE_CSF_GROUP_RUNNABLE);
+ /* A new group into the scheduler */
+ atomic_inc(&kbdev->csf.scheduler.non_idle_offslot_grps);
}
/* Since a group has become active now, check if GPU needs to be
@@ -1919,14 +2035,14 @@ static int scheduler_group_schedule(struct kbase_queue_group *group)
}
/**
- * set_max_csg_slots() - Set the number of available command stream group slots
+ * set_max_csg_slots() - Set the number of available CSG slots
*
* @kbdev: Pointer of the GPU device.
*
- * This function would set/limit the number of command stream group slots that
- * can be used in the given tick/tock. It would be less than the total command
- * stream group slots supported by firmware if the number of GPU address space
- * slots required to utilize all the CSG slots is more than the available
+ * This function would set/limit the number of CSG slots that
+ * can be used in the given tick/tock. It would be less than the total CSG
+ * slots supported by firmware if the number of GPU address space slots
+ * required to utilize all the CSG slots is more than the available
* address space slots.
*/
static inline void set_max_csg_slots(struct kbase_device *kbdev)
@@ -1949,7 +2065,7 @@ static inline void set_max_csg_slots(struct kbase_device *kbdev)
* @kctx: Pointer of the Kbase context.
*
* This function would update the counter that is tracking the number of GPU
- * address space slots that would be required to program the command stream
+ * address space slots that would be required to program the CS
* group slots from the groups at the head of groups_to_schedule list.
*/
static inline void count_active_address_space(struct kbase_device *kbdev,
@@ -2012,11 +2128,11 @@ static void update_resident_groups_priority(struct kbase_device *kbdev)
* program_group_on_vacant_csg_slot() - Program a non-resident group on the
* given vacant CSG slot.
* @kbdev: Pointer to the GPU device.
- * @slot: Vacant command stream group slot number.
+ * @slot: Vacant CSG slot number.
*
* This function will program a non-resident group at the head of
- * kbase_csf_scheduler.groups_to_schedule list on the given vacant command
- * stream group slot, provided the initial position of the non-resident
+ * kbase_csf_scheduler.groups_to_schedule list on the given vacant
+ * CSG slot, provided the initial position of the non-resident
* group in the list is less than the number of CSG slots and there is
* an available GPU address space slot.
* kbase_csf_scheduler.head_slot_priority would also be adjusted after
@@ -2059,11 +2175,11 @@ static void program_group_on_vacant_csg_slot(struct kbase_device *kbdev,
* group and update the priority of resident groups.
*
* @kbdev: Pointer to the GPU device.
- * @slot: Vacant command stream group slot number.
+ * @slot: Vacant CSG slot number.
*
* This function will first update the priority of all resident queue groups
* that are at the head of groups_to_schedule list, preceding the first
- * non-resident group, it will then try to program the given command stream
+ * non-resident group; it will then try to program the given CS
* group slot with the non-resident group. Finally update the priority of all
* resident queue groups following the non-resident group.
*
@@ -2121,12 +2237,12 @@ static bool slots_state_changed(struct kbase_device *kbdev,
* @kbdev: Pointer to the GPU device.
*
* This function will first wait for the ongoing suspension to complete on a
- * command stream group slot and will then program the vacant slot with the
+ * CSG slot and will then program the vacant slot with the
* non-resident queue group inside the groups_to_schedule list.
* The programming of the non-resident queue group on the vacant slot could
* fail due to unavailability of free GPU address space slot and so the
* programming is re-attempted after the ongoing suspension has completed
- * for all the command stream group slots.
+ * for all the CSG slots.
* The priority of resident groups before and after the non-resident group
* in the groups_to_schedule list would also be updated.
* This would be repeated for all the slots undergoing suspension.
@@ -2180,7 +2296,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
as_fault = cleanup_csg_slot(group);
/* If AS fault detected, evict it */
if (as_fault) {
- sched_evict_group(group, true);
+ sched_evict_group(group, true, true);
set_bit(i, evicted_mask);
}
}
@@ -2284,15 +2400,15 @@ static void wait_csg_slots_start(struct kbase_device *kbdev)
}
/**
- * group_on_slot_is_idle() - Check if the queue group resident on a command
- * stream group slot is idle.
+ * group_on_slot_is_idle() - Check if the queue group resident on a CSG slot
+ * is idle.
*
* This function is called at the start of scheduling tick to check the
- * idle status of a queue group resident on a command sream group slot.
+ * idle status of a queue group resident on a CSG slot.
* The group's idleness is determined by looping over all the bound command
* queues and checking their respective CS_STATUS_WAIT register as well as
* the insert and extract offsets.
-
+ *
* This function would be simplified in future after the changes under
* consideration with MIDHARC-3065 are introduced.
*
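To make the idleness criterion described above concrete, here is a minimal illustrative sketch (not part of the patch). The helper name is hypothetical and the CS_STATUS_WAIT handling is reduced to a single boolean; the real check in this file also derives the insert/extract offsets from the queue's user I/O pages and the sync-wait status reported by firmware.

/* Illustrative sketch only - not part of the patch. */
static bool example_cs_is_idle(u64 insert_ofs, u64 extract_ofs,
			       bool waiting_on_sync)
{
	/* A CS still blocked on a synchronisation wait is not idle. */
	if (waiting_on_sync)
		return false;

	/* No outstanding work: everything inserted has been extracted. */
	return insert_ofs == extract_ofs;
}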
@@ -2495,7 +2611,7 @@ void kbase_csf_scheduler_evict_ctx_slots(struct kbase_device *kbdev,
group = scheduler->csg_slots[slot].resident_group;
as_fault = cleanup_csg_slot(group);
/* remove the group from the scheduler list */
- sched_evict_group(group, as_fault);
+ sched_evict_group(group, as_fault, false);
/* return the evicted group to the caller */
list_add_tail(&group->link, evicted_groups);
}
@@ -2606,17 +2722,17 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- /* Firmware samples the PROTM_PEND ACK bit for command streams when
+ /* Firmware samples the PROTM_PEND ACK bit for CSs when
* Host sends PROTM_ENTER global request. So if PROTM_PEND ACK bit
- * is set for a command stream after Host has sent the PROTM_ENTER
+ * is set for a CS after Host has sent the PROTM_ENTER
* Global request, then there is no guarantee that firmware will
* notice that prior to switching to protected mode. And firmware
- * may not again raise the PROTM_PEND interrupt for that command
- * stream later on. To avoid that uncertainty PROTM_PEND ACK bit
- * is not set for a command stream if the request to enter protected
+ * may not again raise the PROTM_PEND interrupt for that CS
+ * later on. To avoid that uncertainty PROTM_PEND ACK bit
+ * is not set for a CS if the request to enter protected
* mode has already been sent. It will be set later (after the exit
* from protected mode has taken place) when the group to which
- * command stream is bound becomes the top group.
+ * CS is bound becomes the top group.
*
* The actual decision of entering protected mode is hinging on the
* input group is the top priority group, or, in case the previous
@@ -2647,9 +2763,13 @@ static void scheduler_group_check_protm_enter(struct kbase_device *const kbdev,
* GPUCORE-21394.
*/
+ /* Disable the idle timer */
+ disable_gpu_idle_fw_timer_locked(kbdev);
+
/* Switch to protected mode */
scheduler->active_protm_grp = input_grp;
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_ENTER_PROTM, input_grp, 0u);
+
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
kbase_csf_enter_protected_mode(kbdev);
return;
@@ -2872,20 +2992,20 @@ static void scheduler_rotate_ctxs(struct kbase_device *kbdev)
}
/**
- * scheduler_update_idle_slots_status() - Get the status update for the command
- * stream group slots for which the IDLE notification was
- * received previously.
+ * scheduler_update_idle_slots_status() - Get the status update for the CSG
+ * slots for which the IDLE notification was received
+ * previously.
*
- * This function sends a CSG status update request for all the command stream
- * group slots present in the bitmap scheduler->csg_slots_idle_mask and wait
- * for the request to complete.
+ * This function sends a CSG status update request for all the CSG slots
+ * present in the bitmap scheduler->csg_slots_idle_mask and waits for the
+ * request to complete.
* The bits set in the scheduler->csg_slots_idle_mask bitmap are cleared by
* this function.
*
* @kbdev: Pointer to the GPU device.
- * @csg_bitmap: Bitmap of the command stream group slots for which
+ * @csg_bitmap: Bitmap of the CSG slots for which
* the status update request completed successfully.
- * @failed_csg_bitmap: Bitmap of the command stream group slots for which
+ * @failed_csg_bitmap: Bitmap of the CSG slots for which
 * the status update request timed out.
*/
static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
@@ -2938,23 +3058,23 @@ static void scheduler_update_idle_slots_status(struct kbase_device *kbdev,
bitmap_copy(failed_csg_bitmap, csg_bitmap, num_groups);
csg_bitmap[0] = ~csg_bitmap[0] & db_slots;
} else {
- csg_bitmap[0] = db_slots;
+ csg_bitmap[0] = db_slots;
}
}
}
/**
* scheduler_handle_idle_slots() - Update the idle status of queue groups
- * resident on command stream group slots for which the
+ * resident on CSG slots for which the
* IDLE notification was received previously.
*
* This function is called at the start of scheduling tick/tock to reconfirm
- * the idle status of queue groups resident on command sream group slots for
+ * the idle status of queue groups resident on CSG slots for
* which idle notification was received previously, i.e. all the CSG slots
* present in the bitmap scheduler->csg_slots_idle_mask.
* The confirmation is done by sending the CSG status update request to the
* firmware. The idleness of a CSG is determined by looping over all the
- * bound command streams and checking their respective CS_STATUS_WAIT register
+ * bound CSs and checking their respective CS_STATUS_WAIT register
* as well as the insert and extract offset.
* The run state of the groups resident on still idle CSG slots is changed to
* KBASE_CSF_GROUP_IDLE and the bitmap scheduler->csg_slots_idle_mask is
@@ -3094,12 +3214,44 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev,
/* Check if the groups became active whilst the suspend was ongoing,
* but only for the case where the system suspend is not in progress
*/
- if (!is_suspend && atomic_read(&scheduler->non_idle_suspended_grps))
+ if (!is_suspend && atomic_read(&scheduler->non_idle_offslot_grps))
return -1;
return 0;
}
+static bool scheduler_idle_suspendable(struct kbase_device *kbdev)
+{
+ bool suspendable;
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ if (scheduler->state == SCHED_SUSPENDED)
+ return false;
+
+ if (scheduler->total_runnable_grps) {
+#if USE_PRE_MIDHARC_3065_WORKAROUND
+ suspendable = false;
+#else
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->interrupt_lock, flags);
+
+ /* Check both on-slots and off-slots groups idle status */
+ suspendable = kbase_csf_scheduler_all_csgs_idle(kbdev) &&
+ !atomic_read(&scheduler->non_idle_offslot_grps);
+
+ spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+#endif
+ } else {
+ /* No runnables inside the scheduler, can suspend */
+ suspendable = true;
+ }
+
+ return suspendable;
+}
+
static void gpu_idle_worker(struct work_struct *work)
{
struct kbase_device *kbdev = container_of(
@@ -3108,13 +3260,22 @@ static void gpu_idle_worker(struct work_struct *work)
mutex_lock(&scheduler->lock);
- if (!scheduler->total_runnable_grps) {
- if (scheduler->state != SCHED_SUSPENDED) {
+ /* Cycle completed, disable the firmware idle timer */
+ disable_gpu_idle_fw_timer(kbdev);
+ if (scheduler_idle_suspendable(kbdev) &&
+ !kbase_reset_gpu_is_active(kbdev)) {
+ int ret = suspend_active_groups_on_powerdown(kbdev, false);
+
+ if (!ret) {
+ dev_dbg(kbdev->dev, "Scheduler becomes idle suspended now");
scheduler_suspend(kbdev);
- dev_info(kbdev->dev, "Scheduler now suspended");
+ } else {
+ dev_dbg(kbdev->dev, "Aborting suspend scheduler (grps: %d)",
+ atomic_read(&scheduler->non_idle_offslot_grps));
+ /* Bring forward the next tick */
+ mod_delayed_work(scheduler->wq,
+ &scheduler->tick_work, 0);
}
- } else {
- dev_dbg(kbdev->dev, "Scheduler couldn't be suspended");
}
mutex_unlock(&scheduler->lock);
@@ -3156,7 +3317,20 @@ static int scheduler_prepare(struct kbase_device *kbdev)
scheduler_ctx_scan_groups(kbdev, kctx, i);
}
+ /* Update this tick's non-idle groups */
+ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
+
+ /* Initial number of non-idle off-slot groups, before the scheduler's
+ * scheduler_apply() operation. This gives a sensible starting-point
+ * view of the tick. It will be subject to increments and decrements
+ * during the scheduler's active phase.
+ */
+ atomic_set(&scheduler->non_idle_offslot_grps,
+ scheduler->non_idle_scanout_grps);
+
+ /* Adds those idle but runnable groups to the scanout list */
scheduler_scan_idle_groups(kbdev);
+
KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_TOP_GRP, scheduler->top_grp,
scheduler->num_active_address_spaces |
(((u64)scheduler->ngrp_to_schedule) << 32));
@@ -3181,6 +3355,31 @@ static void scheduler_wait_protm_quit(struct kbase_device *kbdev)
dev_warn(kbdev->dev, "Timeout, protm_quit wait skipped");
}
+static void scheduler_handle_idle_timer_onoff(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ lockdep_assert_held(&scheduler->lock);
+
+ /* After the scheduler apply operation, the internal variable
+ * scheduler->non_idle_offslot_grps reflects the end-point view
+ * of the count at the end of the active phase.
+ *
+ * Any changes that follow (after the scheduler has dropped
+ * scheduler->lock) reflect asynchronous operations on the scheduler,
+ * such as a group being killed (evicted), a new group being inserted,
+ * or a CQS wait-sync triggered state transition.
+ *
+ * The condition for enabling the idle timer is that there are no
+ * non-idle groups off slot. If any non-idle group is off slot,
+ * the timer should be disabled.
+ */
+ if (atomic_read(&scheduler->non_idle_offslot_grps))
+ disable_gpu_idle_fw_timer(kbdev);
+ else
+ enable_gpu_idle_fw_timer(kbdev);
+}
+
static void schedule_actions(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
@@ -3237,6 +3436,12 @@ static void schedule_actions(struct kbase_device *kbdev)
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
scheduler_apply(kbdev);
+
+ /* Post-apply, all the committed groups in this tick are on
+ * slots, time to arrange the idle timer on/off decision.
+ */
+ scheduler_handle_idle_timer_onoff(kbdev);
+
/* Scheduler is dropping the exec of the previous protm_grp,
* Until the protm quit completes, the GPU is effectively
* locked in the secure mode.
@@ -3309,7 +3514,6 @@ static void schedule_on_tick(struct work_struct *work)
}
scheduler->state = SCHED_BUSY;
-
/* Do scheduling stuff */
scheduler_rotate(kbdev);
@@ -3381,7 +3585,7 @@ int wait_csg_slots_suspend(struct kbase_device *kbdev,
*/
save_csg_slot(group);
if (cleanup_csg_slot(group))
- sched_evict_group(group, true);
+ sched_evict_group(group, true, true);
}
}
} else {
@@ -3424,31 +3628,35 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
int ret;
+ int ret2;
mutex_lock(&scheduler->lock);
ret = suspend_active_queue_groups(kbdev, slot_mask);
+
if (ret) {
dev_warn(kbdev->dev, "Timed out waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
kbdev->csf.global_iface.group_num, slot_mask);
}
- if (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
- int ret2;
-
- /* Need to flush the GPU cache to ensure suspend buffer
- * contents are not lost on reset of GPU.
- * Do this even if suspend operation had timedout for some of
- * the CSG slots.
- */
- kbase_gpu_start_cache_clean(kbdev);
- ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
- DEFAULT_RESET_TIMEOUT_MS);
- if (ret2) {
- dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset");
- if (!ret)
- ret = ret2;
- }
+ /* Need to flush the GPU cache to ensure suspend buffer
+ * contents are not lost on reset of GPU.
+ * Do this even if suspend operation had timed out for some of
+ * the CSG slots.
+ * In case the scheduler is already in the suspended state, the
+ * cache clean is still required, as an async reset request from
+ * debugfs may race against the scheduler suspend operation
+ * due to the extra context ref-count, which prevents the
+ * L2 power-down cache clean operation that would happen in the
+ * non-racing case.
+ */
+ kbase_gpu_start_cache_clean(kbdev);
+ ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
+ DEFAULT_RESET_TIMEOUT_MS);
+ if (ret2) {
+ dev_warn(kbdev->dev, "Timed out waiting for cache clean to complete before reset");
+ if (!ret)
+ ret = ret2;
}
mutex_unlock(&scheduler->lock);
@@ -3465,6 +3673,7 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
WARN_ON(csgs_active(kbdev));
/* Cancel any potential queued delayed work(s) */
+ cancel_delayed_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
cancel_delayed_work_sync(&scheduler->tick_work);
cancel_delayed_work_sync(&scheduler->tock_work);
cancel_delayed_work_sync(&scheduler->ping_work);
@@ -3797,8 +4006,11 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
* GPUCORE-24491 the on slot groups other than the top group have to
* be suspended first before entering protected mode.
*/
- if (scheduler_get_protm_enter_async_group(kbdev, group))
+ if (scheduler_get_protm_enter_async_group(kbdev, group)) {
+ scheduler->apply_async_protm = true;
schedule_actions(kbdev);
+ scheduler->apply_async_protm = false;
+ }
mutex_unlock(&scheduler->lock);
}
@@ -3956,10 +4168,12 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
scheduler->last_schedule = 0;
scheduler->tock_pending_request = false;
scheduler->active_protm_grp = NULL;
+ scheduler->gpu_idle_fw_timer_enabled = false;
+ scheduler->apply_async_protm = false;
scheduler_doorbell_init(kbdev);
INIT_DEFERRABLE_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
- atomic_set(&scheduler->non_idle_suspended_grps, 0);
+ atomic_set(&scheduler->non_idle_offslot_grps, 0);
return 0;
}
@@ -3967,9 +4181,12 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
void kbase_csf_scheduler_term(struct kbase_device *kbdev)
{
if (kbdev->csf.scheduler.csg_slots) {
+ WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
WARN_ON(csgs_active(kbdev));
cancel_delayed_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
+ cancel_delayed_work_sync(&kbdev->csf.scheduler.tick_work);
+ cancel_delayed_work_sync(&kbdev->csf.scheduler.tock_work);
destroy_workqueue(kbdev->csf.scheduler.wq);
mutex_destroy(&kbdev->csf.scheduler.lock);
kfree(kbdev->csf.scheduler.csg_slots);
@@ -3980,8 +4197,7 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
/**
* scheduler_enable_tick_timer_nolock - Enable the scheduler tick timer.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will restart the scheduler tick so that regular scheduling can
* be resumed without any explicit trigger (like kicking of GPU queues). This
@@ -3999,7 +4215,6 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
WARN_ON((scheduler->state != SCHED_INACTIVE) &&
(scheduler->state != SCHED_SUSPENDED));
- WARN_ON(delayed_work_pending(&scheduler->tick_work));
if (scheduler->total_runnable_grps > 0) {
mod_delayed_work(scheduler->wq, &scheduler->tick_work, 0);
@@ -4077,7 +4292,7 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
mutex_lock(&scheduler->lock);
- WARN_ON(!kbase_pm_is_suspending(kbdev));
+ disable_gpu_idle_fw_timer(kbdev);
if (scheduler->state != SCHED_SUSPENDED) {
suspend_active_groups_on_powerdown(kbdev, true);
@@ -4086,6 +4301,7 @@ void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev)
}
mutex_unlock(&scheduler->lock);
}
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_suspend);
void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
{
@@ -4093,8 +4309,6 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
mutex_lock(&scheduler->lock);
- WARN_ON(kbase_pm_is_suspending(kbdev));
-
if (scheduler->total_runnable_grps > 0) {
WARN_ON(scheduler->state != SCHED_SUSPENDED);
dev_info(kbdev->dev, "Scheduler PM resume");
@@ -4102,6 +4316,7 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev)
}
mutex_unlock(&scheduler->lock);
}
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_resume);
void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
{
@@ -4118,6 +4333,7 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev)
else
WARN_ON(prev_count == U32_MAX);
}
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_active);
void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
{
@@ -4133,3 +4349,4 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
else
WARN_ON(prev_count == 0);
}
+KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 1b1c068..24103b7 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -31,11 +31,10 @@
*
* @queue: Pointer to the GPU command queue to be started.
*
- * This function would enable the start of a command stream interface, within a
- * command stream group, to which the @queue was bound.
- * If the command stream group is already scheduled and resident, the command
- * stream interface will be started right away, otherwise once the group is
- * made resident.
+ * This function would enable the start of a CSI, within a
+ * CSG, to which the @queue was bound.
+ * If the CSG is already scheduled and resident, the CSI will be started
+ * right away, otherwise once the group is made resident.
*
* Return: 0 on success, or negative on failure.
*/
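As a usage illustration for the start/stop pair documented here and just below, a caller holding an already-bound GPU command queue might drive them as in the hedged sketch that follows (not part of the patch; the wrapper name and the elided submission step are assumptions):

/* Illustrative sketch only - not part of the patch. Assumes 'queue' is
 * already bound to a queue group owned by the calling context.
 */
static int example_run_queue(struct kbase_queue *queue)
{
	int err = kbase_csf_scheduler_queue_start(queue);

	if (err)
		return err;	/* e.g. the group could not be scheduled */

	/* ... work is submitted through the queue's ring buffer ... */

	return kbase_csf_scheduler_queue_stop(queue);
}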
@@ -47,8 +46,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue);
*
* @queue: Pointer to the GPU command queue to be stopped.
*
- * This function would stop the command stream interface, within a command
- * stream group, to which the @queue was bound.
+ * This function would stop the CSI, within a CSG, to which @queue was bound.
*
* Return: 0 on success, or negative on failure.
*/
@@ -69,7 +67,7 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group);
/**
* kbase_csf_scheduler_group_get_slot() - Checks if a queue group is
- * programmed on a firmware Command Stream Group slot
+ * programmed on a firmware CSG slot
* and returns the slot number.
*
* @group: The command queue group.
@@ -84,7 +82,7 @@ int kbase_csf_scheduler_group_get_slot(struct kbase_queue_group *group);
/**
* kbase_csf_scheduler_group_get_slot_locked() - Checks if a queue group is
- * programmed on a firmware Command Stream Group slot
+ * programmed on a firmware CSG slot
* and returns the slot number.
*
* @group: The command queue group.
@@ -112,7 +110,7 @@ bool kbase_csf_scheduler_group_events_enabled(struct kbase_device *kbdev,
/**
* kbase_csf_scheduler_get_group_on_slot()- Gets the queue group that has been
- * programmed to a firmware Command Stream Group slot.
+ * programmed to a firmware CSG slot.
*
* @kbdev: The GPU device.
* @slot: The slot for which to get the queue group.
@@ -166,10 +164,9 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx);
/**
* kbase_csf_scheduler_init - Initialize the CSF scheduler
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
- * The scheduler does the arbitration for the command stream group slots
+ * The scheduler does the arbitration for the CSG slots
* provided by the firmware between the GPU command queue groups created
* by the Clients.
*
@@ -190,8 +187,7 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx);
/**
* kbase_csf_scheduler_term - Terminate the CSF scheduler.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This should be called when unload of firmware is done on device
* termination.
@@ -202,8 +198,7 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev);
* kbase_csf_scheduler_reset - Reset the state of all active GPU command
* queue groups.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will first iterate through all the active/scheduled GPU
* command queue groups and suspend them (to avoid losing work for groups
@@ -223,8 +218,7 @@ void kbase_csf_scheduler_reset(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_enable_tick_timer - Enable the scheduler tick timer.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will restart the scheduler tick so that regular scheduling can
* be resumed without any explicit trigger (like kicking of GPU queues).
@@ -251,8 +245,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
/**
* kbase_csf_scheduler_lock - Acquire the global Scheduler lock.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will take the global scheduler lock, in order to serialize
* against the Scheduler actions, for access to CS IO pages.
@@ -265,8 +258,7 @@ static inline void kbase_csf_scheduler_lock(struct kbase_device *kbdev)
/**
* kbase_csf_scheduler_unlock - Release the global Scheduler lock.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev)
{
@@ -276,8 +268,7 @@ static inline void kbase_csf_scheduler_unlock(struct kbase_device *kbdev)
/**
* kbase_csf_scheduler_spin_lock - Acquire Scheduler interrupt spinlock.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @flags: Pointer to the memory location that would store the previous
* interrupt state.
*
@@ -293,8 +284,7 @@ static inline void kbase_csf_scheduler_spin_lock(struct kbase_device *kbdev,
/**
* kbase_csf_scheduler_spin_unlock - Release Scheduler interrupt spinlock.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @flags: Previously stored interrupt state when Scheduler interrupt
* spinlock was acquired.
*/
@@ -308,8 +298,7 @@ static inline void kbase_csf_scheduler_spin_unlock(struct kbase_device *kbdev,
* kbase_csf_scheduler_spin_lock_assert_held - Assert if the Scheduler
* interrupt spinlock is held.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
static inline void
kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
@@ -342,8 +331,7 @@ void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
*
* Note: This function is only effective if the scheduling timer is disabled.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_scheduler_kick(struct kbase_device *kbdev);
@@ -367,8 +355,7 @@ static inline bool kbase_csf_scheduler_protected_mode_in_use(
* Note: This function will increase the scheduler's internal pm_active_count
* value, ensuring that both GPU and MCU are powered for access.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev);
@@ -378,16 +365,14 @@ void kbase_csf_scheduler_pm_active(struct kbase_device *kbdev);
* Note: This function will decrease the scheduler's internal pm_active_count
* value. On reaching 0, the MCU and GPU could be powered off.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_pm_resume - Reactivate the scheduler on system resume
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will make the scheduler resume the scheduling of queue groups
 * and take the power management reference, if there are any runnable groups.
@@ -397,12 +382,27 @@ void kbase_csf_scheduler_pm_resume(struct kbase_device *kbdev);
/**
* kbase_csf_scheduler_pm_suspend - Idle the scheduler on system suspend
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* This function will make the scheduler suspend all the running queue groups
 * and drop its power management reference.
*/
void kbase_csf_scheduler_pm_suspend(struct kbase_device *kbdev);
+/**
+ * kbase_csf_scheduler_all_csgs_idle() - Check if all the CSG slots in use
+ * by the scheduler are currently occupied by idle command queue groups.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * Return: true if all the used slots are tagged as idle CSGs.
+ */
+static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
+{
+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
+ return bitmap_equal(kbdev->csf.scheduler.csg_slots_idle_mask,
+ kbdev->csf.scheduler.csg_inuse_bitmap,
+ kbdev->csf.global_iface.group_num);
+}
+
#endif /* _KBASE_CSF_SCHEDULER_H_ */
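The helper above must be called with the Scheduler interrupt spinlock held, mirroring its use in scheduler_idle_suspendable(); a minimal hedged usage sketch (not part of the patch, the wrapper name is hypothetical):

/* Illustrative sketch only - not part of the patch. */
static bool example_all_resident_groups_idle(struct kbase_device *kbdev)
{
	unsigned long flags;
	bool idle;

	spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
	idle = kbase_csf_scheduler_all_csgs_idle(kbdev);
	spin_unlock_irqrestore(&kbdev->csf.scheduler.interrupt_lock, flags);

	return idle;
}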
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index adb6f2d..a3c9826 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -513,49 +513,53 @@ int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
* on the settings provided by userspace when the heap was created and the
* heap's statistics (like number of render passes in-flight).
*
- * @heap: Pointer to the tiler heap.
- * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
- * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
- * chunk allocated for the heap.
+ * @heap: Pointer to the tiler heap.
+ * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
+ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
+ * The minimum value is zero, but it must be less than or
+ * equal to the total number of render passes in flight.
+ * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
+ * chunk allocated for the heap.
*
* Return: 0 if a new chunk was allocated otherwise an appropriate negative
* error code.
*/
static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
- u32 nr_in_flight, u64 *new_chunk_ptr)
+ u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
{
int err = -ENOMEM;
lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
- if (!nr_in_flight)
+ if (WARN_ON(!nr_in_flight) ||
+ WARN_ON(pending_frag_count > nr_in_flight))
return -EINVAL;
- if ((nr_in_flight <= heap->target_in_flight) &&
- (heap->chunk_count < heap->max_chunks)) {
- /* Not exceeded the target number of render passes yet so be
- * generous with memory.
- */
- err = create_chunk(heap, false);
-
- if (likely(!err)) {
- struct kbase_csf_tiler_heap_chunk *new_chunk =
- get_last_chunk(heap);
- if (!WARN_ON(!new_chunk)) {
- *new_chunk_ptr =
- encode_chunk_ptr(heap->chunk_size,
- new_chunk->gpu_va);
- return 0;
+ if (nr_in_flight <= heap->target_in_flight) {
+ if (heap->chunk_count < heap->max_chunks) {
+ /* Not exceeded the target number of render passes yet so be
+ * generous with memory.
+ */
+ err = create_chunk(heap, false);
+
+ if (likely(!err)) {
+ struct kbase_csf_tiler_heap_chunk *new_chunk =
+ get_last_chunk(heap);
+ if (!WARN_ON(!new_chunk)) {
+ *new_chunk_ptr =
+ encode_chunk_ptr(heap->chunk_size,
+ new_chunk->gpu_va);
+ return 0;
+ }
}
+ } else if (pending_frag_count > 0) {
+ err = -EBUSY;
+ } else {
+ err = -ENOMEM;
}
- }
-
- /* A new chunk wasn't allocated this time, check if the allocation can
- * be retried later.
- */
- if (nr_in_flight > 1) {
- /* Can retry as there are some ongoing fragment
- * jobs which are expected to free up chunks.
+ } else {
+ /* Reached the target number of render passes in flight.
+ * Wait for some of them to finish.
*/
err = -EBUSY;
}
@@ -564,7 +568,7 @@ static int alloc_new_chunk(struct kbase_csf_tiler_heap *heap,
}
int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
- u64 gpu_heap_va, u32 nr_in_flight, u64 *new_chunk_ptr)
+ u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
{
struct kbase_csf_tiler_heap *heap;
int err = -EINVAL;
@@ -574,7 +578,7 @@ int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
heap = find_tiler_heap(kctx, gpu_heap_va);
if (likely(heap)) {
- err = alloc_new_chunk(heap, nr_in_flight,
+ err = alloc_new_chunk(heap, nr_in_flight, pending_frag_count,
new_chunk_ptr);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
index 1a4729d..d85ac11 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.h
@@ -97,11 +97,14 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
* It would return an appropriate error code if a new chunk couldn't be
* allocated.
*
- * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
- * @gpu_heap_va: GPU virtual address of the heap context.
- * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
- * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
- * chunk allocated for the heap.
+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @gpu_heap_va: GPU virtual address of the heap context.
+ * @nr_in_flight: Number of render passes that are in-flight, must not be zero.
+ * @pending_frag_count: Number of render passes in-flight with completed vertex/tiler stage.
+ * The minimum value is zero, but it must be less than or
+ * equal to the total number of render passes in flight.
+ * @new_chunk_ptr: Where to store the GPU virtual address & size of the new
+ * chunk allocated for the heap.
*
* Return: 0 if a new chunk was allocated otherwise an appropriate negative
* error code (like -EBUSY when a free chunk is expected to be
@@ -109,5 +112,5 @@ int kbase_csf_tiler_heap_term(struct kbase_context *kctx, u64 gpu_heap_va);
* invalid value was passed for one of the argument).
*/
int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
- u64 gpu_heap_va, u32 nr_in_flight, u64 *new_chunk_ptr);
+ u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr);
#endif
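A hypothetical caller-side treatment of the documented return values could look like the sketch below (not part of the patch; the wrapper name and the comments standing in for the retry/report paths are assumptions):

/* Illustrative sketch only - not part of the patch. */
static void example_handle_heap_grow_request(struct kbase_context *kctx,
					     u64 gpu_heap_va,
					     u32 nr_in_flight,
					     u32 pending_frag_count)
{
	u64 new_chunk_ptr;
	int err = kbase_csf_tiler_heap_alloc_new_chunk(kctx, gpu_heap_va,
			nr_in_flight, pending_frag_count, &new_chunk_ptr);

	if (!err) {
		/* new_chunk_ptr holds the encoded address & size of the
		 * freshly allocated chunk, ready to hand to the GPU.
		 */
	} else if (err == -EBUSY) {
		/* In-flight fragment work is expected to free a chunk,
		 * so the request can be retried later.
		 */
	} else {
		/* -ENOMEM or -EINVAL: the heap cannot grow. */
	}
}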
diff --git a/mali_kbase/csf/mali_kbase_csf_timeout.c b/mali_kbase/csf/mali_kbase_csf_timeout.c
index 495ff28..43b63bd 100644
--- a/mali_kbase/csf/mali_kbase_csf_timeout.c
+++ b/mali_kbase/csf/mali_kbase_csf_timeout.c
@@ -35,8 +35,7 @@
/**
* set_timeout - set a new global progress timeout.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @timeout: the maximum number of GPU cycles without forward progress to allow
* to elapse before terminating a GPU command queue group.
*
diff --git a/mali_kbase/csf/mali_kbase_csf_timeout.h b/mali_kbase/csf/mali_kbase_csf_timeout.h
index d0156c0..dc6f2f2 100644
--- a/mali_kbase/csf/mali_kbase_csf_timeout.h
+++ b/mali_kbase/csf/mali_kbase_csf_timeout.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,8 +28,8 @@ struct kbase_device;
/**
* kbase_csf_timeout_init - Initialize the progress timeout.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface. Must be zero-initialized.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * Must be zero-initialized.
*
* The progress timeout is the number of GPU clock cycles allowed to elapse
* before the driver terminates a GPU command queue group in which a task is
@@ -46,8 +46,7 @@ int kbase_csf_timeout_init(struct kbase_device *kbdev);
/**
* kbase_csf_timeout_term - Terminate the progress timeout.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Removes the sysfs file which allowed the timeout to be reconfigured.
* Does nothing if called on a zero-initialized object.
@@ -57,8 +56,7 @@ void kbase_csf_timeout_term(struct kbase_device *kbdev);
/**
* kbase_csf_timeout_get - get the current global progress timeout.
*
- * @kbdev: Instance of a GPU platform device that implements a command
- * stream front-end interface.
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
*
* Return: the maximum number of GPU cycles that is allowed to elapse without
* forward progress before the driver terminates a GPU command queue
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index 4d68766..c39c789 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -30,6 +30,12 @@
#include <linux/list.h>
#include <linux/mman.h>
+#ifdef CONFIG_DEBUG_FS
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0))
+#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
+#endif
+#endif
+
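The fallback above relies on DEFINE_SIMPLE_ATTRIBUTE and DEFINE_DEBUGFS_ATTRIBUTE taking the same four arguments, so callers can use one spelling on all supported kernels. A hedged example of the shared usage pattern (not part of the patch; the attribute name and backing variable are placeholders):

/* Illustrative sketch only - not part of the patch. */
static u64 example_backing_value;

static int example_val_get(void *data, u64 *val)
{
	*val = *(u64 *)data;
	return 0;
}

static int example_val_set(void *data, u64 val)
{
	*(u64 *)data = val;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(example_val_fops, example_val_get,
			 example_val_set, "%llu\n");

/* Registered later with something like:
 * debugfs_create_file("example_val", 0644, parent_dentry,
 *                     &example_backing_value, &example_val_fops);
 */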
/**
* struct firmware_trace_buffer - Trace Buffer within the MCU firmware
*
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.h
index e167c08..6c539b7 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_csf.h
@@ -28,8 +28,7 @@
*/
#if KBASE_KTRACE_TARGET_RBUF
/**
- * kbasep_ktrace_add_csf - internal function to add trace about Command Stream
- * Frontend
+ * kbasep_ktrace_add_csf - internal function to add trace about CSF
* @kbdev: kbase device
* @code: trace code
* @group: queue group, or NULL if no queue group
diff --git a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
index f265fe9..722b410 100644
--- a/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
+++ b/mali_kbase/debug/backend/mali_kbase_debug_ktrace_defs_csf.h
@@ -61,13 +61,13 @@
* during dumping of the message.
* @group_handle: Handle identifying the associated queue group. Only valid
* when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP.
- * @csg_nr: Number/index of the associated queue group's command stream
+ * @csg_nr: Number/index of the associated queue group's CS
* group to which it is mapped, or negative if none associated.
* Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_GROUP.
* @slot_prio: The priority of the slot for the associated group, if it was
* scheduled. Hence, only valid when @csg_nr >=0 and @flags
* contains KBASE_KTRACE_FLAG_CSF_GROUP.
- * @csi_index: ID of the associated queue's Command Stream HW interface.
+ * @csi_index: ID of the associated queue's CS HW interface.
* Only valid when @flags contains KBASE_KTRACE_FLAG_CSF_QUEUE.
*/
struct kbase_ktrace_backend {
diff --git a/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h b/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
index b50bcee..dd0ad06 100644
--- a/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
+++ b/mali_kbase/debug/mali_kbase_debug_ktrace_codes.h
@@ -115,6 +115,7 @@ int dummy_array[] = {
KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_DESIRED_TILER),
KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE),
KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_TILER),
+ KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_CHANGE_AVAILABLE_L2),
KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE),
KBASE_KTRACE_CODE_MAKE_CODE(PM_CORES_AVAILABLE_TILER),
KBASE_KTRACE_CODE_MAKE_CODE(PM_DESIRED_REACHED),
diff --git a/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h b/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
index 27f687f..a7310eb 100644
--- a/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
+++ b/mali_kbase/debug/mali_kbase_debug_linux_ktrace.h
@@ -86,6 +86,7 @@ DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE);
DEFINE_MALI_ADD_EVENT(PM_CORES_AVAILABLE_TILER);
DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE);
DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_TILER);
+DEFINE_MALI_ADD_EVENT(PM_CORES_CHANGE_AVAILABLE_L2);
DEFINE_MALI_ADD_EVENT(PM_GPU_ON);
DEFINE_MALI_ADD_EVENT(PM_GPU_OFF);
DEFINE_MALI_ADD_EVENT(PM_SET_POLICY);
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 6acd575..7cd3e4a 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -24,11 +24,11 @@
#include "../mali_kbase_device_internal.h"
#include "../mali_kbase_device.h"
-#include <mali_kbase_config_defaults.h>
#include <mali_kbase_hwaccess_backend.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include <csf/mali_kbase_csf.h>
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#ifdef CONFIG_MALI_NO_MALI
#include <mali_kbase_model_linux.h>
@@ -36,13 +36,12 @@
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
-#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
+#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
static void kbase_device_csf_firmware_term(struct kbase_device *kbdev)
{
- kbase_clk_rate_trace_manager_term(kbdev);
kbase_csf_firmware_term(kbdev);
}
@@ -64,9 +63,6 @@ static int kbase_device_csf_firmware_init(struct kbase_device *kbdev)
*/
kbase_pm_context_idle(kbdev);
- if (!err)
- kbase_clk_rate_trace_manager_init(kbdev);
-
return err;
}
@@ -106,6 +102,15 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
#endif /* !CONFIG_MALI_NO_MALI */
#endif /* CONFIG_MALI_DEBUG */
+ kbase_ipa_control_init(kbdev);
+
+ /* Initialise the metrics subsystem, it couldn't be initialized earlier
+ * due to dependency on kbase_ipa_control.
+ */
+ err = kbasep_pm_metrics_init(kbdev);
+ if (err)
+ goto fail_pm_metrics_init;
+
/* Do the initialisation of devfreq.
* Devfreq needs backend_timer_init() for completion of its
* initialisation and it also needs to catch the first callback
@@ -127,7 +132,11 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
return 0;
fail_update_l2_features:
+ kbase_backend_devfreq_term(kbdev);
fail_devfreq_init:
+ kbasep_pm_metrics_term(kbdev);
+fail_pm_metrics_init:
+ kbase_ipa_control_term(kbdev);
#ifdef CONFIG_MALI_DEBUG
#ifndef CONFIG_MALI_NO_MALI
@@ -153,6 +162,8 @@ fail_reset_gpu_init:
static void kbase_backend_late_term(struct kbase_device *kbdev)
{
kbase_backend_devfreq_term(kbdev);
+ kbasep_pm_metrics_term(kbdev);
+ kbase_ipa_control_term(kbdev);
kbase_hwaccess_pm_halt(kbdev);
kbase_reset_gpu_term(kbdev);
kbase_hwaccess_pm_term(kbdev);
@@ -246,6 +257,7 @@ static void kbase_device_term_partial(struct kbase_device *kbdev,
void kbase_device_term(struct kbase_device *kbdev)
{
+ kbdev->csf.mali_file_inode = NULL;
kbase_device_term_partial(kbdev, ARRAY_SIZE(dev_init));
kbase_mem_halt(kbdev);
}
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index 97bcc1d..ff899be 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -108,8 +108,22 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev)))
+ if (!WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(
+ kbdev))) {
+ struct base_gpu_queue_group_error const
+ err_payload = { .error_type =
+ BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = {
+ .fatal_group = {
+ .status =
+ GPU_EXCEPTION_TYPE_SW_FAULT_0,
+ } } };
+
scheduler->active_protm_grp->faulted = true;
+ kbase_csf_add_fatal_error_to_kctx(
+ scheduler->active_protm_grp, &err_payload);
+ kbase_event_wakeup(scheduler->active_protm_grp->kctx);
+ }
kbase_csf_scheduler_spin_unlock(kbdev, flags);
if (kbase_prepare_to_reset_gpu(kbdev))
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
index f7e9b12..8dde2b1 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_fault_csf.c
@@ -29,14 +29,14 @@ const char *kbase_gpu_exception_name(u32 const exception_code)
const char *e;
switch (exception_code) {
- /* Command Stream exceptions */
+ /* CS exceptions */
case CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED:
e = "CS_RESOURCE_TERMINATED";
break;
case CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT:
e = "CS_INHERIT_FAULT";
break;
- /* Command Stream fatal exceptions */
+ /* CS fatal exceptions */
case CS_FATAL_EXCEPTION_TYPE_CS_CONFIG_FAULT:
e = "CS_CONFIG_FAULT";
break;
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
index ff6e4ae..4a5f97f 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -27,6 +27,42 @@
#error "Cannot be compiled with JM"
#endif
+/* IPA control registers */
+
+#define IPA_CONTROL_BASE 0x40000
+#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE+(r))
+#define COMMAND 0x000 /* (WO) Command register */
+#define STATUS 0x004 /* (RO) Status register */
+#define TIMER 0x008 /* (RW) Timer control register */
+
+#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
+#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */
+#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */
+#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */
+#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */
+#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */
+#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */
+#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */
+
+/* Accumulated counter values for CS hardware */
+#define VALUE_CSHW_BASE 0x100
+#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+/* Accumulated counter values for memory system */
+#define VALUE_MEMSYS_BASE 0x140
+#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#define VALUE_TILER_BASE 0x180
+#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+#define VALUE_SHADER_BASE 0x1C0
+#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+
+
#include "csf/mali_gpu_csf_control_registers.h"
#define GPU_CONTROL_MCU_REG(r) (GPU_CONTROL_MCU + (r))
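As a hedged illustration of how the VALUE_*_REG_LO/HI pairs above compose into a 64-bit accumulated counter (not part of the patch): kbase_reg_read() is the driver's existing 32-bit register accessor, but the exact read sequence, in particular any guard against the high word changing between the two reads, is an assumption of this sketch.

/* Illustrative sketch only - not part of the patch. */
static u64 example_read_memsys_counter(struct kbase_device *kbdev, u32 n)
{
	u32 lo = kbase_reg_read(kbdev,
				IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(n)));
	u32 hi = kbase_reg_read(kbdev,
				IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(n)));

	return ((u64)hi << 32) | lo;
}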
@@ -68,7 +104,7 @@
/* Normal memory, shared between MCU and Host */
#define AS_MEMATTR_INDEX_SHARED 6
-/* Configuration bits for the Command Stream Frontend. */
+/* Configuration bits for the CSF. */
#define CSF_CONFIG 0xF00
/* CSF_CONFIG register */
@@ -95,7 +131,7 @@
*/
#define PRFCNT_CSHW_EN 0x06C /* (RW) Performance counter
- * enable for Command Stream Hardware
+ * enable for CS Hardware
*/
#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
@@ -128,7 +164,7 @@
*/
#define GPU_COMMAND_RESET_PAYLOAD_FAST_RESET 0x00
-/* This will leave the state of active command streams UNDEFINED, but will leave the external bus in a defined and
+/* This will leave the state of active CSs UNDEFINED, but will leave the external bus in a defined and
* idle state.
*/
#define GPU_COMMAND_RESET_PAYLOAD_SOFT_RESET 0x01
diff --git a/mali_kbase/gpu/mali_kbase_gpu_id.h b/mali_kbase/gpu/mali_kbase_gpu_id.h
index 31d5526..446b83a 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_id.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_id.h
@@ -106,7 +106,6 @@
#define GPU_ID2_PRODUCT_LODX GPU_ID2_MODEL_MAKE(10, 7)
#define GPU_ID2_PRODUCT_TTUX GPU_ID2_MODEL_MAKE(11, 2)
#define GPU_ID2_PRODUCT_LTUX GPU_ID2_MODEL_MAKE(11, 3)
-#define GPU_ID2_PRODUCT_TE2X GPU_ID2_MODEL_MAKE(11, 1)
/* Helper macro to create a GPU_ID assuming valid values for id, major,
minor, status */
diff --git a/mali_kbase/ipa/Kbuild b/mali_kbase/ipa/Kbuild
index 3d9cf80..91bb48e 100644
--- a/mali_kbase/ipa/Kbuild
+++ b/mali_kbase/ipa/Kbuild
@@ -1,5 +1,5 @@
#
-# (C) COPYRIGHT 2016-2018 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2016-2018, 2020 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -21,8 +21,16 @@
mali_kbase-y += \
ipa/mali_kbase_ipa_simple.o \
- ipa/mali_kbase_ipa.o \
- ipa/mali_kbase_ipa_vinstr_g7x.o \
- ipa/mali_kbase_ipa_vinstr_common.o
+ ipa/mali_kbase_ipa.o
-mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
\ No newline at end of file
+mali_kbase-$(CONFIG_DEBUG_FS) += ipa/mali_kbase_ipa_debugfs.o
+
+ifeq ($(MALI_USE_CSF),1)
+ mali_kbase-y += \
+ ipa/backend/mali_kbase_ipa_counter_csf.o \
+ ipa/backend/mali_kbase_ipa_counter_common_csf.o
+else
+ mali_kbase-y += \
+ ipa/backend/mali_kbase_ipa_counter_jm.o \
+ ipa/backend/mali_kbase_ipa_counter_common_jm.o
+endif
\ No newline at end of file
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
new file mode 100644
index 0000000..2d8f963
--- /dev/null
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ipa_counter_common_csf.h"
+#include "ipa/mali_kbase_ipa_debugfs.h"
+
+#define DEFAULT_SCALING_FACTOR 5
+
+/* If the value of GPU_ACTIVE is below this, use the simple model
+ * instead, to avoid extrapolating small amounts of counter data across
+ * large sample periods.
+ */
+#define DEFAULT_MIN_SAMPLE_CYCLES 10000
+
+static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
+{
+ s64 rtn;
+
+ if (a > 0 && (S64_MAX - a) < b)
+ rtn = S64_MAX;
+ else if (a < 0 && (S64_MIN - a) > b)
+ rtn = S64_MIN;
+ else
+ rtn = a + b;
+
+ return rtn;
+}
+
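A minimal sketch of the saturating behaviour, for illustration only (this helper is not part of the patch):

/* Illustration only: sums that would overflow s64 are clamped to
 * S64_MAX / S64_MIN instead of wrapping around.
 */
static void kbase_ipa_saturate_example(void)
{
	s64 hi = kbase_ipa_add_saturate(S64_MAX - 5, 10);  /* == S64_MAX */
	s64 lo = kbase_ipa_add_saturate(S64_MIN + 5, -10); /* == S64_MIN */

	WARN_ON(hi != S64_MAX || lo != S64_MIN);
}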
+static s64 kbase_ipa_group_energy(s32 coeff, u64 counter_value)
+{
+ /* Range: 0 < counter_value < 2^27 */
+ if (counter_value > U32_MAX)
+ counter_value = U32_MAX;
+
+ /* Range: -2^49 < ret < 2^49 */
+ return counter_value * (s64)coeff;
+}
+
+/**
+ * kbase_ipa_attach_ipa_control() - register with kbase_ipa_control
+ * @model_data: Pointer to counter model data
+ *
+ * Register IPA counter model as a client of kbase_ipa_control, which
+ * provides an interface to retreive the accumulated value of hardware
+ * counters to calculate energy consumption.
+ *
+ * Return: 0 on success, or an error code.
+ */
+static int
+kbase_ipa_attach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
+{
+ struct kbase_device *kbdev = model_data->kbdev;
+ struct kbase_ipa_control_perf_counter *perf_counters;
+ size_t num_counters = model_data->counters_def_num;
+ int err;
+ size_t i;
+
+ perf_counters =
+ kcalloc(num_counters, sizeof(*perf_counters), GFP_KERNEL);
+
+ if (!perf_counters) {
+ dev_err(kbdev->dev,
+ "Failed to allocate memory for perf_counters array");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < num_counters; ++i) {
+ const struct kbase_ipa_counter *counter =
+ &model_data->counters_def[i];
+
+ perf_counters[i].type = counter->counter_block_type;
+ perf_counters[i].idx = counter->counter_block_offset;
+ perf_counters[i].gpu_norm = false;
+ perf_counters[i].scaling_factor = 1;
+ }
+
+ err = kbase_ipa_control_register(kbdev, perf_counters, num_counters,
+ &model_data->ipa_control_cli);
+ if (err)
+ dev_err(kbdev->dev,
+ "Failed to register IPA with kbase_ipa_control");
+
+ kfree(perf_counters);
+ return err;
+}
+
+/**
+ * kbase_ipa_detach_ipa_control() - De-register from kbase_ipa_control.
+ * @model_data: Pointer to counter model data
+ */
+static void
+kbase_ipa_detach_ipa_control(struct kbase_ipa_counter_model_data *model_data)
+{
+ if (model_data->ipa_control_cli) {
+ kbase_ipa_control_unregister(model_data->kbdev,
+ model_data->ipa_control_cli);
+ model_data->ipa_control_cli = NULL;
+ }
+}
+
+int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
+{
+ struct kbase_ipa_counter_model_data *model_data =
+ (struct kbase_ipa_counter_model_data *)model->model_data;
+ s64 energy = 0;
+ size_t i;
+ u64 coeff = 0, coeff_mul = 0;
+ u32 active_cycles;
+ u64 ret;
+
+ /* The last argument is supposed to be a pointer to the location that
+ * will store the time for which GPU has been in protected mode since
+ * last query. This can be passed as NULL as counter model itself will
+ * not be used when GPU enters protected mode, as IPA is supposed to
+ * switch to the simple power model.
+ */
+ ret = kbase_ipa_control_query(model->kbdev, model_data->ipa_control_cli,
+ model_data->counter_values,
+ model_data->counters_def_num, NULL);
+ if (WARN_ON(ret))
+ return ret;
+
+ /* Range: 0 (GPU not used at all), to the max sampling interval, say
+ * 1s, * max GPU frequency (GPU 100% utilized).
+ * 0 <= active_cycles <= 1 * ~2GHz
+ * 0 <= active_cycles < 2^31
+ */
+ active_cycles = model_data->get_active_cycles(model_data);
+
+ /* If the value of the active_cycles is less than the threshold, then
+ * return an error so that IPA framework can approximate using the
+ * cached simple model results instead. This may be more accurate
+ * than extrapolating using a very small counter dump.
+ */
+ if (active_cycles < (u32)max(model_data->min_sample_cycles, 0))
+ return -ENODATA;
+
+ /* Range: 1 <= active_cycles < 2^31 */
+ active_cycles = max(1u, active_cycles);
+
+ /* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
+ * -2^57 < energy < 2^57
+ */
+ for (i = 0; i < model_data->counters_def_num; i++) {
+ s32 coeff = model_data->counter_coeffs[i];
+ u64 counter_value = model_data->counter_values[i];
+ s64 group_energy = kbase_ipa_group_energy(coeff, counter_value);
+
+ energy = kbase_ipa_add_saturate(energy, group_energy);
+ }
+
+ /* Range: 0 <= coeff < 2^57 */
+ if (energy > 0)
+ coeff = energy;
+
+ /* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
+ * can be constrained further: Counter values can only be increased by
+ * a theoretical maximum of about 64k per clock cycle. Beyond this,
+ * we'd have to sample every 1ms to avoid them overflowing at the
+ * lowest clock frequency (say 100MHz). Therefore, we can write the
+ * range of 'coeff' in terms of active_cycles:
+ *
+ * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
+ * coeff <= SUM(coeffN * counterN) * max_num_cores
+ * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
+ * (substitute max_counter = 2^16 * active_cycles)
+ * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
+ * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5
+ * coeff <= 2^46 * active_cycles
+ *
+ * So after the division: 0 <= coeff <= 2^46
+ */
+ coeff = div_u64(coeff, active_cycles);
+
+ /* Not all models were derived at the same reference voltage. Voltage
+ * scaling is done by multiplying by V^2, so we need to *divide* by
+ * Vref^2 here.
+ * Range: 0 <= coeff <= 2^49
+ */
+ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+ /* Range: 0 <= coeff <= 2^52 */
+ coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
+
+ /* Scale by user-specified integer factor.
+ * Range: 0 <= coeff_mul < 2^57
+ */
+ coeff_mul = coeff * model_data->scaling_factor;
+
+ /* The power models have results with units
+ * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
+ * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
+ * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
+ * by 1000.
+ * Range: 0 <= coeff_mul < 2^47
+ */
+ coeff_mul = div_u64(coeff_mul, 1000u);
+
+ /* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
+ *coeffp = clamp(coeff_mul, (u64)0, (u64)1 << 16);
+ return 0;
+}
+
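A worked example of the scaling pipeline above, using hypothetical round numbers rather than real counter data:

/* Hypothetical walk-through of kbase_ipa_counter_dynamic_coeff():
 *   energy        = 1e12  (saturating sum of coeff * counter products)
 *   active_cycles = 1e6
 *   coeff         = 1e12 / 1e6                                 = 1e6
 *   coeff         = 1e6 * 1000 / 1000 (Vref = 1000mV, applied twice) = 1e6
 *   coeff_mul     = 1e6 * 5           (scaling_factor = 5)     = 5e6
 *   coeff_mul     = 5e6 / 1000                                 = 5000
 *   *coeffp       = clamp(5000, 0, 1 << 16)                    = 5000
 * i.e. about 5000 pW/(Hz V^2); at 400MHz and 750mV this corresponds to
 * roughly 5000e-12 * 400e6 * 0.75^2 ~= 1.1 W of dynamic power.
 */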
+int kbase_ipa_counter_common_model_init(
+ struct kbase_ipa_model *model,
+ const struct kbase_ipa_counter *ipa_counters_def,
+ size_t ipa_num_counters,
+ kbase_ipa_get_active_cycles_callback get_active_cycles,
+ s32 reference_voltage)
+{
+ int err = 0;
+ size_t i;
+ struct kbase_ipa_counter_model_data *model_data;
+
+ if (!model || !ipa_counters_def || !ipa_num_counters ||
+ !get_active_cycles)
+ return -EINVAL;
+
+ model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
+ if (!model_data)
+ return -ENOMEM;
+
+ model_data->kbdev = model->kbdev;
+ model_data->counters_def = ipa_counters_def;
+ model_data->counters_def_num = ipa_num_counters;
+ model_data->get_active_cycles = get_active_cycles;
+
+ model->model_data = (void *)model_data;
+
+ for (i = 0; i < model_data->counters_def_num; ++i) {
+ const struct kbase_ipa_counter *counter =
+ &model_data->counters_def[i];
+
+ model_data->counter_coeffs[i] = counter->coeff_default_value;
+ err = kbase_ipa_model_add_param_s32(
+ model, counter->name, &model_data->counter_coeffs[i], 1,
+ false);
+ if (err)
+ goto exit;
+ }
+
+ model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
+ err = kbase_ipa_model_add_param_s32(
+ model, "scale", &model_data->scaling_factor, 1, false);
+ if (err)
+ goto exit;
+
+ model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
+ err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
+ &model_data->min_sample_cycles, 1,
+ false);
+ if (err)
+ goto exit;
+
+ model_data->reference_voltage = reference_voltage;
+ err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
+ &model_data->reference_voltage, 1,
+ false);
+ if (err)
+ goto exit;
+
+ err = kbase_ipa_attach_ipa_control(model_data);
+
+exit:
+ if (err) {
+ kbase_ipa_model_param_free_all(model);
+ kfree(model_data);
+ }
+ return err;
+}
+
+void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model)
+{
+ struct kbase_ipa_counter_model_data *model_data =
+ (struct kbase_ipa_counter_model_data *)model->model_data;
+
+ kbase_ipa_detach_ipa_control(model_data);
+ kfree(model_data);
+}
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.h b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.h
new file mode 100644
index 0000000..064c101
--- /dev/null
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_csf.h
@@ -0,0 +1,145 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_IPA_COUNTER_COMMON_CSF_H_
+#define _KBASE_IPA_COUNTER_COMMON_CSF_H_
+
+#include "mali_kbase.h"
+#include "csf/ipa_control/mali_kbase_csf_ipa_control.h"
+
+/* Maximum number of HW counters used by the IPA counter model. */
+#define KBASE_IPA_MAX_COUNTER_DEF_NUM 16
+
+struct kbase_ipa_counter_model_data;
+
+typedef u32 (*kbase_ipa_get_active_cycles_callback)(
+ struct kbase_ipa_counter_model_data *);
+
+/**
+ * struct kbase_ipa_counter_model_data - IPA counter model context per device
+ * @kbdev: Pointer to kbase device
+ * @ipa_control_cli: Handle returned on registering IPA counter model as a
+ * client of kbase_ipa_control.
+ * @counters_def: Array of description of HW counters used by the IPA
+ * counter model.
+ * @counters_def_num: Number of elements in the array of HW counters.
+ * @get_active_cycles: Callback to return number of active cycles during
+ * counter sample period.
+ * @counter_coeffs: Buffer to store coefficient value used for HW counters
+ * @counter_values: Buffer to store the accumulated value of HW counters
+ * retrieved from kbase_ipa_control.
+ * @reference_voltage: voltage, in mV, of the operating point used when
+ * deriving the power model coefficients. Range approx
+ * 0.1V - 5V (~= 8V): 2^7 <= reference_voltage <= 2^13
+ * @scaling_factor: User-specified power scaling factor. This is an
+ * integer, which is multiplied by the power coefficient
+ * just before OPP scaling.
+ * Range approx 0-32: 0 < scaling_factor < 2^5
+ * @min_sample_cycles: If the value of the GPU_ACTIVE counter (the number of
+ * cycles the GPU was working) is less than
+ * min_sample_cycles, the counter model will return an
+ * error, causing the IPA framework to approximate using
+ * the cached simple model results instead. This may be
+ * more accurate than extrapolating using a very small
+ * counter dump.
+ */
+struct kbase_ipa_counter_model_data {
+ struct kbase_device *kbdev;
+ void *ipa_control_cli;
+ const struct kbase_ipa_counter *counters_def;
+ size_t counters_def_num;
+ kbase_ipa_get_active_cycles_callback get_active_cycles;
+ s32 counter_coeffs[KBASE_IPA_MAX_COUNTER_DEF_NUM];
+ u64 counter_values[KBASE_IPA_MAX_COUNTER_DEF_NUM];
+ s32 reference_voltage;
+ s32 scaling_factor;
+ s32 min_sample_cycles;
+};
+
+/**
+ * struct kbase_ipa_counter - represents a single HW counter used by IPA model
+ * @name: Name of the HW counter used by IPA counter model
+ * for energy estimation.
+ * @coeff_default_value: Default value of coefficient for the counter.
+ * Coefficients are interpreted as fractions where the
+ * denominator is 1000000.
+ * @counter_block_offset: Index to the counter within the counter block of
+ * type @counter_block_type.
+ * @counter_block_type: Type of the counter block.
+ */
+struct kbase_ipa_counter {
+ const char *name;
+ s32 coeff_default_value;
+ u32 counter_block_offset;
+ enum kbase_ipa_core_type counter_block_type;
+};
+
+/**
+ * kbase_ipa_counter_dynamic_coeff() - calculate dynamic power based on HW counters
+ * @model: pointer to instantiated model
+ * @coeffp: pointer to location where calculated power, in
+ * pW/(Hz V^2), is stored.
+ *
+ * This is a GPU-agnostic implementation of the get_dynamic_coeff()
+ * function of an IPA model. It relies on the model being populated
+ * with GPU-specific attributes at initialization time.
+ *
+ * Return: 0 on success, or an error code.
+ */
+int kbase_ipa_counter_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp);
+
+/**
+ * kbase_ipa_counter_common_model_init() - initialize ipa power model
+ * @model: ipa power model to initialize
+ * @ipa_counters_def: Array describing the HW counters used by the
+ * IPA counter model; each entry contains the counter
+ * index and the default coefficient value.
+ * @ipa_num_counters: number of elements in the array @ipa_counters_def
+ * @get_active_cycles: callback to return the number of cycles the GPU was
+ * active during the counter sample period.
+ * @reference_voltage: voltage, in mV, of the operating point used when
+ * deriving the power model coefficients.
+ *
+ * This function performs the initialization steps common to IPA models
+ * based on counter values. In each call, the model passes its specific
+ * coefficient values per IPA counter group via the @ipa_counters_def
+ * array.
+ *
+ * Return: 0 on success, error code otherwise
+ */
+int kbase_ipa_counter_common_model_init(
+ struct kbase_ipa_model *model,
+ const struct kbase_ipa_counter *ipa_counters_def,
+ size_t ipa_num_counters,
+ kbase_ipa_get_active_cycles_callback get_active_cycles,
+ s32 reference_voltage);
+
+/**
+ * kbase_ipa_counter_common_model_term() - terminate ipa power model
+ * @model: ipa power model to terminate
+ *
+ * This function performs all necessary steps to terminate the IPA power
+ * model, including cleanup of the resources allocated to hold model data.
+ */
+void kbase_ipa_counter_common_model_term(struct kbase_ipa_model *model);
+
+#endif /* _KBASE_IPA_COUNTER_COMMON_CSF_H_ */
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
index 702db16..ef1e526 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.c
@@ -20,8 +20,8 @@
*
*/
-#include "mali_kbase_ipa_vinstr_common.h"
-#include "mali_kbase_ipa_debugfs.h"
+#include "mali_kbase_ipa_counter_common_jm.h"
+#include "ipa/mali_kbase_ipa_debugfs.h"
#define DEFAULT_SCALING_FACTOR 5
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
index 46e3cd4..02e735f 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_common.h
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_common_jm.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2017-2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,8 +20,8 @@
*
*/
-#ifndef _KBASE_IPA_VINSTR_COMMON_H_
-#define _KBASE_IPA_VINSTR_COMMON_H_
+#ifndef _KBASE_IPA_COUNTER_COMMON_JM_H_
+#define _KBASE_IPA_COUNTER_COMMON_JM_H_
#include "mali_kbase.h"
#include "mali_kbase_hwcnt_virtualizer.h"
@@ -214,4 +214,4 @@ int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
*/
void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model);
-#endif /* _KBASE_IPA_VINSTR_COMMON_H_ */
+#endif /* _KBASE_IPA_COUNTER_COMMON_JM_H_ */
diff --git a/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
new file mode 100644
index 0000000..8cd47a1
--- /dev/null
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_csf.c
@@ -0,0 +1,171 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ipa_counter_common_csf.h"
+#include "mali_kbase.h"
+
+/* CSHW counter block offsets */
+#define GPU_ACTIVE (4)
+
+/* MEMSYS counter block offsets */
+#define MEMSYS_L2_ANY_LOOKUP (25)
+
+/* SC counter block offsets */
+#define SC_EXEC_INSTR_FMA (27)
+#define SC_EXEC_INSTR_MSG (30)
+#define SC_TEX_FILT_NUM_OPERATIONS (39)
+
+/**
+ * kbase_csf_get_active_cycles() - Return the value of the GPU_ACTIVE counter
+ * @model_data: Pointer to the IPA counter model data.
+ *
+ * Return: the number of cycles the GPU was active during the counter sampling
+ * period.
+ */
+static u32 kbase_csf_get_active_cycles(
+ struct kbase_ipa_counter_model_data *model_data)
+{
+ size_t i;
+
+ for (i = 0; i < model_data->counters_def_num; ++i) {
+ const struct kbase_ipa_counter *counter =
+ &model_data->counters_def[i];
+
+ if (!strcmp(counter->name, "gpu_active"))
+ return model_data->counter_values[i];
+ }
+
+ WARN_ON_ONCE(1);
+
+ return 0;
+}
+
+/* Table of HW counters used by the IPA counter model.
+ *
+ * Each entry describes one performance counter used by the IPA counter
+ * model for energy estimation.
+ */
+static const struct kbase_ipa_counter ipa_counters_def_todx[] = {
+ {
+ .name = "l2_access",
+ .coeff_default_value = 599800,
+ .counter_block_offset = MEMSYS_L2_ANY_LOOKUP,
+ .counter_block_type = KBASE_IPA_CORE_TYPE_MEMSYS,
+ },
+ {
+ .name = "exec_instr_msg",
+ .coeff_default_value = 1830200,
+ .counter_block_offset = SC_EXEC_INSTR_MSG,
+ .counter_block_type = KBASE_IPA_CORE_TYPE_SHADER,
+ },
+ {
+ .name = "exec_instr_fma",
+ .coeff_default_value = 407300,
+ .counter_block_offset = SC_EXEC_INSTR_FMA,
+ .counter_block_type = KBASE_IPA_CORE_TYPE_SHADER,
+ },
+ {
+ .name = "tex_filt_num_operations",
+ .coeff_default_value = 224500,
+ .counter_block_offset = SC_TEX_FILT_NUM_OPERATIONS,
+ .counter_block_type = KBASE_IPA_CORE_TYPE_SHADER,
+ },
+ {
+ .name = "gpu_active",
+ .coeff_default_value = 153800,
+ .counter_block_offset = GPU_ACTIVE,
+ .counter_block_type = KBASE_IPA_CORE_TYPE_CSHW,
+ },
+};
+
+#define IPA_POWER_MODEL_OPS(gpu, init_token) \
+ const struct kbase_ipa_model_ops kbase_ ## gpu ## _ipa_model_ops = { \
+ .name = "mali-" #gpu "-power-model", \
+ .init = kbase_ ## init_token ## _power_model_init, \
+ .term = kbase_ipa_counter_common_model_term, \
+ .get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff, \
+ }; \
+ KBASE_EXPORT_TEST_API(kbase_ ## gpu ## _ipa_model_ops)
+
+#define STANDARD_POWER_MODEL(gpu, reference_voltage) \
+ static int kbase_ ## gpu ## _power_model_init(\
+ struct kbase_ipa_model *model) \
+ { \
+ BUILD_BUG_ON(ARRAY_SIZE(ipa_counters_def_ ## gpu) > \
+ KBASE_IPA_MAX_COUNTER_DEF_NUM); \
+ return kbase_ipa_counter_common_model_init(model, \
+ ipa_counters_def_ ## gpu, \
+ ARRAY_SIZE(ipa_counters_def_ ## gpu), \
+ kbase_csf_get_active_cycles, \
+ (reference_voltage)); \
+ } \
+ IPA_POWER_MODEL_OPS(gpu, gpu)
+
+
+#define ALIAS_POWER_MODEL(gpu, as_gpu) \
+ IPA_POWER_MODEL_OPS(gpu, as_gpu)
+
+/* Currently tBEx energy model is being used, for which reference voltage
+ * value is 1000 mV.
+ */
+STANDARD_POWER_MODEL(todx, 1000);
+
+/* Assuming LODX is an alias of TODX for IPA */
+ALIAS_POWER_MODEL(lodx, todx);
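For reference, STANDARD_POWER_MODEL(todx, 1000) above expands to roughly the following (whitespace adjusted; shown only to illustrate the generated code, not an additional definition):

static int kbase_todx_power_model_init(struct kbase_ipa_model *model)
{
	BUILD_BUG_ON(ARRAY_SIZE(ipa_counters_def_todx) >
		     KBASE_IPA_MAX_COUNTER_DEF_NUM);
	return kbase_ipa_counter_common_model_init(model,
			ipa_counters_def_todx,
			ARRAY_SIZE(ipa_counters_def_todx),
			kbase_csf_get_active_cycles, (1000));
}
const struct kbase_ipa_model_ops kbase_todx_ipa_model_ops = {
	.name = "mali-todx-power-model",
	.init = kbase_todx_power_model_init,
	.term = kbase_ipa_counter_common_model_term,
	.get_dynamic_coeff = kbase_ipa_counter_dynamic_coeff,
};
KBASE_EXPORT_TEST_API(kbase_todx_ipa_model_ops);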
+
+static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = {
+ &kbase_todx_ipa_model_ops,
+ &kbase_lodx_ipa_model_ops
+};
+
+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
+ struct kbase_device *kbdev, const char *name)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) {
+ const struct kbase_ipa_model_ops *ops =
+ ipa_counter_model_ops[i];
+
+ if (!strcmp(ops->name, name))
+ return ops;
+ }
+
+ dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+
+ return NULL;
+}
+
+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
+{
+ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+ GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+ case GPU_ID2_PRODUCT_TODX:
+ return "mali-todx-power-model";
+ case GPU_ID2_PRODUCT_LODX:
+ return "mali-lodx-power-model";
+ default:
+ return NULL;
+ }
+} \ No newline at end of file
diff --git a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
index 83174eb..6799d8e 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_vinstr_g7x.c
+++ b/mali_kbase/ipa/backend/mali_kbase_ipa_counter_jm.c
@@ -21,7 +21,7 @@
*/
#include <linux/thermal.h>
-#include "mali_kbase_ipa_vinstr_common.h"
+#include "mali_kbase_ipa_counter_common_jm.h"
#include "mali_kbase.h"
@@ -488,3 +488,68 @@ STANDARD_POWER_MODEL(tbax, 1000);
ALIAS_POWER_MODEL(g52, g76);
/* tnax is an alias of g77 (TTRX) for IPA */
ALIAS_POWER_MODEL(tnax, g77);
+
+static const struct kbase_ipa_model_ops *ipa_counter_model_ops[] = {
+ &kbase_g71_ipa_model_ops,
+ &kbase_g72_ipa_model_ops,
+ &kbase_g76_ipa_model_ops,
+ &kbase_g52_ipa_model_ops,
+ &kbase_g52_r1_ipa_model_ops,
+ &kbase_g51_ipa_model_ops,
+ &kbase_g77_ipa_model_ops,
+ &kbase_tnax_ipa_model_ops,
+ &kbase_tbex_ipa_model_ops,
+ &kbase_tbax_ipa_model_ops
+};
+
+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
+ struct kbase_device *kbdev, const char *name)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ipa_counter_model_ops); ++i) {
+ const struct kbase_ipa_model_ops *ops =
+ ipa_counter_model_ops[i];
+
+ if (!strcmp(ops->name, name))
+ return ops;
+ }
+
+ dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
+
+ return NULL;
+}
+
+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id)
+{
+ const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
+ GPU_ID_VERSION_PRODUCT_ID_SHIFT;
+
+ switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
+ case GPU_ID2_PRODUCT_TMIX:
+ return "mali-g71-power-model";
+ case GPU_ID2_PRODUCT_THEX:
+ return "mali-g72-power-model";
+ case GPU_ID2_PRODUCT_TNOX:
+ return "mali-g76-power-model";
+ case GPU_ID2_PRODUCT_TSIX:
+ return "mali-g51-power-model";
+ case GPU_ID2_PRODUCT_TGOX:
+ if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
+ (0 << GPU_ID2_VERSION_MAJOR_SHIFT))
+ /* g52 aliased to g76 power-model's ops */
+ return "mali-g52-power-model";
+ else
+ return "mali-g52_r1-power-model";
+ case GPU_ID2_PRODUCT_TNAX:
+ return "mali-tnax-power-model";
+ case GPU_ID2_PRODUCT_TTRX:
+ return "mali-g77-power-model";
+ case GPU_ID2_PRODUCT_TBEX:
+ return "mali-tbex-power-model";
+ case GPU_ID2_PRODUCT_TBAX:
+ return "mali-tbax-power-model";
+ default:
+ return NULL;
+ }
+} \ No newline at end of file
diff --git a/mali_kbase/ipa/mali_kbase_ipa.c b/mali_kbase/ipa/mali_kbase_ipa.c
index 1ee7376..5946e4a 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.c
+++ b/mali_kbase/ipa/mali_kbase_ipa.c
@@ -39,20 +39,6 @@
#define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
-static const struct kbase_ipa_model_ops *kbase_ipa_all_model_ops[] = {
- &kbase_simple_ipa_model_ops,
- &kbase_g71_ipa_model_ops,
- &kbase_g72_ipa_model_ops,
- &kbase_g76_ipa_model_ops,
- &kbase_g52_ipa_model_ops,
- &kbase_g52_r1_ipa_model_ops,
- &kbase_g51_ipa_model_ops,
- &kbase_g77_ipa_model_ops,
- &kbase_tnax_ipa_model_ops,
- &kbase_tbex_ipa_model_ops,
- &kbase_tbax_ipa_model_ops
-};
-
int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
{
int err = 0;
@@ -72,55 +58,24 @@ int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
}
const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
- const char *name)
+ const char *name)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(kbase_ipa_all_model_ops); ++i) {
- const struct kbase_ipa_model_ops *ops = kbase_ipa_all_model_ops[i];
-
- if (!strcmp(ops->name, name))
- return ops;
- }
+ if (!strcmp(name, kbase_simple_ipa_model_ops.name))
+ return &kbase_simple_ipa_model_ops;
- dev_err(kbdev->dev, "power model \'%s\' not found\n", name);
-
- return NULL;
+ return kbase_ipa_counter_model_ops_find(kbdev, name);
}
KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
{
- const u32 prod_id = (gpu_id & GPU_ID_VERSION_PRODUCT_ID) >>
- GPU_ID_VERSION_PRODUCT_ID_SHIFT;
-
- switch (GPU_ID2_MODEL_MATCH_VALUE(prod_id)) {
- case GPU_ID2_PRODUCT_TMIX:
- return "mali-g71-power-model";
- case GPU_ID2_PRODUCT_THEX:
- return "mali-g72-power-model";
- case GPU_ID2_PRODUCT_TNOX:
- return "mali-g76-power-model";
- case GPU_ID2_PRODUCT_TSIX:
- return "mali-g51-power-model";
- case GPU_ID2_PRODUCT_TGOX:
- if ((gpu_id & GPU_ID2_VERSION_MAJOR) ==
- (0 << GPU_ID2_VERSION_MAJOR_SHIFT))
- /* g52 aliased to g76 power-model's ops */
- return "mali-g52-power-model";
- else
- return "mali-g52_r1-power-model";
- case GPU_ID2_PRODUCT_TNAX:
- return "mali-tnax-power-model";
- case GPU_ID2_PRODUCT_TTRX:
- return "mali-g77-power-model";
- case GPU_ID2_PRODUCT_TBEX:
- return "mali-tbex-power-model";
- case GPU_ID2_PRODUCT_TBAX:
- return "mali-tbax-power-model";
- default:
+ const char* model_name =
+ kbase_ipa_counter_model_name_from_id(gpu_id);
+
+ if (!model_name)
return KBASE_IPA_FALLBACK_MODEL_NAME;
- }
+ else
+ return model_name;
}
KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
diff --git a/mali_kbase/ipa/mali_kbase_ipa.h b/mali_kbase/ipa/mali_kbase_ipa.h
index 10565cc..5f04169 100644
--- a/mali_kbase/ipa/mali_kbase_ipa.h
+++ b/mali_kbase/ipa/mali_kbase_ipa.h
@@ -164,6 +164,17 @@ const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *
const char *name);
/**
+ * kbase_ipa_counter_model_ops_find - Lookup an IPA counter model using its name
+ * @kbdev: pointer to kbase device
+ * @name: name of counter model to lookup
+ *
+ * Return: Pointer to counter model's 'ops' structure, or NULL if the lookup
+ * failed.
+ */
+const struct kbase_ipa_model_ops *kbase_ipa_counter_model_ops_find(
+ struct kbase_device *kbdev, const char *name);
+
+/**
* kbase_ipa_model_name_from_id - Find the best model for a given GPU ID
* @gpu_id: GPU ID of GPU the model will be used for
*
@@ -173,6 +184,16 @@ const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *
const char *kbase_ipa_model_name_from_id(u32 gpu_id);
/**
+ * kbase_ipa_counter_model_name_from_id - Find the best counter model for a
+ * given GPU ID
+ * @gpu_id: GPU ID of GPU the counter model will be used for
+ *
+ * Return: The name of the appropriate counter-based model, or NULL if
+ * no counter model exists.
+ */
+const char *kbase_ipa_counter_model_name_from_id(u32 gpu_id);
+
+/**
 * kbase_ipa_init_model - Initialize the particular IPA model
* @kbdev: pointer to kbase device
* @ops: pointer to object containing model specific methods.
@@ -183,7 +204,7 @@ const char *kbase_ipa_model_name_from_id(u32 gpu_id);
* Return: pointer to kbase_ipa_model on success, NULL on error
*/
struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
- const struct kbase_ipa_model_ops *ops);
+ const struct kbase_ipa_model_ops *ops);
/**
* kbase_ipa_term_model - Terminate the particular IPA model
* @model: pointer to the IPA model object, already initialized
@@ -202,17 +223,6 @@ void kbase_ipa_term_model(struct kbase_ipa_model *model);
*/
void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev);
-extern const struct kbase_ipa_model_ops kbase_g71_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g72_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g76_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g52_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g52_r1_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g51_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_g77_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_tnax_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_tbex_ipa_model_ops;
-extern const struct kbase_ipa_model_ops kbase_tbax_ipa_model_ops;
-
/**
* kbase_get_real_power() - get the real power consumption of the GPU
* @df: dynamic voltage and frequency scaling information for the GPU.
diff --git a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
index 30a3b7d..267a586 100644
--- a/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
+++ b/mali_kbase/ipa/mali_kbase_ipa_debugfs.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2017-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2017-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,7 @@
#include "mali_kbase_ipa.h"
#include "mali_kbase_ipa_debugfs.h"
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0))
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0))
#define DEFINE_DEBUGFS_ATTRIBUTE DEFINE_SIMPLE_ATTRIBUTE
#endif
diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h
index 9367cc5..ddac90a 100644
--- a/mali_kbase/jm/mali_base_jm_kernel.h
+++ b/mali_kbase/jm/mali_base_jm_kernel.h
@@ -121,9 +121,9 @@
#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
/**
- * Memory starting from the end of the initial commit is aligned to 'extent'
- * pages, where 'extent' must be a power of 2 and no more than
- * BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ * Memory starting from the end of the initial commit is aligned to 'extension'
+ * pages, where 'extension' must be a power of 2 and no more than
+ * BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
*/
#define BASE_MEM_TILER_ALIGN_TOP ((base_mem_alloc_flags)1 << 20)
@@ -201,8 +201,8 @@
BASE_MEM_COOKIE_BASE)
/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
- * initial commit is aligned to 'extent' pages, where 'extent' must be a power
- * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ * initial commit is aligned to 'extension' pages, where 'extension' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES
*/
#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h
index 305a9eb..76ac278 100644
--- a/mali_kbase/jm/mali_kbase_jm_ioctl.h
+++ b/mali_kbase/jm/mali_kbase_jm_ioctl.h
@@ -106,9 +106,9 @@
* 'scheduling'.
* 11.25:
* - Enabled JIT pressure limit in base/kbase by default
- * 11.26:
+ * 11.26
* - Added kinstr_jm API
- * 11.27:
+ * 11.27
* - Backwards compatible extension to HWC ioctl.
* 11.28:
* - Added kernel side cache ops needed hint
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 0dc0838..c0ce65f 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -483,33 +483,4 @@ static const enum base_hw_feature base_hw_features_tTUx[] = {
BASE_HW_FEATURE_END
};
-static const enum base_hw_feature base_hw_features_tE2x[] = {
- BASE_HW_FEATURE_JOBCHAIN_DISAMBIGUATION,
- BASE_HW_FEATURE_PWRON_DURING_PWROFF_TRANS,
- BASE_HW_FEATURE_XAFFINITY,
- BASE_HW_FEATURE_WARPING,
- BASE_HW_FEATURE_INTERPIPE_REG_ALIASING,
- BASE_HW_FEATURE_32_BIT_UNIFORM_ADDRESS,
- BASE_HW_FEATURE_ATTR_AUTO_TYPE_INFERRAL,
- BASE_HW_FEATURE_BRNDOUT_CC,
- BASE_HW_FEATURE_BRNDOUT_KILL,
- BASE_HW_FEATURE_LD_ST_LEA_TEX,
- BASE_HW_FEATURE_LD_ST_TILEBUFFER,
- BASE_HW_FEATURE_LINEAR_FILTER_FLOAT,
- BASE_HW_FEATURE_MRT,
- BASE_HW_FEATURE_MSAA_16X,
- BASE_HW_FEATURE_NEXT_INSTRUCTION_TYPE,
- BASE_HW_FEATURE_OUT_OF_ORDER_EXEC,
- BASE_HW_FEATURE_T7XX_PAIRING_RULES,
- BASE_HW_FEATURE_TEST4_DATUM_MODE,
- BASE_HW_FEATURE_FLUSH_REDUCTION,
- BASE_HW_FEATURE_PROTECTED_DEBUG_MODE,
- BASE_HW_FEATURE_COHERENCY_REG,
- BASE_HW_FEATURE_AARCH64_MMU,
- BASE_HW_FEATURE_IDVS_GROUP_SIZE,
- BASE_HW_FEATURE_L2_CONFIG,
- BASE_HW_FEATURE_CLEAN_ONLY_SAFE,
- BASE_HW_FEATURE_END
-};
-
#endif /* _BASE_HWCONFIG_FEATURES_H_ */
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index c1ad3ac..d0a3545 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -659,26 +659,4 @@ static const enum base_hw_issue base_hw_issues_model_tTUx[] = {
BASE_HW_ISSUE_END
};
-static const enum base_hw_issue base_hw_issues_tE2x_r0p0[] = {
- BASE_HW_ISSUE_9435,
- BASE_HW_ISSUE_TSIX_2033,
- BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_921,
- BASE_HW_ISSUE_TTRX_3414,
- BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_GPU2019_3212,
- BASE_HW_ISSUE_END
-};
-
-static const enum base_hw_issue base_hw_issues_model_tE2x[] = {
- BASE_HW_ISSUE_5736,
- BASE_HW_ISSUE_9435,
- BASE_HW_ISSUE_TSIX_2033,
- BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_3414,
- BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_GPU2019_3212,
- BASE_HW_ISSUE_END
-};
-
#endif /* _BASE_HWCONFIG_ISSUES_H_ */
diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h
index 086171a..aa6fb9f 100644
--- a/mali_kbase/mali_base_kernel.h
+++ b/mali_kbase/mali_base_kernel.h
@@ -147,15 +147,15 @@ struct base_mem_import_user_buffer {
/* Mask to detect 4GB boundary (in page units) alignment */
#define BASE_MEM_PFN_MASK_4GB (BASE_MEM_MASK_4GB >> LOCAL_PAGE_SHIFT)
-/* Limit on the 'extent' parameter for an allocation with the
+/* Limit on the 'extension' parameter for an allocation with the
* BASE_MEM_TILER_ALIGN_TOP flag set
*
* This is the same as the maximum limit for a Buffer Descriptor's chunk size
*/
-#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2 \
- (21u - (LOCAL_PAGE_SHIFT))
-#define BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES \
- (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES_LOG2))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2 \
+ (21u - (LOCAL_PAGE_SHIFT))
+#define BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES \
+ (1ull << (BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES_LOG2))
/* Bit mask of cookies used for for memory allocation setup */
#define KBASE_COOKIE_MASK ~1UL /* bit 0 is reserved */
@@ -226,7 +226,7 @@ struct base_jit_alloc_info_10_2 {
u64 gpu_alloc_addr;
u64 va_pages;
u64 commit_pages;
- u64 extent;
+ u64 extension;
u8 id;
};
@@ -253,7 +253,7 @@ struct base_jit_alloc_info_11_5 {
u64 gpu_alloc_addr;
u64 va_pages;
u64 commit_pages;
- u64 extent;
+ u64 extension;
u8 id;
u8 bin_id;
u8 max_allocations;
@@ -270,7 +270,7 @@ struct base_jit_alloc_info_11_5 {
* @va_pages: The minimum number of virtual pages required.
* @commit_pages: The minimum number of physical pages which
* should back the allocation.
- * @extent: Granularity of physical pages to grow the
+ * @extension: Granularity of physical pages to grow the
* allocation by during a fault.
* @id: Unique ID provided by the caller, this is used
* to pair allocation and free requests.
@@ -308,7 +308,7 @@ struct base_jit_alloc_info {
u64 gpu_alloc_addr;
u64 va_pages;
u64 commit_pages;
- u64 extent;
+ u64 extension;
u8 id;
u8 bin_id;
u8 max_allocations;
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 8189d02..fd176a5 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -93,11 +93,11 @@
#endif
#if MALI_USE_CSF
-/* Physical memory group ID for command stream frontend user I/O.
+/* Physical memory group ID for CSF user I/O.
*/
#define KBASE_MEM_GROUP_CSF_IO BASE_MEM_GROUP_DEFAULT
-/* Physical memory group ID for command stream frontend firmware.
+/* Physical memory group ID for CSF firmware.
*/
#define KBASE_MEM_GROUP_CSF_FW BASE_MEM_GROUP_DEFAULT
#endif
@@ -155,9 +155,9 @@ void kbase_release_device(struct kbase_device *kbdev);
* the flag @ref KBASE_REG_TILER_ALIGN_TOP (check the flags of the kbase
* region):
* - alignment offset is set to the difference between the kbase region
- * extent (converted from the original value in pages to bytes) and the kbase
+ * extension (converted from the original value in pages to bytes) and the kbase
* region initial_commit (also converted from the original value in pages to
- * bytes); alignment mask is set to the kbase region extent in bytes and
+ * bytes); alignment mask is set to the kbase region extension in bytes and
* decremented by 1.
*
* Return: if successful, address of the unmapped area aligned as required;
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index e079281..48c728e 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -88,29 +88,38 @@ enum {
};
/**
- * Default period for DVFS sampling
+ * Default period for DVFS sampling (can be overridden by platform header)
*/
+#ifndef DEFAULT_PM_DVFS_PERIOD
#define DEFAULT_PM_DVFS_PERIOD 100 /* 100ms */
+#endif
/**
 * Power Management poweroff tick granularity. This is in nanoseconds to
- * allow HR timer support.
+ * allow HR timer support (can be overridden by platform header).
*
* On each scheduling tick, the power manager core may decide to:
* -# Power off one or more shader cores
* -# Power off the entire GPU
*/
+#ifndef DEFAULT_PM_GPU_POWEROFF_TICK_NS
#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (400000) /* 400us */
+#endif
/**
* Power Manager number of ticks before shader cores are powered off
+ * (can be overridden by platform header).
*/
+#ifndef DEFAULT_PM_POWEROFF_TICK_SHADER
#define DEFAULT_PM_POWEROFF_TICK_SHADER (2) /* 400-800us */
+#endif
/**
- * Default scheduling tick granuality
+ * Default scheduling tick granularity (can be overridden by platform header)
*/
+#ifndef DEFAULT_JS_SCHEDULING_PERIOD_NS
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (100000000u) /* 100ms */
+#endif
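For illustration, a platform-specific header processed before this file could pre-define any of these symbols to override the defaults; the values below are assumptions, not part of the patch:

/* Hypothetical platform override of the PM/JS defaults (example values). */
#define DEFAULT_PM_DVFS_PERIOD 50 /* sample DVFS every 50ms */
#define DEFAULT_PM_GPU_POWEROFF_TICK_NS (200000) /* 200us poweroff tick */
#define DEFAULT_PM_POWEROFF_TICK_SHADER (4) /* power off shaders after 4 ticks */
#define DEFAULT_JS_SCHEDULING_PERIOD_NS (50000000u) /* 50ms scheduling tick */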
/**
* Default minimum number of scheduling ticks before jobs are soft-stopped.
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index 99ab414..dcdac19 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -37,9 +37,9 @@
#include <backend/gpu/mali_kbase_model_dummy.h>
#endif /* CONFIG_MALI_NO_MALI */
#include "mali_kbase_mem_profile_debugfs_buf_size.h"
-#include "mali_kbase_debug_mem_view.h"
#include "mali_kbase_mem.h"
#include "mali_kbase_mem_pool_debugfs.h"
+#include "mali_kbase_mem_pool_group.h"
#include "mali_kbase_debugfs_helper.h"
#if !MALI_CUSTOMER_RELEASE
#include "mali_kbase_regs_dump_debugfs.h"
@@ -50,10 +50,9 @@
#if !MALI_USE_CSF
#include <mali_kbase_hwaccess_jm.h>
#endif /* !MALI_USE_CSF */
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
#include <mali_kbase_hwaccess_instr.h>
#endif
-#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_reset_gpu.h>
#include "mali_kbase_ioctl.h"
#if !MALI_USE_CSF
@@ -66,8 +65,8 @@
#if MALI_USE_CSF
#include "csf/mali_kbase_csf_firmware.h"
#include "csf/mali_kbase_csf_tiler_heap.h"
-#include "csf/mali_kbase_csf_kcpu_debugfs.h"
#include "csf/mali_kbase_csf_csg_debugfs.h"
+#include "csf/mali_kbase_csf_cpu_queue_debugfs.h"
#endif
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include "arbiter/mali_kbase_arbiter_pm.h"
@@ -79,6 +78,7 @@
#include "mali_kbase_gwt.h"
#endif
#include "mali_kbase_pm_internal.h"
+#include "mali_kbase_dvfs_debugfs.h"
#include <linux/module.h>
#include <linux/init.h>
@@ -891,10 +891,8 @@ static int kbase_api_mem_alloc(struct kbase_context *kctx,
}
#endif
- reg = kbase_mem_alloc(kctx, alloc->in.va_pages,
- alloc->in.commit_pages,
- alloc->in.extent,
- &flags, &gpu_va);
+ reg = kbase_mem_alloc(kctx, alloc->in.va_pages, alloc->in.commit_pages,
+ alloc->in.extension, &flags, &gpu_va);
if (!reg)
return -ENOMEM;
@@ -1577,6 +1575,14 @@ static int kbase_ioctl_cs_get_glb_iface(struct kbase_context *kctx,
kfree(stream_data);
return err;
}
+
+static int kbasep_ioctl_cs_cpu_queue_dump(struct kbase_context *kctx,
+ struct kbase_ioctl_cs_cpu_queue_info *cpu_queue_info)
+{
+ return kbase_csf_cpu_queue_dump(kctx, cpu_queue_info->buffer,
+ cpu_queue_info->size);
+}
+
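A hypothetical user-space sketch of driving the new dump ioctl (assumes the kbase uapi headers that define the ioctl and its struct are available; buffer sizing and error handling are illustrative only):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

static int dump_cpu_queues(int kbase_fd, void *buf, uint32_t buf_size)
{
	struct kbase_ioctl_cs_cpu_queue_info info;

	memset(&info, 0, sizeof(info));
	info.buffer = (uint64_t)(uintptr_t)buf;	/* user buffer for the dump */
	info.size = buf_size;

	return ioctl(kbase_fd, KBASE_IOCTL_CS_CPU_QUEUE_DUMP, &info);
}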
#endif /* MALI_USE_CSF */
#define KBASE_HANDLE_IOCTL(cmd, function, arg) \
@@ -1980,6 +1986,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
union kbase_ioctl_cs_get_glb_iface,
kctx);
break;
+ case KBASE_IOCTL_CS_CPU_QUEUE_DUMP:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_CS_CPU_QUEUE_DUMP,
+ kbasep_ioctl_cs_cpu_queue_dump,
+ struct kbase_ioctl_cs_cpu_queue_info,
+ kctx);
+ break;
#endif /* MALI_USE_CSF */
#if MALI_UNIT_TEST
case KBASE_IOCTL_TLSTREAM_TEST:
@@ -2022,13 +2034,17 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
read_error = kbase_csf_read_error(kctx, &event_data);
if (!read_event && !read_error) {
+ bool dump = kbase_csf_cpu_queue_read_dump_req(kctx,
+ &event_data);
/* This condition is not treated as an error.
* It is possible that event handling thread was woken up due
* to a fault/error that occurred for a queue group, but before
* the corresponding fault data was read by the thread the
* queue group was already terminated by the userspace.
*/
- dev_dbg(kctx->kbdev->dev, "Neither event nor error signaled");
+ if (!dump)
+ dev_dbg(kctx->kbdev->dev,
+ "Neither event nor error signaled");
}
if (copy_to_user(buf, &event_data, data_size) != 0) {
@@ -2119,7 +2135,8 @@ int kbase_event_pending(struct kbase_context *ctx)
WARN_ON_ONCE(!ctx);
return (atomic_read(&ctx->event_count) != 0) ||
- kbase_csf_error_pending(ctx);
+ kbase_csf_error_pending(ctx) ||
+ kbase_csf_cpu_queue_dump_needed(ctx);
}
#else
int kbase_event_pending(struct kbase_context *ctx)
@@ -2302,6 +2319,7 @@ static DEVICE_ATTR(power_policy, S_IRUGO | S_IWUSR, show_policy, set_policy);
static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, char * const buf)
{
struct kbase_device *kbdev;
+ unsigned long flags;
ssize_t ret = 0;
kbdev = to_kbase_device(dev);
@@ -2309,6 +2327,19 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
if (!kbdev)
return -ENODEV;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+#if MALI_USE_CSF
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "Current debug core mask : 0x%llX\n",
+ kbdev->pm.debug_core_mask);
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "Current desired core mask : 0x%llX\n",
+ kbase_pm_ca_get_core_mask(kbdev));
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "Current in use core mask : 0x%llX\n",
+ kbdev->pm.backend.shaders_avail);
+#else
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
"Current core mask (JS0) : 0x%llX\n",
kbdev->pm.debug_core_mask[0]);
@@ -2318,10 +2349,14 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
"Current core mask (JS2) : 0x%llX\n",
kbdev->pm.debug_core_mask[2]);
+#endif /* MALI_USE_CSF */
+
ret += scnprintf(buf + ret, PAGE_SIZE - ret,
"Available core mask : 0x%llX\n",
kbdev->gpu_props.props.raw_props.shader_present);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
return ret;
}
@@ -2340,17 +2375,35 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr,
static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
struct kbase_device *kbdev;
+#if MALI_USE_CSF
+ u64 new_core_mask;
+#else
u64 new_core_mask[3];
- int items, i;
+ u64 group0_core_mask;
+ int i;
+#endif /* MALI_USE_CSF */
+
+ int items;
ssize_t err = count;
unsigned long flags;
- u64 shader_present, group0_core_mask;
+ u64 shader_present;
kbdev = to_kbase_device(dev);
if (!kbdev)
return -ENODEV;
+#if MALI_USE_CSF
+ items = sscanf(buf, "%llx", &new_core_mask);
+
+ if (items != 1) {
+ dev_err(kbdev->dev,
+ "Couldn't process core mask write operation.\n"
+ "Use format <core_mask>\n");
+ err = -EINVAL;
+ goto end;
+ }
+#else
items = sscanf(buf, "%llx %llx %llx",
&new_core_mask[0], &new_core_mask[1],
&new_core_mask[2]);
@@ -2365,11 +2418,35 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
if (items == 1)
new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
+#endif
mutex_lock(&kbdev->pm.lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
shader_present = kbdev->gpu_props.props.raw_props.shader_present;
+
+#if MALI_USE_CSF
+ if ((new_core_mask & shader_present) != new_core_mask) {
+ dev_err(dev,
+ "Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)",
+ new_core_mask, shader_present);
+ err = -EINVAL;
+ goto unlock;
+
+ } else if (!(new_core_mask & shader_present &
+ kbdev->pm.backend.ca_cores_enabled)) {
+ dev_err(dev,
+ "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)\n",
+ new_core_mask,
+ kbdev->gpu_props.props.raw_props.shader_present,
+ kbdev->pm.backend.ca_cores_enabled);
+ err = -EINVAL;
+ goto unlock;
+ }
+
+ if (kbdev->pm.debug_core_mask != new_core_mask)
+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
+#else
group0_core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
for (i = 0; i < 3; ++i) {
@@ -2404,6 +2481,7 @@ static ssize_t set_core_mask(struct device *dev, struct device_attribute *attr,
kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0],
new_core_mask[1], new_core_mask[2]);
}
+#endif /* MALI_USE_CSF */
unlock:
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -3031,8 +3109,6 @@ static ssize_t kbase_show_gpuinfo(struct device *dev,
.name = "Mali-TTUX" },
{ .id = GPU_ID2_PRODUCT_LTUX >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-LTUX" },
- { .id = GPU_ID2_PRODUCT_TE2X >> GPU_ID_VERSION_PRODUCT_ID_SHIFT,
- .name = "Mali-TE2X" },
};
const char *product_name = "(Unknown Mali GPU)";
struct kbase_device *kbdev;
@@ -3223,6 +3299,75 @@ static ssize_t show_pm_poweroff(struct device *dev,
static DEVICE_ATTR(pm_poweroff, S_IRUGO | S_IWUSR, show_pm_poweroff,
set_pm_poweroff);
+#if MALI_USE_CSF
+/**
+ * set_idle_hysteresis_time - Store callback for CSF idle_hysteresis_time
+ * sysfs file.
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the idle_hysteresis_time sysfs file is
+ * written to.
+ *
+ * This file contains the value of the idle hysteresis duration.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_idle_hysteresis_time(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ u32 dur;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kstrtou32(buf, 0, &dur)) {
+ dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time write operation.\n"
+ "Use format <idle_hysteresis_time>\n");
+ return -EINVAL;
+ }
+
+ kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur);
+
+ return count;
+}
+
+/**
+ * show_idle_hysteresis_time - Show callback for CSF idle_hysteresis_time
+ * sysfs entry.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the current idle hysteresis duration.
+ *
+ * This function is called to get the current idle hysteresis duration in ms.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_idle_hysteresis_time(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+ u32 dur;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev);
+ ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur);
+
+ return ret;
+}
+
+static DEVICE_ATTR(idle_hysteresis_time, S_IRUGO | S_IWUSR,
+ show_idle_hysteresis_time, set_idle_hysteresis_time);
+#endif
+
/**
* set_reset_timeout - Store callback for the reset_timeout sysfs file.
* @dev: The device with sysfs file is for
@@ -3459,6 +3604,203 @@ static ssize_t set_lp_mem_pool_max_size(struct device *dev,
static DEVICE_ATTR(lp_mem_pool_max_size, S_IRUGO | S_IWUSR, show_lp_mem_pool_max_size,
set_lp_mem_pool_max_size);
+/**
+ * show_simplified_mem_pool_max_size - Show the maximum size for the memory
+ * pool 0 of small (4KiB) pages.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the max size.
+ *
+ * This function is called to get the maximum size for the memory pool 0 of
+ * small (4KiB) pages. It is assumed that the maximum size value is the same
+ * for all the pools.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_simplified_mem_pool_max_size(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *const kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+ kbdev->mem_pools.small, 1, kbase_mem_pool_debugfs_max_size);
+}
+
+/**
+ * set_simplified_mem_pool_max_size - Set the same maximum size for all the
+ * memory pools of small (4KiB) pages.
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called to set the same maximum size for all the memory
+ * pools of small (4KiB) pages.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_simplified_mem_pool_max_size(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *const kbdev = to_kbase_device(dev);
+ unsigned long new_size;
+ int gid;
+ int err;
+
+ if (!kbdev)
+ return -ENODEV;
+
+ err = kstrtoul(buf, 0, &new_size);
+ if (err)
+ return -EINVAL;
+
+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid)
+ kbase_mem_pool_debugfs_set_max_size(
+ kbdev->mem_pools.small, gid, (size_t)new_size);
+
+ return count;
+}
+
+static DEVICE_ATTR(max_size, 0600, show_simplified_mem_pool_max_size,
+ set_simplified_mem_pool_max_size);
+
+/**
+ * show_simplified_lp_mem_pool_max_size - Show the maximum size for the memory
+ * pool 0 of large (2MiB) pages.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the max size.
+ *
+ * This function is called to get the maximum size for the memory pool 0 of
+ * large (2MiB) pages. It is assumed that the maximum size value is the same
+ * for all the pools.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_simplified_lp_mem_pool_max_size(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *const kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ return kbase_debugfs_helper_get_attr_to_string(buf, PAGE_SIZE,
+ kbdev->mem_pools.large, 1, kbase_mem_pool_debugfs_max_size);
+}
+
+/**
+ * set_simplified_lp_mem_pool_max_size - Set the same maximum size for all the
+ * memory pools of large (2MiB) pages.
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called to set the same maximum size for all the memory
+ * pools of large (2MiB) pages.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_simplified_lp_mem_pool_max_size(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *const kbdev = to_kbase_device(dev);
+ unsigned long new_size;
+ int gid;
+ int err;
+
+ if (!kbdev)
+ return -ENODEV;
+
+ err = kstrtoul(buf, 0, &new_size);
+ if (err)
+ return -EINVAL;
+
+ for (gid = 0; gid < MEMORY_GROUP_MANAGER_NR_GROUPS; ++gid)
+ kbase_mem_pool_debugfs_set_max_size(
+ kbdev->mem_pools.large, gid, (size_t)new_size);
+
+ return count;
+}
+
+static DEVICE_ATTR(lp_max_size, 0600, show_simplified_lp_mem_pool_max_size,
+ set_simplified_lp_mem_pool_max_size);
+
+/**
+ * show_simplified_ctx_default_max_size - Show the default maximum size for the
+ * memory pool 0 of small (4KiB) pages.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the default max size.
+ *
+ * This function is called to get the default ctx maximum size for the memory
+ * pool 0 of small (4KiB) pages. It is assumed that the maximum size value is
+ * the same for all the pools. The maximum size for the pool of large (2MiB)
+ * pages will be the same as the max size of the pool of small (4KiB) pages in
+ * terms of bytes.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t show_simplified_ctx_default_max_size(struct device *dev,
+ struct device_attribute *attr, char * const buf)
+{
+ struct kbase_device *kbdev = to_kbase_device(dev);
+ size_t max_size;
+
+ if (!kbdev)
+ return -ENODEV;
+
+ max_size = kbase_mem_pool_config_debugfs_max_size(
+ kbdev->mem_pool_defaults.small, 0);
+
+ return scnprintf(buf, PAGE_SIZE, "%zu\n", max_size);
+}
+
+/**
+ * set_simplified_ctx_default_max_size - Set the same default maximum size for
+ * all the pools created for new
+ * contexts. This covers the pool of
+ * large pages as well and its max size
+ * will be the same as the max size of the pool
+ * of small pages in terms of bytes.
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The value written to the sysfs file.
+ * @count: The number of bytes written to the sysfs file.
+ *
+ * This function is called to set the same maximum size for all pools created
+ * for new contexts.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t set_simplified_ctx_default_max_size(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ unsigned long new_size;
+ int err;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ err = kstrtoul(buf, 0, &new_size);
+ if (err)
+ return -EINVAL;
+
+ kbase_mem_pool_group_config_set_max_size(
+ &kbdev->mem_pool_defaults, (size_t)new_size);
+
+ return count;
+}
+
+static DEVICE_ATTR(ctx_default_max_size, 0600,
+ show_simplified_ctx_default_max_size,
+ set_simplified_ctx_default_max_size);
+
#if !MALI_USE_CSF
/**
* show_js_ctx_scheduling_mode - Show callback for js_ctx_scheduling_mode sysfs
@@ -3822,6 +4164,10 @@ static const struct protected_mode_ops kbasep_native_protected_ops = {
.protected_mode_disable = kbasep_protected_mode_disable
};
+#ifndef PLATFORM_PROTECTED_CALLBACKS
+#define PLATFORM_PROTECTED_CALLBACKS (&kbasep_native_protected_ops)
+#endif /* PLATFORM_PROTECTED_CALLBACKS */
+
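Similarly, a platform integration could route protected-mode transitions through its own callbacks by defining PLATFORM_PROTECTED_CALLBACKS before this block; the functions and ops below are hypothetical, shown only to illustrate the hook:

/* Hypothetical platform-specific protected mode hooks (illustration only). */
static int my_soc_protected_enable(struct protected_mode_device *pdev)
{
	/* e.g. program a secure-side firewall before the GPU enters
	 * protected mode
	 */
	return 0;
}

static int my_soc_protected_disable(struct protected_mode_device *pdev)
{
	return 0;
}

static const struct protected_mode_ops my_soc_protected_ops = {
	.protected_mode_enable = my_soc_protected_enable,
	.protected_mode_disable = my_soc_protected_disable,
};

#define PLATFORM_PROTECTED_CALLBACKS (&my_soc_protected_ops)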
int kbase_protected_mode_init(struct kbase_device *kbdev)
{
/* Use native protected ops */
@@ -3830,7 +4176,7 @@ int kbase_protected_mode_init(struct kbase_device *kbdev)
if (!kbdev->protected_dev)
return -ENOMEM;
kbdev->protected_dev->data = kbdev;
- kbdev->protected_ops = &kbasep_native_protected_ops;
+ kbdev->protected_ops = PLATFORM_PROTECTED_CALLBACKS;
INIT_WORK(&kbdev->protected_mode_hwcnt_disable_work,
kbasep_protected_mode_hwcnt_disable_worker);
kbdev->protected_mode_hwcnt_desired = true;
@@ -4415,7 +4761,7 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
kbasep_gpu_memory_debugfs_init(kbdev);
kbase_as_fault_debugfs_init(kbdev);
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
kbase_instr_backend_debugfs_init(kbdev);
#endif
/* fops_* variables created by invocations of macro
@@ -4474,6 +4820,8 @@ int kbase_device_debugfs_init(struct kbase_device *kbdev)
&kbasep_serialize_jobs_debugfs_fops);
#endif
+ kbase_dvfs_status_debugfs_init(kbdev);
+
return 0;
out:
@@ -4604,6 +4952,9 @@ static struct attribute *kbase_attrs[] = {
&dev_attr_gpuinfo.attr,
&dev_attr_dvfs_period.attr,
&dev_attr_pm_poweroff.attr,
+#if MALI_USE_CSF
+ &dev_attr_idle_hysteresis_time.attr,
+#endif
&dev_attr_reset_timeout.attr,
#if !MALI_USE_CSF
&dev_attr_js_scheduling_period.attr,
@@ -4620,12 +4971,25 @@ static struct attribute *kbase_attrs[] = {
NULL
};
+static struct attribute *kbase_mempool_attrs[] = {
+ &dev_attr_max_size.attr,
+ &dev_attr_lp_max_size.attr,
+ &dev_attr_ctx_default_max_size.attr,
+ NULL
+};
+
#define SYSFS_SCHEDULING_GROUP "scheduling"
static const struct attribute_group kbase_scheduling_attr_group = {
.name = SYSFS_SCHEDULING_GROUP,
.attrs = kbase_scheduling_attrs,
};
+#define SYSFS_MEMPOOL_GROUP "mempool"
+static const struct attribute_group kbase_mempool_attr_group = {
+ .name = SYSFS_MEMPOOL_GROUP,
+ .attrs = kbase_mempool_attrs,
+};
+
static const struct attribute_group kbase_attr_group = {
.attrs = kbase_attrs,
};
@@ -4641,15 +5005,28 @@ int kbase_sysfs_init(struct kbase_device *kbdev)
kbdev->mdev.mode = 0666;
err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group);
- if (!err) {
- err = sysfs_create_group(&kbdev->dev->kobj,
- &kbase_scheduling_attr_group);
- if (err) {
- dev_err(kbdev->dev, "Creation of %s sysfs group failed",
- SYSFS_SCHEDULING_GROUP);
- sysfs_remove_group(&kbdev->dev->kobj,
- &kbase_attr_group);
- }
+ if (err)
+ return err;
+
+ err = sysfs_create_group(&kbdev->dev->kobj,
+ &kbase_scheduling_attr_group);
+ if (err) {
+ dev_err(kbdev->dev, "Creation of %s sysfs group failed",
+ SYSFS_SCHEDULING_GROUP);
+ sysfs_remove_group(&kbdev->dev->kobj,
+ &kbase_attr_group);
+ return err;
+ }
+
+ err = sysfs_create_group(&kbdev->dev->kobj,
+ &kbase_mempool_attr_group);
+ if (err) {
+ dev_err(kbdev->dev, "Creation of %s sysfs group failed",
+ SYSFS_MEMPOOL_GROUP);
+ sysfs_remove_group(&kbdev->dev->kobj,
+ &kbase_scheduling_attr_group);
+ sysfs_remove_group(&kbdev->dev->kobj,
+ &kbase_attr_group);
}
return err;
@@ -4657,6 +5034,7 @@ int kbase_sysfs_init(struct kbase_device *kbdev)
void kbase_sysfs_term(struct kbase_device *kbdev)
{
+ sysfs_remove_group(&kbdev->dev->kobj, &kbase_mempool_attr_group);
sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group);
sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group);
put_device(kbdev->dev);
@@ -4974,6 +5352,7 @@ MODULE_LICENSE("GPL");
MODULE_VERSION(MALI_RELEASE_NAME " (UK version " \
__stringify(BASE_UK_VERSION_MAJOR) "." \
__stringify(BASE_UK_VERSION_MINOR) ")");
+MODULE_SOFTDEP("pre: memory_group_manager");
#define CREATE_TRACE_POINTS
/* Create the trace points (otherwise we just get code to call a tracepoint) */
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index 750dbd8..70b498a 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -21,8 +21,6 @@
*/
#include <mali_kbase.h>
-#include <mali_kbase_config_defaults.h>
-
#include <mali_kbase_defs.h>
#include "mali_kbase_ctx_sched.h"
#include "tl/mali_kbase_tracepoints.h"
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 49c582a..ab1f380 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -155,7 +155,6 @@ struct kbase_context;
struct kbase_device;
struct kbase_as;
struct kbase_mmu_setup;
-struct kbase_ipa_model_vinstr_data;
struct kbase_kinstr_jm;
/**
@@ -384,13 +383,25 @@ struct kbase_pm_device_data {
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
/* Wait queue set when active_count == 0 */
wait_queue_head_t zero_active_count_wait;
+ /* Wait queue to block the termination of a Kbase context until the
+ * system resume of the GPU device.
+ */
+ wait_queue_head_t resume_wait;
+#if MALI_USE_CSF
+ /**
+ * Bit mask identifying the available shader cores that are specified
+ * via sysfs.
+ */
+ u64 debug_core_mask;
+#else
/**
* Bit masks identifying the available shader cores that are specified
* via sysfs. One mask per job slot.
*/
u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
u64 debug_core_mask_all;
+#endif /* MALI_USE_CSF */
/**
* Callback for initializing the runtime power management.
@@ -1019,8 +1030,6 @@ struct kbase_device {
* the difference between last_metrics and the current values.
*/
struct kbasep_pm_metrics last_metrics;
- /* Model data to pass to ipa_gpu_active/idle() */
- struct kbase_ipa_model_vinstr_data *model_data;
/* true if use of fallback model has been forced by the User */
bool force_fallback_model;
@@ -1111,7 +1120,7 @@ struct kbase_device {
u8 l2_hash_override;
#if MALI_USE_CSF
- /* Command-stream front-end for the device. */
+ /* CSF object for the GPU device. */
struct kbase_csf_device csf;
#else
struct kbasep_js_device_data js_data;
@@ -1778,29 +1787,4 @@ static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
/* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
-/* JobDescriptorHeader - taken from the architecture specifications, the layout
- * is currently identical for all GPU archs. */
-struct job_descriptor_header {
- u32 exception_status;
- u32 first_incomplete_task;
- u64 fault_pointer;
- u8 job_descriptor_size : 1;
- u8 job_type : 7;
- u8 job_barrier : 1;
- u8 _reserved_01 : 1;
- u8 _reserved_1 : 1;
- u8 _reserved_02 : 1;
- u8 _reserved_03 : 1;
- u8 _reserved_2 : 1;
- u8 _reserved_04 : 1;
- u8 _reserved_05 : 1;
- u16 job_index;
- u16 job_dependency_index_1;
- u16 job_dependency_index_2;
- union {
- u64 _64;
- u32 _32;
- } next_job;
-};
-
#endif /* _KBASE_DEFS_H_ */
diff --git a/mali_kbase/mali_kbase_dvfs_debugfs.c b/mali_kbase/mali_kbase_dvfs_debugfs.c
new file mode 100644
index 0000000..438b528
--- /dev/null
+++ b/mali_kbase/mali_kbase_dvfs_debugfs.c
@@ -0,0 +1,92 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_dvfs_debugfs.h"
+#include <mali_kbase.h>
+#include <linux/seq_file.h>
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * kbasep_dvfs_utilization_debugfs_show() - Print the DVFS utilization info
+ *
+ * @file: The seq_file for printing to
+ * @data: The debugfs dentry private data, a pointer to kbase_device
+ *
+ * Return: Negative error code or 0 on success.
+ */
+static int kbasep_dvfs_utilization_debugfs_show(struct seq_file *file, void *data)
+{
+ struct kbase_device *kbdev = file->private;
+
+ seq_printf(file, "busy_time: %u idle_time: %u\n",
+ kbdev->pm.backend.metrics.values.time_busy,
+ kbdev->pm.backend.metrics.values.time_idle);
+
+ return 0;
+}
+
+static int kbasep_dvfs_utilization_debugfs_open(struct inode *in,
+ struct file *file)
+{
+ return single_open(file, kbasep_dvfs_utilization_debugfs_show,
+ in->i_private);
+}
+
+static const struct file_operations kbasep_dvfs_utilization_debugfs_fops = {
+ .open = kbasep_dvfs_utilization_debugfs_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev)
+{
+ struct dentry *file;
+#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
+ const mode_t mode = 0444;
+#else
+ const mode_t mode = 0400;
+#endif
+
+ if (WARN_ON(!kbdev || IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
+ return;
+
+ file = debugfs_create_file("dvfs_utilization", mode,
+ kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_dvfs_utilization_debugfs_fops);
+
+ if (IS_ERR_OR_NULL(file)) {
+ dev_warn(kbdev->dev,
+ "Unable to create dvfs debugfs entry");
+ }
+}
+
+#else
+/*
+ * Stub functions for when debugfs is disabled
+ */
+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
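A minimal userspace sketch (not part of the patch) for reading the new entry; the debugfs mount point and the "mali0" directory name are assumptions.

    #include <stdio.h>

    int main(void)
    {
            char line[128];
            FILE *f = fopen("/sys/kernel/debug/mali0/dvfs_utilization", "r");

            if (!f)
                    return 1;
            if (fgets(line, sizeof(line), f))
                    fputs(line, stdout); /* e.g. "busy_time: 1234 idle_time: 5678" */
            fclose(f);
            return 0;
    }
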
diff --git a/mali_kbase/mali_kbase_dvfs_debugfs.h b/mali_kbase/mali_kbase_dvfs_debugfs.h
new file mode 100644
index 0000000..4f9e3fc
--- /dev/null
+++ b/mali_kbase/mali_kbase_dvfs_debugfs.h
@@ -0,0 +1,36 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_DVFS_DEBUGFS_H_
+#define _KBASE_DVFS_DEBUGFS_H_
+
+/* Forward declaration */
+struct kbase_device;
+
+/**
+ * kbase_dvfs_status_debugfs_init() - Create a debugfs entry for DVFS queries
+ *
+ * @kbdev: Pointer to the GPU device for which to create the debugfs entry
+ */
+void kbase_dvfs_status_debugfs_init(struct kbase_device *kbdev);
+
+#endif /* _KBASE_DVFS_DEBUGFS_H_ */
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index 020b5d8..81e0395 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -195,7 +195,6 @@ static void kbase_gpuprops_calculate_props(
{
int i;
u32 gpu_id;
- u32 product_id;
/* Populate the base_gpu_props structure */
kbase_gpuprops_update_core_props_gpu_id(gpu_props);
@@ -251,8 +250,6 @@ static void kbase_gpuprops_calculate_props(
* Workaround for the incorrectly applied THREAD_FEATURES to tDUx.
*/
gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
- product_id = gpu_id & GPU_ID_VERSION_PRODUCT_ID;
- product_id >>= GPU_ID_VERSION_PRODUCT_ID_SHIFT;
#if MALI_USE_CSF
gpu_props->thread_props.max_registers =
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index dc58ffb..386fb9e 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -89,9 +89,6 @@ void kbase_hw_set_features_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_LTUX:
features = base_hw_features_tTUx;
break;
- case GPU_ID2_PRODUCT_TE2X:
- features = base_hw_features_tE2x;
- break;
default:
features = base_hw_features_generic;
break;
@@ -243,9 +240,6 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(
{{GPU_ID2_VERSION_MAKE(3, 0, 0), base_hw_issues_tTUx_r0p0},
{U32_MAX, NULL} } },
- {GPU_ID2_PRODUCT_TE2X,
- {{GPU_ID2_VERSION_MAKE(0, 0, 0), base_hw_issues_tE2x_r0p0},
- {U32_MAX, NULL} } },
};
u32 gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
@@ -403,9 +397,6 @@ int kbase_hw_set_issues_mask(struct kbase_device *kbdev)
case GPU_ID2_PRODUCT_LTUX:
issues = base_hw_issues_model_tTUx;
break;
- case GPU_ID2_PRODUCT_TE2X:
- issues = base_hw_issues_model_tE2x;
- break;
default:
dev_err(kbdev->dev,
"Unknown GPU ID %x", gpu_id);
diff --git a/mali_kbase/mali_kbase_hwaccess_defs.h b/mali_kbase/mali_kbase_hwaccess_defs.h
index 124a2d9..3c3dfb0 100644
--- a/mali_kbase/mali_kbase_hwaccess_defs.h
+++ b/mali_kbase/mali_kbase_hwaccess_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014, 2016, 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014, 2016-2018, 2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,6 @@
*
*/
-
/**
* @file mali_kbase_hwaccess_gpu_defs.h
* HW access common definitions
@@ -43,7 +42,9 @@
* @backend: GPU backend specific data for HW access layer
*/
struct kbase_hwaccess_data {
+#if !MALI_USE_CSF
struct kbase_context *active_kctx[BASE_JM_MAX_NR_SLOTS];
+#endif
struct kbase_backend_data backend;
};
diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h
index 4fd2e35..fd17e55 100644
--- a/mali_kbase/mali_kbase_hwaccess_instr.h
+++ b/mali_kbase/mali_kbase_hwaccess_instr.h
@@ -39,8 +39,7 @@
* @shader_bm: counters selection bitmask (Shader).
* @tiler_bm: counters selection bitmask (Tiler).
* @mmu_l2_bm: counters selection bitmask (MMU_L2).
- * @use_secondary: use secondary performance counters set for applicable
- * counter blocks.
+ * @counter_set: the performance counter set to use.
*/
struct kbase_instr_hwcnt_enable {
u64 dump_buffer;
@@ -49,7 +48,7 @@ struct kbase_instr_hwcnt_enable {
u32 shader_bm;
u32 tiler_bm;
u32 mmu_l2_bm;
- bool use_secondary;
+ u8 counter_set;
};
/**
@@ -139,7 +138,7 @@ int kbase_instr_backend_init(struct kbase_device *kbdev);
*/
void kbase_instr_backend_term(struct kbase_device *kbdev);
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY_VIA_DEBUG_FS
+#ifdef CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS
/**
* kbase_instr_backend_debugfs_init() - Add a debugfs entry for the
* hardware counter set.
diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h
index bbaf6ea..3e223c6 100644
--- a/mali_kbase/mali_kbase_hwaccess_pm.h
+++ b/mali_kbase/mali_kbase_hwaccess_pm.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014-2015, 2018-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2015, 2018-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,6 @@
*
*/
-
/**
* @file mali_kbase_hwaccess_pm.h
* HW access power manager common APIs
@@ -119,7 +118,19 @@ void kbase_hwaccess_pm_gpu_active(struct kbase_device *kbdev);
*/
void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
-
+#if MALI_USE_CSF
+/**
+ * Set the debug core mask.
+ *
+ * This determines which cores the power manager is allowed to use.
+ *
+ * @param kbdev The kbase device structure for the device (must be a
+ * valid pointer)
+ * @param new_core_mask The core mask to use
+ */
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
+ u64 new_core_mask);
+#else
/**
* Set the debug core mask.
*
@@ -134,7 +145,7 @@ void kbase_hwaccess_pm_gpu_idle(struct kbase_device *kbdev);
void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev,
u64 new_core_mask_js0, u64 new_core_mask_js1,
u64 new_core_mask_js2);
-
+#endif /* MALI_USE_CSF */
/**
* Get the current policy.
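Illustrative sketch only (not part of the patch): how a single caller could drive the two prototypes above. The helper name and mask value are placeholders, and the locking requirements of the real API are not shown.

    static void example_apply_debug_core_mask(struct kbase_device *kbdev,
                                              u64 mask)
    {
    #if MALI_USE_CSF
            /* CSF GPUs: a single mask applies to the whole device. */
            kbase_pm_set_debug_core_mask(kbdev, mask);
    #else
            /* Job-manager GPUs: apply the same mask to all three job slots. */
            kbase_pm_set_debug_core_mask(kbdev, mask, mask, mask);
    #endif
    }
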
diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
index 7b668b0..7d1334e 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c
@@ -44,15 +44,14 @@
* struct kbase_hwcnt_backend_jm_info - Information used to create an instance
* of a JM hardware counter backend.
* @kbdev: KBase device.
- * @use_secondary: True if secondary performance counters should be used,
- * else false. Ignored if secondary counters are not supported.
+ * @counter_set: The performance counter set to use.
* @metadata: Hardware counter metadata.
* @dump_bytes: Bytes of GPU memory required to perform a
* hardware counter dump.
*/
struct kbase_hwcnt_backend_jm_info {
struct kbase_device *kbdev;
- bool use_secondary;
+ enum kbase_hwcnt_set counter_set;
const struct kbase_hwcnt_metadata *metadata;
size_t dump_bytes;
};
@@ -226,7 +225,8 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
(struct kbase_hwcnt_backend_jm *)backend;
struct kbase_context *kctx;
struct kbase_device *kbdev;
- struct kbase_hwcnt_physical_enable_map phys;
+ struct kbase_hwcnt_physical_enable_map phys_enable_map;
+ enum kbase_hwcnt_physical_set phys_counter_set;
struct kbase_instr_hwcnt_enable enable;
u64 timestamp_ns;
@@ -239,13 +239,16 @@ static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
lockdep_assert_held(&kbdev->hwaccess_lock);
- kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
- enable.fe_bm = phys.fe_bm;
- enable.shader_bm = phys.shader_bm;
- enable.tiler_bm = phys.tiler_bm;
- enable.mmu_l2_bm = phys.mmu_l2_bm;
- enable.use_secondary = backend_jm->info->use_secondary;
+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set,
+ backend_jm->info->counter_set);
+
+ enable.fe_bm = phys_enable_map.fe_bm;
+ enable.shader_bm = phys_enable_map.shader_bm;
+ enable.tiler_bm = phys_enable_map.tiler_bm;
+ enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+ enable.counter_set = phys_counter_set;
enable.dump_buffer = backend_jm->gpu_dump_va;
enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
@@ -454,10 +457,8 @@ static int kbasep_hwcnt_backend_jm_dump_alloc(
flags = BASE_MEM_PROT_CPU_RD |
BASE_MEM_PROT_GPU_WR |
BASEP_MEM_PERMANENT_KERNEL_MAPPING |
- BASE_MEM_CACHED_CPU;
-
- if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE)
- flags |= BASE_MEM_UNCACHED_GPU;
+ BASE_MEM_CACHED_CPU |
+ BASE_MEM_UNCACHED_GPU;
nr_pages = PFN_UP(info->dump_bytes);
@@ -672,16 +673,19 @@ static int kbasep_hwcnt_backend_jm_info_create(
info->kbdev = kbdev;
-#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY
- info->use_secondary = true;
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ info->counter_set = KBASE_HWCNT_SET_TERTIARY;
#else
- info->use_secondary = false;
+ /* Default to primary */
+ info->counter_set = KBASE_HWCNT_SET_PRIMARY;
#endif
- errcode = kbase_hwcnt_gpu_metadata_create(
- &hwcnt_gpu_info, info->use_secondary,
- &info->metadata,
- &info->dump_bytes);
+ errcode = kbase_hwcnt_gpu_metadata_create(&hwcnt_gpu_info,
+ info->counter_set,
+ &info->metadata,
+ &info->dump_bytes);
if (errcode)
goto error;
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c
index 575d39c..1f4953f 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.c
@@ -35,12 +35,93 @@
/* Index of the PRFCNT_EN header into a V5 counter block */
#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
+ bool is_csf)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ if (is_csf) {
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
+ } else {
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ }
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ if (is_csf) {
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
+ } else {
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ }
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_tiler_block_type(u64 *dst,
+ enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set,
+ bool is_csf)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ if (is_csf) {
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
+ } else {
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ }
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_memsys_block_type(u64 *dst,
+ enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
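Illustrative sketch only (not part of the patch): the block types the helpers above would report for a CSF GPU using the secondary counter set, following the switch cases directly.

    static void example_secondary_set_block_types(void)
    {
            u64 fe, tiler, sc, memsys;

            kbasep_get_fe_block_type(&fe, KBASE_HWCNT_SET_SECONDARY, true);
            kbasep_get_tiler_block_type(&tiler, KBASE_HWCNT_SET_SECONDARY);
            kbasep_get_sc_block_type(&sc, KBASE_HWCNT_SET_SECONDARY, true);
            kbasep_get_memsys_block_type(&memsys, KBASE_HWCNT_SET_SECONDARY);

            /* fe == FE2, sc == SC2, memsys == MEMSYS2, and tiler == UNDEFINED
             * because the tiler has no secondary counter set.
             */
    }
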
/**
* kbasep_hwcnt_backend_gpu_metadata_v5_create() - Create hardware counter
* metadata for a v5 GPU.
* @v5_info: Non-NULL pointer to hwcnt info for a v5 GPU.
- * @use_secondary: True if secondary performance counters should be used, else
- * false. Ignored if secondary counters are not supported.
+ * @counter_set: The performance counter set to use.
* @metadata: Non-NULL pointer to where created metadata is stored
* on success.
*
@@ -48,7 +129,7 @@
*/
static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
const struct kbase_hwcnt_gpu_v5_info *v5_info,
- bool use_secondary,
+ enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **metadata)
{
struct kbase_hwcnt_description desc;
@@ -76,22 +157,20 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
return -EINVAL;
- /* One Job Manager block */
- blks[0].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM;
+ /* One Front End block */
+ kbasep_get_fe_block_type(&blks[0].type, counter_set, v5_info->is_csf);
blks[0].inst_cnt = 1;
blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[0].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
/* One Tiler block */
- blks[1].type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+ kbasep_get_tiler_block_type(&blks[1].type, counter_set);
blks[1].inst_cnt = 1;
blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[1].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
/* l2_count memsys blks */
- blks[2].type = use_secondary ?
- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 :
- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+ kbasep_get_memsys_block_type(&blks[2].type, counter_set);
blks[2].inst_cnt = v5_info->l2_count;
blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[2].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
@@ -112,9 +191,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create(
* requirements, and embed the core mask into the availability mask so
* we can determine later which shader cores physically exist.
*/
- blks[3].type = use_secondary ?
- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 :
- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+ kbasep_get_sc_block_type(&blks[3].type, counter_set, v5_info->is_csf);
blks[3].inst_cnt = sc_block_count;
blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
blks[3].ctr_cnt = KBASE_HWCNT_V5_COUNTERS_PER_BLOCK;
@@ -167,7 +244,7 @@ int kbase_hwcnt_gpu_info_init(
info->type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
info->v5.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
info->v5.core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
-#else
+#else /* CONFIG_MALI_NO_MALI */
{
const struct base_gpu_props *props = &kbdev->gpu_props.props;
const size_t l2_count = props->l2_props.num_l2_slices;
@@ -178,6 +255,12 @@ int kbase_hwcnt_gpu_info_init(
info->v5.l2_count = l2_count;
info->v5.core_mask = core_mask;
}
+#endif /* CONFIG_MALI_NO_MALI */
+
+#if MALI_USE_CSF
+ info->v5.is_csf = true;
+#else
+ info->v5.is_csf = false;
#endif
/* Determine the number of available clock domains. */
@@ -192,7 +275,7 @@ int kbase_hwcnt_gpu_info_init(
int kbase_hwcnt_gpu_metadata_create(
const struct kbase_hwcnt_gpu_info *info,
- bool use_secondary,
+ enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata,
size_t *out_dump_bytes)
{
@@ -206,7 +289,7 @@ int kbase_hwcnt_gpu_metadata_create(
if (info->type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
dump_bytes = kbasep_hwcnt_backend_gpu_v5_dump_bytes(&info->v5);
errcode = kbasep_hwcnt_backend_gpu_metadata_v5_create(
- &info->v5, use_secondary, &metadata);
+ &info->v5, counter_set, &metadata);
} else {
return -EINVAL;
}
@@ -248,7 +331,8 @@ static bool is_block_type_shader(
return false;
if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
- blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2)
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3)
is_shader = true;
return is_shader;
@@ -437,7 +521,12 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
fe_bm |= *blk_map;
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
@@ -445,6 +534,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
shader_bm |= *blk_map;
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
@@ -470,6 +560,25 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
}
KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_enable_map_to_physical);
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
+ enum kbase_hwcnt_set src)
+{
+ switch (src) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+KBASE_EXPORT_TEST_API(kbase_hwcnt_gpu_set_to_physical);
+
void kbase_hwcnt_gpu_enable_map_from_physical(
struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_physical_enable_map *src)
@@ -512,7 +621,12 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK);
switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
*blk_map = fe_bm;
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
@@ -520,6 +634,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical(
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
*blk_map = shader_bm;
break;
case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h
index f0d5176..c9039b2 100644
--- a/mali_kbase/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/mali_kbase_hwcnt_gpu.h
@@ -36,29 +36,54 @@ struct kbase_hwcnt_dump_buffer;
* @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
*/
enum kbase_hwcnt_gpu_group_type {
- KBASE_HWCNT_GPU_GROUP_TYPE_V5 = 0x10,
+ KBASE_HWCNT_GPU_GROUP_TYPE_V5,
};
/**
* enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
* used to identify metadata blocks.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: Job Manager block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
- * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED: Undefined block (e.g. if a
+ * counter set that a block
+ * doesn't support is used).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager
+ * or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job
+ * manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job
+ * manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
*/
enum kbase_hwcnt_gpu_v5_block_type {
- KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM = 0x40,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
};
/**
+ * enum kbase_hwcnt_set - GPU hardware counter sets
+ * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters
+ * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
+ * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters
+ */
+enum kbase_hwcnt_set {
+ KBASE_HWCNT_SET_PRIMARY,
+ KBASE_HWCNT_SET_SECONDARY,
+ KBASE_HWCNT_SET_TERTIARY,
+};
+
+/**
* struct kbase_hwcnt_physical_enable_map - Representation of enable map
* directly used by GPU.
* @fe_bm: Front end (JM/CSHW) counters selection bitmask.
@@ -74,15 +99,27 @@ struct kbase_hwcnt_physical_enable_map {
};
/**
+ * Values for Hardware Counter SET_SELECT value.
+ * Directly passed to HW.
+ */
+enum kbase_hwcnt_physical_set {
+ KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
+ KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
+ KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
+};
+
+/**
* struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs.
* @l2_count: L2 cache count.
* @core_mask: Shader core mask. May be sparse.
* @clk_cnt: Number of clock domains available.
+ * @is_csf: Whether CSF is used.
*/
struct kbase_hwcnt_gpu_v5_info {
size_t l2_count;
u64 core_mask;
u8 clk_cnt;
+ bool is_csf;
};
/**
@@ -113,8 +150,7 @@ int kbase_hwcnt_gpu_info_init(
* current GPU.
* @info: Non-NULL pointer to info struct initialised by
* kbase_hwcnt_gpu_info_init.
- * @use_secondary: True if secondary performance counters should be used, else
- * false. Ignored if secondary counters are not supported.
+ * @counter_set: The performance counter set used.
* @out_metadata: Non-NULL pointer to where created metadata is stored on
* success.
* @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
@@ -124,7 +160,7 @@ int kbase_hwcnt_gpu_info_init(
*/
int kbase_hwcnt_gpu_metadata_create(
const struct kbase_hwcnt_gpu_info *info,
- bool use_secondary,
+ enum kbase_hwcnt_set counter_set,
const struct kbase_hwcnt_metadata **out_metadata,
size_t *out_dump_bytes);
@@ -179,6 +215,16 @@ void kbase_hwcnt_gpu_enable_map_to_physical(
const struct kbase_hwcnt_enable_map *src);
/**
+ * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
+ * SET_SELECT value.
+ *
+ * @dst: Non-NULL pointer to dst physical SET_SELECT value.
+ * @src: Non-NULL pointer to src counter set selection.
+ */
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst,
+ enum kbase_hwcnt_set src);
+
+/**
* kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
* an enable map abstraction.
* @dst: Non-NULL pointer to dst enable map abstraction.
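Illustrative sketch only (not part of the patch): how the abstract set selection maps to the raw SET_SELECT value that is programmed into the hardware.

    static u32 example_set_select_value(enum kbase_hwcnt_set set)
    {
            enum kbase_hwcnt_physical_set phys;

            kbase_hwcnt_gpu_set_to_physical(&phys, set);
            /* e.g. KBASE_HWCNT_SET_SECONDARY ->
             * KBASE_HWCNT_PHYSICAL_SET_SECONDARY == 1
             */
            return (u32)phys;
    }
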
diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h
index fed4510..1180b3a 100644
--- a/mali_kbase/mali_kbase_ioctl.h
+++ b/mali_kbase/mali_kbase_ioctl.h
@@ -90,7 +90,7 @@ struct kbase_ioctl_get_gpuprops {
*
* @va_pages: The number of pages of virtual address space to reserve
* @commit_pages: The number of physical pages to allocate
- * @extent: The number of extra pages to allocate on each GPU fault which grows
+ * @extension: The number of extra pages to allocate on each GPU fault which grows
* the region
* @flags: Flags
* @gpu_va: The GPU virtual address which is allocated
@@ -102,7 +102,7 @@ union kbase_ioctl_mem_alloc {
struct {
__u64 va_pages;
__u64 commit_pages;
- __u64 extent;
+ __u64 extension;
__u64 flags;
} in;
struct {
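A hypothetical userspace sketch (not part of the patch) of the renamed field in use: a growable region where extension controls how many pages are added on each GPU page fault. KBASE_IOCTL_MEM_ALLOC, the BASE_MEM_* flags and the header path are taken from the UAPI headers installed elsewhere; the sizes are illustrative.

    #include <string.h>
    #include <sys/ioctl.h>
    #include "mali_kbase_ioctl.h"

    static int example_alloc_growable(int kbase_fd, __u64 *gpu_va_out)
    {
            union kbase_ioctl_mem_alloc alloc;

            memset(&alloc, 0, sizeof(alloc));
            alloc.in.va_pages = 1024;   /* reserve 4 MiB of GPU VA space */
            alloc.in.commit_pages = 64; /* back 256 KiB up front */
            alloc.in.extension = 64;    /* grow by 64 pages per GPU fault */
            alloc.in.flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR |
                             BASE_MEM_GROW_ON_GPF;

            if (ioctl(kbase_fd, KBASE_IOCTL_MEM_ALLOC, &alloc))
                    return -1;

            *gpu_va_out = alloc.out.gpu_va;
            return 0;
    }
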
diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c
index cd89ccb..a8cdf82 100644
--- a/mali_kbase/mali_kbase_jd.c
+++ b/mali_kbase/mali_kbase_jd.c
@@ -640,8 +640,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
u64 addr_end;
if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
- const unsigned long extent_bytes = reg->extent
- << PAGE_SHIFT;
+ const unsigned long extension_bytes =
+ reg->extension << PAGE_SHIFT;
const u64 low_ptr = ptr[LOW];
const u64 high_ptr = ptr[HIGH];
@@ -662,8 +662,8 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom)
* this, but here to avoid future maintenance
* hazards
*/
- WARN_ON(!is_power_of_2(extent_bytes));
- addr_end = ALIGN(read_val, extent_bytes);
+ WARN_ON(!is_power_of_2(extension_bytes));
+ addr_end = ALIGN(read_val, extension_bytes);
} else {
addr_end = read_val = READ_ONCE(*ptr);
}
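Illustrative sketch only (not part of the patch) of the rounding performed above for KBASE_REG_TILER_ALIGN_TOP regions; the extension value and heap pointer are made up.

    static u64 example_tiler_align_top_end(u64 read_val)
    {
            /* e.g. extension = 64 pages -> 256 KiB with 4 KiB pages */
            const unsigned long extension_bytes = 64UL << PAGE_SHIFT;

            /* read_val == 0x10042000 rounds up to 0x10080000 */
            return ALIGN(read_val, extension_bytes);
    }
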
@@ -1054,20 +1054,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx,
return jd_done_nolock(katom, NULL);
}
- if (katom->core_req & BASE_JD_REQ_SOFT_JOB) {
- /* This softjob has failed due to a previous
- * dependency, however we should still run the
- * prepare & finish functions
- */
- if (kbase_prepare_soft_job(katom) != 0) {
- katom->event_code =
- BASE_JD_EVENT_JOB_INVALID;
- return jd_done_nolock(katom, NULL);
- }
- }
-
katom->will_fail_event_code = katom->event_code;
- return false;
}
/* These must occur after the above loop to ensure that an atom
diff --git a/mali_kbase/mali_kbase_jm.c b/mali_kbase/mali_kbase_jm.c
index fb15a8c..16ae320 100644
--- a/mali_kbase/mali_kbase_jm.c
+++ b/mali_kbase/mali_kbase_jm.c
@@ -110,7 +110,6 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev)
up(&js_devdata->schedule_sem);
}
}
-#endif /* !MALI_USE_CSF */
void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
{
@@ -127,7 +126,6 @@ void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx)
}
}
-#if !MALI_USE_CSF
struct kbase_jd_atom *kbase_jm_return_atom_to_js(struct kbase_device *kbdev,
struct kbase_jd_atom *katom)
{
diff --git a/mali_kbase/mali_kbase_jm.h b/mali_kbase/mali_kbase_jm.h
index b3fd421..132db41 100644
--- a/mali_kbase/mali_kbase_jm.h
+++ b/mali_kbase/mali_kbase_jm.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2014, 2016, 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2014, 2016, 2019-2020 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,6 @@
*
*/
-
/*
* Job manager common APIs
*/
@@ -76,6 +75,7 @@ void kbase_jm_try_kick(struct kbase_device *kbdev, u32 js_mask);
void kbase_jm_try_kick_all(struct kbase_device *kbdev);
#endif /* !MALI_USE_CSF */
+#if !MALI_USE_CSF
/**
* kbase_jm_idle_ctx() - Mark a context as idle.
* @kbdev: Device pointer
@@ -91,7 +91,6 @@ void kbase_jm_try_kick_all(struct kbase_device *kbdev);
*/
void kbase_jm_idle_ctx(struct kbase_device *kbdev, struct kbase_context *kctx);
-#if !MALI_USE_CSF
/**
* kbase_jm_return_atom_to_js() - Return an atom to the job scheduler that has
* been soft-stopped or will fail due to a
diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c
index 8457d6c..1e91a7c 100644
--- a/mali_kbase/mali_kbase_kinstr_jm.c
+++ b/mali_kbase/mali_kbase_kinstr_jm.c
@@ -38,6 +38,7 @@
#include <linux/circ_buf.h>
#include <linux/fs.h>
#include <linux/kref.h>
+#include <linux/ktime.h>
#include <linux/log2.h>
#include <linux/mutex.h>
#include <linux/rculist_bl.h>
@@ -813,22 +814,6 @@ void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx)
kbase_kinstr_jm_ref_put(ctx);
}
-/**
- * timestamp() - Retrieves the current monotonic nanoseconds
- * Return: monotonic nanoseconds timestamp.
- */
-static u64 timestamp(void)
-{
- struct timespec ts;
- long ns;
-
- getrawmonotonic(&ts);
- ns = ((long)(ts.tv_sec) * NSEC_PER_SEC) + ts.tv_nsec;
- if (unlikely(ns < 0))
- return 0;
- return ((u64)(ns));
-}
-
void kbasep_kinstr_jm_atom_state(
struct kbase_jd_atom *const katom,
const enum kbase_kinstr_jm_reader_atom_state state)
@@ -837,7 +822,7 @@ void kbasep_kinstr_jm_atom_state(
struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm;
const u8 id = kbase_jd_atom_id(kctx, katom);
struct kbase_kinstr_jm_atom_state_change change = {
- .timestamp = timestamp(), .atom = id, .state = state
+ .timestamp = ktime_get_raw_ns(), .atom = id, .state = state
};
struct reader *reader;
struct hlist_bl_node *node;
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index ce52f6a..7ec6094 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -620,8 +620,8 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
__func__,
(unsigned long)align);
- align_mask = reg->extent - 1;
- align_offset = reg->extent - reg->initial_commit;
+ align_mask = reg->extension - 1;
+ align_offset = reg->extension - reg->initial_commit;
}
#endif /* !MALI_USE_CSF */
@@ -2755,7 +2755,7 @@ bool kbase_check_alloc_flags(unsigned long flags)
#if !MALI_USE_CSF
/* GPU executable memory also cannot have the top of its initial
- * commit aligned to 'extent'
+ * commit aligned to 'extension'
*/
if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
BASE_MEM_TILER_ALIGN_TOP))
@@ -2837,15 +2837,15 @@ bool kbase_check_import_flags(unsigned long flags)
}
int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
- u64 va_pages, u64 commit_pages, u64 large_extent)
+ u64 va_pages, u64 commit_pages, u64 large_extension)
{
struct device *dev = kctx->kbdev->dev;
int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
struct kbase_va_region test_reg;
- /* kbase_va_region's extent member can be of variable size, so check against that type */
- test_reg.extent = large_extent;
+ /* kbase_va_region's extension member can be of variable size, so check against that type */
+ test_reg.extension = large_extension;
#define KBASE_MSG_PRE "GPU allocation attempted with "
@@ -2872,25 +2872,30 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
return -EINVAL;
}
- if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extent == 0)) {
- dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF but extent == 0\n");
+ if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) {
+ dev_warn(dev, KBASE_MSG_PRE
+ "BASE_MEM_GROW_ON_GPF but extension == 0\n");
return -EINVAL;
}
#if !MALI_USE_CSF
- if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extent == 0)) {
- dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP but extent == 0\n");
+ if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) {
+ dev_warn(dev, KBASE_MSG_PRE
+ "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n");
return -EINVAL;
}
if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
- test_reg.extent != 0) {
- dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n");
+ test_reg.extension != 0) {
+ dev_warn(
+ dev, KBASE_MSG_PRE
+ "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n");
return -EINVAL;
}
#else
- if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extent != 0) {
- dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF not set but extent != 0\n");
+ if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) {
+ dev_warn(dev, KBASE_MSG_PRE
+ "BASE_MEM_GROW_ON_GPF not set but extension != 0\n");
return -EINVAL;
}
#endif /* !MALI_USE_CSF */
@@ -2899,28 +2904,35 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
/* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
if (flags & BASE_MEM_TILER_ALIGN_TOP) {
#define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
- unsigned long small_extent;
-
- if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) {
- dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld",
- (unsigned long long)large_extent,
- BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES);
+ unsigned long small_extension;
+
+ if (large_extension >
+ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) {
+ dev_warn(dev,
+ KBASE_MSG_PRE_FLAG
+ "extension==%lld pages exceeds limit %lld",
+ (unsigned long long)large_extension,
+ BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES);
return -EINVAL;
}
/* For use with is_power_of_2, which takes unsigned long, so
* must ensure e.g. on 32-bit kernel it'll fit in that type */
- small_extent = (unsigned long)large_extent;
+ small_extension = (unsigned long)large_extension;
- if (!is_power_of_2(small_extent)) {
- dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2",
- small_extent);
+ if (!is_power_of_2(small_extension)) {
+ dev_warn(dev,
+ KBASE_MSG_PRE_FLAG
+ "extension==%ld not a non-zero power of 2",
+ small_extension);
return -EINVAL;
}
- if (commit_pages > large_extent) {
- dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld",
- (unsigned long)commit_pages,
- (unsigned long)large_extent);
+ if (commit_pages > large_extension) {
+ dev_warn(dev,
+ KBASE_MSG_PRE_FLAG
+ "commit_pages==%ld exceeds extension==%ld",
+ (unsigned long)commit_pages,
+ (unsigned long)large_extension);
return -EINVAL;
}
#undef KBASE_MSG_PRE_FLAG
@@ -3013,7 +3025,7 @@ static ssize_t kbase_jit_debugfs_common_read(struct file *file,
}
size = scnprintf(data->buffer, sizeof(data->buffer),
- "%llu,%llu,%llu", data->active_value,
+ "%llu,%llu,%llu\n", data->active_value,
data->pool_value, data->destroy_value);
}
@@ -3311,7 +3323,7 @@ static bool meet_size_and_tiler_align_top_requirements(
#if !MALI_USE_CSF
if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) {
- size_t align = info->extent;
+ size_t align = info->extension;
size_t align_mask = align - 1;
if ((walker->start_pfn + info->commit_pages) & align_mask)
@@ -3366,20 +3378,20 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx,
KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES);
} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
/* The GPU could report being ready to write to the next
- * 'extent' sized chunk, but didn't actually write to it, so we
- * can report up to 'extent' size pages more than the backed
+ * 'extension' sized chunk, but didn't actually write to it, so we
+ * can report up to 'extension' size pages more than the backed
* size.
*
* Note, this is allowed to exceed reg->nr_pages.
*/
- max_allowed_pages += reg->extent;
+ max_allowed_pages += reg->extension;
/* Also note that in these GPUs, the GPU may make a large (>1
* page) initial allocation but not actually write out to all
* of it. Hence it might report that a much higher amount of
* memory was used than actually was written to. This does not
* result in a real warning because on growing this memory we
- * round up the size of the allocation up to an 'extent' sized
+ * round up the size of the allocation to an 'extension' sized
* chunk, hence automatically bringing the backed size up to
* the reported size.
*/
@@ -3605,7 +3617,7 @@ done:
/* Update attributes of JIT allocation taken from the pool */
reg->initial_commit = info->commit_pages;
- reg->extent = info->extent;
+ reg->extension = info->extension;
update_failed:
return ret;
@@ -3963,7 +3975,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
kbase_gpu_vm_unlock(kctx);
reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
- info->extent, &flags, &gpu_addr);
+ info->extension, &flags, &gpu_addr);
if (!reg) {
/* Most likely not enough GPU virtual space left for
* the new JIT allocation.
@@ -4321,12 +4333,18 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
alloc->imported.user_buf.nr_pages,
reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
pages, NULL);
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
pinned_pages = get_user_pages_remote(NULL, mm,
address,
alloc->imported.user_buf.nr_pages,
reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
pages, NULL, NULL);
+#else
+ pinned_pages = get_user_pages_remote(mm,
+ address,
+ alloc->imported.user_buf.nr_pages,
+ reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0,
+ pages, NULL, NULL);
#endif
if (pinned_pages <= 0)
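Illustrative sketch only (not part of the patch): the 5.9 signature split added above expressed as a small wrapper, which may read more clearly if the call site is ever duplicated. The signatures follow the upstream kernels named in the version checks; the pre-4.5 variant handled earlier in the #if chain is not covered here.

    static long example_pin_user_buf_pages(struct mm_struct *mm,
                                           unsigned long address,
                                           unsigned long nr_pages,
                                           unsigned int gup_flags,
                                           struct page **pages)
    {
    #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 9, 0)
            /* Kernels before 5.9 also take a task_struct pointer. */
            return get_user_pages_remote(NULL, mm, address, nr_pages,
                                         gup_flags, pages, NULL, NULL);
    #else
            return get_user_pages_remote(mm, address, nr_pages,
                                         gup_flags, pages, NULL, NULL);
    #endif
    }
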
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 2238fbf..7a5cc66 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -263,7 +263,7 @@ static inline struct kbase_mem_phy_alloc *kbase_mem_phy_alloc_put(struct kbase_m
* @threshold_pages: If non-zero and the amount of memory committed to a region
* that can grow on page fault exceeds this number of pages
* then the driver switches to incremental rendering.
- * @extent: Number of pages allocated on page fault.
+ * @extension: Number of pages allocated on page fault.
* @cpu_alloc: The physical memory we mmap to the CPU when mapping this region.
* @gpu_alloc: The physical memory we mmap to the GPU when mapping this region.
* @jit_node: Links to neighboring regions in the just-in-time memory pool.
@@ -341,7 +341,7 @@ struct kbase_va_region {
#endif
#if !MALI_USE_CSF
-/* The top of the initial commit is aligned to extent pages.
+/* The top of the initial commit is aligned to extension pages.
* Extent must be a power of 2 */
#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23)
#else
@@ -416,7 +416,7 @@ struct kbase_va_region {
#endif
unsigned long flags;
- size_t extent;
+ size_t extension;
struct kbase_mem_phy_alloc *cpu_alloc;
struct kbase_mem_phy_alloc *gpu_alloc;
struct list_head jit_node;
@@ -1072,7 +1072,7 @@ bool kbase_check_import_flags(unsigned long flags);
* @flags: The flags passed from user space
* @va_pages: The size of the requested region, in pages.
* @commit_pages: Number of pages to commit initially.
- * @extent: Number of pages to grow by on GPU page fault and/or alignment
+ * @extension: Number of pages to grow by on GPU page fault and/or alignment
* (depending on flags)
*
* Makes checks on the size parameters passed in from user space for a memory
@@ -1081,7 +1081,7 @@ bool kbase_check_import_flags(unsigned long flags);
* Return: 0 if sizes are valid for these flags, negative error code otherwise
*/
int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
- u64 va_pages, u64 commit_pages, u64 extent);
+ u64 va_pages, u64 commit_pages, u64 extension);
/**
* kbase_update_region_flags - Convert user space flags to kernel region flags
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 989bb36..f6d386f 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -296,8 +296,8 @@ void kbase_phy_alloc_mapping_put(struct kbase_context *kctx,
}
struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
- u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
- u64 *gpu_va)
+ u64 va_pages, u64 commit_pages,
+ u64 extension, u64 *flags, u64 *gpu_va)
{
int zone;
struct kbase_va_region *reg;
@@ -309,8 +309,9 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
KBASE_DEBUG_ASSERT(gpu_va);
dev = kctx->kbdev->dev;
- dev_dbg(dev, "Allocating %lld va_pages, %lld commit_pages, %lld extent, 0x%llX flags\n",
- va_pages, commit_pages, extent, *flags);
+ dev_dbg(dev,
+ "Allocating %lld va_pages, %lld commit_pages, %lld extension, 0x%llX flags\n",
+ va_pages, commit_pages, extension, *flags);
#if MALI_USE_CSF
*gpu_va = 0; /* return 0 on failure */
@@ -356,7 +357,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
*flags &= ~BASE_MEM_COHERENT_SYSTEM;
}
- if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages, extent))
+ if (kbase_check_alloc_sizes(kctx, *flags, va_pages, commit_pages,
+ extension))
goto bad_sizes;
#ifdef CONFIG_MALI_MEMORY_FULLY_BACKED
@@ -413,15 +415,15 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
reg->threshold_pages = 0;
if (*flags & BASE_MEM_GROW_ON_GPF) {
- /* kbase_check_alloc_sizes() already checks extent is valid for
- * assigning to reg->extent */
- reg->extent = extent;
+ /* kbase_check_alloc_sizes() already checks extension is valid for
+ * assigning to reg->extension */
+ reg->extension = extension;
#if !MALI_USE_CSF
} else if (*flags & BASE_MEM_TILER_ALIGN_TOP) {
- reg->extent = extent;
+ reg->extension = extension;
#endif /* !MALI_USE_CSF */
} else {
- reg->extent = 0;
+ reg->extension = 0;
}
if (kbase_alloc_phy_pages(reg, va_pages, commit_pages) != 0) {
@@ -448,14 +450,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
}
}
-#if MALI_USE_CSF
- if (reg->flags & KBASE_REG_CSF_EVENT) {
- WARN_ON(!(*flags & BASE_MEM_SAME_VA));
-
- kbase_link_event_mem_page(kctx, reg);
- }
-#endif
-
/* mmap needed to setup VA? */
if (*flags & BASE_MEM_SAME_VA) {
unsigned long cookie, cookie_nr;
@@ -503,13 +497,6 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
no_mmap:
no_cookie:
-#if MALI_USE_CSF
- if (reg->flags & KBASE_REG_CSF_EVENT) {
- kbase_gpu_vm_lock(kctx);
- kbase_unlink_event_mem_page(kctx, reg);
- kbase_gpu_vm_unlock(kctx);
- }
-#endif
no_kern_mapping:
no_mem:
#if MALI_JIT_PRESSURE_LIMIT_BASE
@@ -1480,7 +1467,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0;
reg->gpu_alloc->imported.umm.need_sync = need_sync;
reg->gpu_alloc->imported.umm.kctx = kctx;
- reg->extent = 0;
+ reg->extension = 0;
if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
int err;
@@ -1670,7 +1657,7 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
goto fault_mismatch;
reg->gpu_alloc->nents = 0;
- reg->extent = 0;
+ reg->extension = 0;
if (pages) {
struct device *dev = kctx->kbdev->dev;
@@ -2674,6 +2661,11 @@ static int kbasep_reg_mmap(struct kbase_context *kctx,
kctx->pending_regions[cookie] = NULL;
bitmap_set(kctx->cookies, cookie, 1);
+#if MALI_USE_CSF
+ if (reg->flags & KBASE_REG_CSF_EVENT)
+ kbase_link_event_mem_page(kctx, reg);
+#endif
+
/*
* Overwrite the offset with the region start_pfn, so we effectively
* map from offset 0 in the region. However subtract the aligned
@@ -2842,6 +2834,18 @@ int kbase_context_mmap(struct kbase_context *const kctx,
/* MMU dump - userspace should now have a reference on
* the pages, so we can now free the kernel mapping */
vfree(kaddr);
+ /* CPU mappings of GPU allocations have the GPU VA as the
+ * vm_pgoff, and that is used to shrink the mapping when the
+ * commit size is reduced. So the vm_pgoff of the CPU mapping
+ * created to get the snapshot of GPU page tables must not
+ * match any GPU VA. That can be ensured by setting vm_pgoff
+ * to vma->vm_start because,
+ * - GPU VA of any SAME_VA allocation cannot match with
+ * vma->vm_start, as CPU VAs are unique.
+ * - GPU VA of CUSTOM_VA allocations are outside the CPU
+ * virtual address space.
+ */
+ vma->vm_pgoff = PFN_DOWN(vma->vm_start);
}
out_unlock:
@@ -3364,8 +3368,10 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
#endif
struct kbase_context *kctx = vma->vm_private_data;
struct kbase_device *kbdev = kctx->kbdev;
+ struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
unsigned long pfn = PFN_DOWN(kbdev->reg_start + USER_BASE);
size_t nr_pages = PFN_DOWN(vma->vm_end - vma->vm_start);
+ vm_fault_t ret = VM_FAULT_SIGBUS;
/* Few sanity checks up front */
if (WARN_ON(nr_pages != 1) ||
@@ -3374,11 +3380,22 @@ static vm_fault_t kbase_csf_user_reg_vm_fault(struct vm_fault *vmf)
PFN_DOWN(BASEP_MEM_CSF_USER_REG_PAGE_HANDLE)))
return VM_FAULT_SIGBUS;
- /* TODO: check PM state here and don't map in the actual register page
- * if GPU is powered down or is about to be powered down.
+ mutex_lock(&kbdev->pm.lock);
+
+ /* Don't map in the actual register page if GPU is powered down.
+ * Always map in the dummy page in no-mali builds.
*/
+ if (!kbdev->pm.backend.gpu_powered || IS_ENABLED(CONFIG_MALI_NO_MALI))
+ pfn = PFN_DOWN(as_phys_addr_t(kbdev->csf.dummy_user_reg_page));
+
+ ret = mgm_dev->ops.mgm_vmf_insert_pfn_prot(mgm_dev,
+ KBASE_MEM_GROUP_CSF_FW, vma,
+ vma->vm_start, pfn,
+ vma->vm_page_prot);
- return vmf_insert_pfn_prot(vma, vma->vm_start, pfn, vma->vm_page_prot);
+ mutex_unlock(&kbdev->pm.lock);
+
+ return ret;
}
static const struct vm_operations_struct kbase_csf_user_reg_vm_ops = {
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index 85e030a..c80d885 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -43,7 +43,7 @@ struct kbase_hwc_dma_mapping {
* @kctx: The kernel context
* @va_pages: The number of pages of virtual address space to reserve
* @commit_pages: The number of physical pages to allocate upfront
- * @extent: The number of extra pages to allocate on each GPU fault which
+ * @extension: The number of extra pages to allocate on each GPU fault which
* grows the region.
* @flags: bitmask of BASE_MEM_* flags to convey special requirements &
* properties for the new allocation.
@@ -53,8 +53,8 @@ struct kbase_hwc_dma_mapping {
* Return: 0 on success or error code
*/
struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx,
- u64 va_pages, u64 commit_pages, u64 extent, u64 *flags,
- u64 *gpu_va);
+ u64 va_pages, u64 commit_pages,
+ u64 extension, u64 *flags, u64 *gpu_va);
/**
* kbase_mem_query - Query properties of a GPU memory region
diff --git a/mali_kbase/mali_kbase_regs_history_debugfs.c b/mali_kbase/mali_kbase_regs_history_debugfs.c
index 1980da8..640db95 100644
--- a/mali_kbase/mali_kbase_regs_history_debugfs.c
+++ b/mali_kbase/mali_kbase_regs_history_debugfs.c
@@ -118,7 +118,7 @@ void kbase_io_history_add(struct kbase_io_history *h,
void kbase_io_history_dump(struct kbase_device *kbdev)
{
struct kbase_io_history *const h = &kbdev->io_history;
- u16 i;
+ size_t i;
size_t iters;
unsigned long flags;
@@ -136,7 +136,7 @@ void kbase_io_history_dump(struct kbase_device *kbdev)
&h->buf[(h->count - iters + i) % h->size];
char const access = (io->addr & 1) ? 'w' : 'r';
- dev_err(kbdev->dev, "%6i: %c: reg 0x%016lx val %08x\n", i,
+ dev_err(kbdev->dev, "%6zu: %c: reg 0x%016lx val %08x\n", i,
access, (unsigned long)(io->addr & ~0x1), io->value);
}
@@ -180,7 +180,7 @@ DEFINE_SIMPLE_ATTRIBUTE(regs_history_size_fops,
static int regs_history_show(struct seq_file *sfile, void *data)
{
struct kbase_io_history *const h = sfile->private;
- u16 i;
+ size_t i;
size_t iters;
unsigned long flags;
@@ -199,8 +199,8 @@ static int regs_history_show(struct seq_file *sfile, void *data)
&h->buf[(h->count - iters + i) % h->size];
char const access = (io->addr & 1) ? 'w' : 'r';
- seq_printf(sfile, "%6i: %c: reg 0x%016lx val %08x\n", i, access,
- (unsigned long)(io->addr & ~0x1), io->value);
+ seq_printf(sfile, "%6zu: %c: reg 0x%016lx val %08x\n", i,
+ access, (unsigned long)(io->addr & ~0x1), io->value);
}
spin_unlock_irqrestore(&h->lock, flags);
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index ef5da68..0dc8c03 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -1006,10 +1006,10 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
ret = kbasep_jit_alloc_validate(kctx, info);
if (ret)
goto free_info;
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(kbdev, katom,
- info->va_pages, info->commit_pages, info->extent,
- info->id, info->bin_id, info->max_allocations,
- info->flags, info->usage_id);
+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JITALLOCINFO(
+ kbdev, katom, info->va_pages, info->commit_pages,
+ info->extension, info->id, info->bin_id,
+ info->max_allocations, info->flags, info->usage_id);
}
katom->jit_blocked = false;
@@ -1024,7 +1024,7 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
* though the region is valid it doesn't represent the
* same thing it used to.
*
- * Complete validation of va_pages, commit_pages and extent
+ * Complete validation of va_pages, commit_pages and extension
* isn't done here as it will be done during the call to
* kbase_mem_alloc.
*/
@@ -1228,10 +1228,10 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom)
MIDGARD_MMU_BOTTOMLEVEL, kctx->jit_group_id);
#endif
- KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(kbdev, katom,
- info->gpu_alloc_addr, new_addr, info->flags,
- entry_mmu_flags, info->id, info->commit_pages,
- info->extent, info->va_pages);
+ KBASE_TLSTREAM_TL_ATTRIB_ATOM_JIT(
+ kbdev, katom, info->gpu_alloc_addr, new_addr,
+ info->flags, entry_mmu_flags, info->id,
+ info->commit_pages, info->extension, info->va_pages);
kbase_vunmap(kctx, &mapping);
kbase_trace_jit_report_gpu_mem(kctx, reg,
diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.c b/mali_kbase/mali_kbase_trace_gpu_mem.c
index 7669895..d0e9f0b 100644
--- a/mali_kbase/mali_kbase_trace_gpu_mem.c
+++ b/mali_kbase/mali_kbase_trace_gpu_mem.c
@@ -127,31 +127,31 @@ static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx,
}
if (unique_buf_imported) {
- struct kbase_dma_buf *buf_node =
- kzalloc(sizeof(*buf_node), GFP_KERNEL);
+ struct kbase_dma_buf *new_buf_node =
+ kzalloc(sizeof(*new_buf_node), GFP_KERNEL);
- if (buf_node == NULL) {
+ if (new_buf_node == NULL) {
dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n");
/* Don't account for it if we fail to allocate memory */
unique_buf_imported = false;
} else {
struct rb_node **new = &(root->rb_node), *parent = NULL;
- buf_node->dma_buf = dma_buf;
- buf_node->import_count = 1;
+ new_buf_node->dma_buf = dma_buf;
+ new_buf_node->import_count = 1;
while (*new) {
- struct kbase_dma_buf *node;
+ struct kbase_dma_buf *new_node;
parent = *new;
- node = rb_entry(parent, struct kbase_dma_buf,
- dma_buf_node);
- if (dma_buf < node->dma_buf)
+ new_node = rb_entry(parent, struct kbase_dma_buf,
+ dma_buf_node);
+ if (dma_buf < new_node->dma_buf)
new = &(*new)->rb_left;
else
new = &(*new)->rb_right;
}
- rb_link_node(&buf_node->dma_buf_node, parent, new);
- rb_insert_color(&buf_node->dma_buf_node, root);
+ rb_link_node(&new_buf_node->dma_buf_node, parent, new);
+ rb_insert_color(&new_buf_node->dma_buf_node, root);
}
} else if (!WARN_ON(!buf_node)) {
buf_node->import_count++;
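
The hunk above renames the tree node but keeps the usual Linux rb-tree insertion shape: walk to a leaf keyed on the dma_buf pointer, then link and recolour the new node. Below is a minimal userspace sketch of the same pointer-keyed dedup idea, with a plain unbalanced binary search tree standing in for the kernel rb-tree; buf_node and tree_insert are illustrative names, not kbase API.

/* Sketch: dedup imported buffers by pointer, counting repeat imports. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct buf_node {
    const void *dma_buf;        /* key: address of the imported buffer */
    unsigned int import_count;  /* number of times it has been imported */
    struct buf_node *left, *right;
};

/* Walk to a leaf keyed on the dma_buf pointer; bump the count on a match,
 * otherwise link a freshly allocated node where the search ended. The driver
 * does the same walk with rb_entry()/rb_link_node(), plus an
 * rb_insert_color() rebalance.
 */
static struct buf_node *tree_insert(struct buf_node **root, const void *dma_buf)
{
    struct buf_node **new = root;

    while (*new) {
        if (dma_buf == (*new)->dma_buf) {
            (*new)->import_count++;
            return *new;
        }
        if ((uintptr_t)dma_buf < (uintptr_t)(*new)->dma_buf)
            new = &(*new)->left;
        else
            new = &(*new)->right;
    }

    *new = calloc(1, sizeof(**new));
    if (*new) {
        (*new)->dma_buf = dma_buf;
        (*new)->import_count = 1;
    }
    return *new;
}

int main(void)
{
    struct buf_node *root = NULL;
    int a, b;
    struct buf_node *na = tree_insert(&root, &a);

    tree_insert(&root, &a);  /* same buffer imported again */
    tree_insert(&root, &b);
    if (na)
        printf("imports of a: %u\n", na->import_count);  /* prints 2 */
    return 0;
}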
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index 3b0e2d6..e0e828c 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -33,6 +33,7 @@
#include <linux/fcntl.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
+#include <linux/log2.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/poll.h>
@@ -389,7 +390,7 @@ static void kbasep_vinstr_client_destroy(struct kbase_vinstr_client *vcli)
* the vinstr context.
* @vctx: Non-NULL pointer to vinstr context.
* @setup: Non-NULL pointer to hardware counter ioctl setup structure.
- * setup->buffer_count must not be 0.
+ * setup->buffer_count must not be 0 and must be a power of 2.
* @out_vcli: Non-NULL pointer to where created client will be stored on
* success.
*
@@ -407,6 +408,7 @@ static int kbasep_vinstr_client_create(
WARN_ON(!vctx);
WARN_ON(!setup);
WARN_ON(setup->buffer_count == 0);
+ WARN_ON(!is_power_of_2(setup->buffer_count));
vcli = kzalloc(sizeof(*vcli), GFP_KERNEL);
if (!vcli)
@@ -586,7 +588,8 @@ int kbase_vinstr_hwcnt_reader_setup(
if (!vctx || !setup ||
(setup->buffer_count == 0) ||
- (setup->buffer_count > MAX_BUFFER_COUNT))
+ (setup->buffer_count > MAX_BUFFER_COUNT) ||
+ !is_power_of_2(setup->buffer_count))
return -EINVAL;
errcode = kbasep_vinstr_client_create(vctx, setup, &vcli);
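
The added is_power_of_2() checks reject hardware counter reader configurations whose buffer count is not a power of two. One property that would motivate such a constraint, sketched below as an illustration rather than a statement of the driver's exact reasoning, is that a free-running 32-bit ring index keeps mapping onto the buffers consistently across the index wrap only when the count divides 2^32; slot() is an illustrative helper.

/* Sketch: why a power-of-two ring size stays consistent across index wrap. */
#include <stdint.h>
#include <stdio.h>

/* Map a free-running 32-bit index onto a ring of `count` buffers. */
static uint32_t slot(uint32_t idx, uint32_t count)
{
    return idx % count;
}

int main(void)
{
    uint32_t last = UINT32_MAX; /* index just before the 32-bit wrap */

    /* count = 3: the slot sequence repeats 0 -> 0 across the wrap. */
    printf("count 3: %u -> %u\n", slot(last, 3), slot(last + 1, 3));
    /* count = 4 (power of two): the sequence continues 3 -> 0 as expected. */
    printf("count 4: %u -> %u\n", slot(last, 4), slot(last + 1, 4));
    return 0;
}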
@@ -719,7 +722,9 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer(
if (unlikely(copy_to_user(buffer, meta, min_size)))
return -EFAULT;
- atomic_inc(&cli->meta_idx);
+ /* Compare exchange meta idx to protect against concurrent getters */
+ if (meta_idx != atomic_cmpxchg(&cli->meta_idx, meta_idx, meta_idx + 1))
+ return -EBUSY;
return 0;
}
@@ -791,7 +796,13 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer(
goto out;
}
- atomic_inc(&cli->read_idx);
+ /* Compare exchange read idx to protect against concurrent putters */
+ if (read_idx !=
+ atomic_cmpxchg(&cli->read_idx, read_idx, read_idx + 1)) {
+ ret = -EPERM;
+ goto out;
+ }
+
out:
if (unlikely(kbuf != stack_kbuf))
kfree(kbuf);
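
The cmpxchg changes above turn a plain atomic_inc() into a claim of the sampled index, so if two callers race on the same buffer only one of them advances meta_idx or read_idx and the other gets an error instead of consuming the slot twice. A small C11 sketch of that claim pattern, assuming a userspace atomic in place of the kernel atomic_t; claim_slot and the demo values are illustrative.

/* Sketch: claiming the sampled index with compare-exchange. */
#include <stdatomic.h>
#include <stdio.h>

/* Advance *idx from the value sampled earlier to the next one. Exactly one
 * racing caller succeeds; the others see the index has already moved on and
 * report failure, mirroring the -EBUSY/-EPERM paths added above.
 */
static int claim_slot(atomic_uint *idx, unsigned int sampled)
{
    unsigned int expected = sampled;

    if (!atomic_compare_exchange_strong(idx, &expected, sampled + 1))
        return -1; /* somebody else consumed this slot first */
    return 0;
}

int main(void)
{
    atomic_uint meta_idx = 0;
    unsigned int sampled = atomic_load(&meta_idx);

    printf("first claim:  %d\n", claim_slot(&meta_idx, sampled)); /* 0 */
    printf("second claim: %d\n", claim_slot(&meta_idx, sampled)); /* -1 */
    return 0;
}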
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index 1d10699..b23d1ff 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -27,7 +27,6 @@
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_fault.h>
#include <mali_kbase_ctx_sched.h>
-#include <mali_kbase_hwaccess_jm.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_as_fault_debugfs.h>
#include "../mali_kbase_mmu_internal.h"
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index a5cda00..cb57dc9 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -37,8 +37,6 @@
#include <mali_kbase_defs.h>
#include <mali_kbase_hw.h>
#include <mmu/mali_kbase_mmu_hw.h>
-#include <mali_kbase_hwaccess_jm.h>
-#include <mali_kbase_hwaccess_time.h>
#include <mali_kbase_mem.h>
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
@@ -135,20 +133,21 @@ static int kbase_mmu_update_pages_no_flush(struct kbase_context *kctx, u64 vpfn,
static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
struct kbase_va_region *reg, size_t fault_rel_pfn)
{
- size_t multiple = reg->extent;
+ size_t multiple = reg->extension;
size_t reg_current_size = kbase_reg_current_backed_size(reg);
size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
size_t remainder;
if (!multiple) {
- dev_warn(kbdev->dev,
- "VA Region 0x%llx extent was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
+ dev_warn(
+ kbdev->dev,
+ "VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW\n",
((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
return minimum_extra;
}
/* Calculate the remainder to subtract from minimum_extra to make it
- * the desired (rounded down) multiple of the extent.
+ * the desired (rounded down) multiple of the extension.
* Depending on reg's flags, the base used for calculating multiples is
* different
*/
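
reg_grow_calc_extra_pages() grows a faulting region by at least minimum_extra pages while keeping the growth a whole multiple of the region's extension. The sketch below shows only the basic round-to-multiple arithmetic; the real code also varies the base used for the multiple according to the region flags, as the comment above notes. round_up_to_multiple is an illustrative helper.

/* Sketch: rounding the requested growth to a whole multiple. */
#include <stddef.h>
#include <stdio.h>

/* Round `pages` up to the next whole multiple of `multiple`; with a zero
 * multiple the caller keeps `pages` unchanged, matching the dev_warn()
 * fallback above.
 */
static size_t round_up_to_multiple(size_t pages, size_t multiple)
{
    size_t remainder;

    if (!multiple)
        return pages;

    remainder = pages % multiple;
    return remainder ? pages + multiple - remainder : pages;
}

int main(void)
{
    /* Fault needs 5 more pages and the extension is 64 pages: grow by 64. */
    printf("%zu\n", round_up_to_multiple(5, 64));
    /* Already a multiple: unchanged. */
    printf("%zu\n", round_up_to_multiple(128, 64));
    return 0;
}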
@@ -718,6 +717,10 @@ page_fault_retry:
goto fault_done;
}
+ if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) ==
+ AS_FAULTSTATUS_ACCESS_TYPE_READ)
+ dev_warn(kbdev->dev, "Grow on pagefault while reading");
+
/* find the size we need to grow it by
* we know the result fit in a size_t due to
* kbase_region_tracker_find_region_enclosing_address
@@ -1570,10 +1573,29 @@ static void kbase_mmu_flush_invalidate_as(struct kbase_device *kbdev,
{
int err;
u32 op;
+ bool gpu_powered;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ gpu_powered = kbdev->pm.backend.gpu_powered;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ /* GPU is off, so there is no need to perform a flush/invalidate.
+ * Even if the GPU has not actually been powered down yet, it is still
+ * safe to skip the flush/invalidate once the gpu_powered flag has been
+ * set to false: the TLB invalidation will be performed anyway by the
+ * AS_COMMAND_UPDATE that is sent when address spaces are restored after
+ * gpu_powered is set back to true, and no L2 cache flush is needed
+ * because the L2 cache is off whenever gpu_powered is false.
+ */
+ if (!gpu_powered)
+ return;
if (kbase_pm_context_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
- /* GPU is off so there's no need to perform flush/invalidate */
+ /* GPU has just been powered off due to system suspend.
+ * So again, no need to perform flush/invalidate.
+ */
return;
}
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index a820ab2..e9eef8b 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -26,7 +26,6 @@
#include <mmu/mali_kbase_mmu_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <device/mali_kbase_device.h>
-#include <mali_kbase_as_fault_debugfs.h>
/**
* lock_region() - Generate lockaddr to lock memory region in MMU
diff --git a/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c
index 11a8b77..47933a7 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c
+++ b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c
@@ -25,18 +25,45 @@
#include <linux/clk.h>
#include "mali_kbase_config_platform.h"
+#if MALI_USE_CSF
+#include <asm/arch_timer.h>
+#endif
+
static void *enumerate_gpu_clk(struct kbase_device *kbdev,
unsigned int index)
{
if (index >= kbdev->nr_clocks)
return NULL;
+#if MALI_USE_CSF
+ if (of_machine_is_compatible("arm,juno"))
+ WARN_ON(kbdev->nr_clocks != 1);
+#endif
+
return kbdev->clocks[index];
}
static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev,
void *gpu_clk_handle)
{
+#if MALI_USE_CSF
+ /* On Juno FPGA platforms, the GPU clock rate is reported as 600 MHz at
+ * boot time. After the first call to kbase_devfreq_target() it is
+ * reported as 450 MHz and does not change thereafter. However, the GPU
+ * actually operates at a fixed 50 MHz, which is equal to the system
+ * counter frequency, and the HW counters also increment at that rate.
+ * DVFS, which is a client of kbase_ipa_control, normalizes the
+ * GPU_ACTIVE counter to calculate how long the GPU has been busy, so
+ * the system counter frequency must be returned here for that
+ * normalization to be correct.
+ * This is a reasonable workaround because the frequency remains the
+ * same throughout. It can be removed after GPUCORE-25693.
+ */
+ if (of_machine_is_compatible("arm,juno"))
+ return arch_timer_get_cntfrq();
+#endif
+
return clk_get_rate((struct clk *)gpu_clk_handle);
}
@@ -51,12 +78,23 @@ static int gpu_clk_notifier_register(struct kbase_device *kbdev,
sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle),
"mismatch in the size of clk member");
+#if MALI_USE_CSF
+ /* Frequency is fixed on Juno platforms */
+ if (of_machine_is_compatible("arm,juno"))
+ return 0;
+#endif
+
return clk_notifier_register((struct clk *)gpu_clk_handle, nb);
}
static void gpu_clk_notifier_unregister(struct kbase_device *kbdev,
void *gpu_clk_handle, struct notifier_block *nb)
{
+#if MALI_USE_CSF
+ if (of_machine_is_compatible("arm,juno"))
+ return;
+#endif
+
clk_notifier_unregister((struct clk *)gpu_clk_handle, nb);
}
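
The Juno workaround matters because kbase_ipa_control clients normalize the GPU_ACTIVE counter against the reported clock rate: if the counters actually tick at the 50 MHz system counter rate but the clock framework reports 450 MHz, the computed busy time is off by that ratio. A small arithmetic sketch of the effect, using the frequencies quoted in the comment above; busy_fraction() is an illustrative helper, not a kbase function.

/* Sketch: GPU_ACTIVE normalization against the reported clock rate. */
#include <stdint.h>
#include <stdio.h>

/* Busy fraction = counter ticks over the interval divided by the ticks a
 * fully busy GPU would accumulate at the given rate.
 */
static double busy_fraction(uint64_t gpu_active_ticks, uint64_t rate_hz,
                            double interval_s)
{
    return (double)gpu_active_ticks / ((double)rate_hz * interval_s);
}

int main(void)
{
    /* Counter ticks at 50 MHz; GPU busy for half of a 100 ms window. */
    uint64_t ticks = 50000000ull / 10 / 2;

    printf("normalized at 50 MHz:  %.2f\n",
           busy_fraction(ticks, 50000000ull, 0.1));   /* 0.50 */
    printf("normalized at 450 MHz: %.2f\n",
           busy_fraction(ticks, 450000000ull, 0.1));  /* 0.06 */
    return 0;
}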
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index cd90ea0..83a293d 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -319,18 +319,21 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
}
#if !MALI_USE_CSF
} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
- unsigned long extent_bytes =
- (unsigned long)(reg->extent << PAGE_SHIFT);
+ unsigned long extension_bytes =
+ (unsigned long)(reg->extension
+ << PAGE_SHIFT);
/* kbase_check_alloc_sizes() already satisfies
* these checks, but they're here to avoid
* maintenance hazards due to the assumptions
* involved */
- WARN_ON(reg->extent > (ULONG_MAX >> PAGE_SHIFT));
+ WARN_ON(reg->extension >
+ (ULONG_MAX >> PAGE_SHIFT));
WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
- WARN_ON(!is_power_of_2(extent_bytes));
- align_mask = extent_bytes - 1;
+ WARN_ON(!is_power_of_2(extension_bytes));
+ align_mask = extension_bytes - 1;
align_offset =
- extent_bytes - (reg->initial_commit << PAGE_SHIFT);
+ extension_bytes -
+ (reg->initial_commit << PAGE_SHIFT);
#endif /* !MALI_USE_CSF */
} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
is_same_4gb_page = true;
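
For KBASE_REG_TILER_ALIGN_TOP regions the extension must be a power of two so that align_mask and align_offset can steer the VA search: any address with (va & align_mask) == align_offset puts the end of the initial commit on an extension boundary, which appears to be what this path is arranging. A small sketch verifying that relationship under assumed page and region sizes.

/* Sketch: how align_mask/align_offset place the top of the initial commit. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12 /* 4 KiB pages, assumed for this example */

int main(void)
{
    uint64_t extension_pages = 128;     /* power of two, per the WARN_ON */
    uint64_t initial_commit_pages = 40;

    uint64_t extension_bytes = extension_pages << PAGE_SHIFT;
    uint64_t commit_bytes = initial_commit_pages << PAGE_SHIFT;
    uint64_t align_mask = extension_bytes - 1;
    uint64_t align_offset = extension_bytes - commit_bytes;

    /* Pick any VA with (va & align_mask) == align_offset. */
    uint64_t va = 0x7f0000000000ull + align_offset;

    printf("(va & mask) == offset: %d\n", (va & align_mask) == align_offset);
    printf("(va + commit) %% extension = %llu\n",
           (unsigned long long)((va + commit_bytes) % extension_bytes));
    return 0;
}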
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index de76fa5..0502c0d 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -87,6 +87,8 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_MAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT,
KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_UNMAP_IMPORT_FORCE,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER,
+ KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND,
KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_ENQUEUE_JIT_ALLOC,
@@ -114,7 +116,9 @@ enum tl_msg_id_obj {
KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_EXECUTE_JIT_FREE_END,
KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END,
KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END,
- KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START,
+ KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END,
KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW,
KBASE_TL_KBASE_CSFFW_RESET,
KBASE_OBJ_MSG_COUNT,
@@ -334,8 +338,8 @@ enum tl_msg_id_aux {
"kcpu_queue,fence") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT, \
"KCPU Queue enqueues Wait on Cross Queue Sync Object", \
- "@pLI", \
- "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value") \
+ "@pLII", \
+ "kcpu_queue,cqs_obj_gpu_addr,cqs_obj_compare_value,cqs_obj_inherit_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_SET, \
"KCPU Queue enqueues Set on Cross Queue Sync Object", \
"@pL", \
@@ -352,6 +356,14 @@ enum tl_msg_id_aux {
"KCPU Queue enqueues Unmap Import ignoring reference count", \
"@pL", \
"kcpu_queue,map_import_buf_gpu_addr") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER, \
+ "KCPU Queue enqueues Error Barrier", \
+ "@p", \
+ "kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND, \
+ "KCPU Queue enqueues Group Suspend", \
+ "@ppI", \
+ "kcpu_queue,group_suspend_buf,gpu_cmdq_grp_handle") \
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC, \
"Begin array of KCPU Queue enqueues JIT Alloc", \
"@p", \
@@ -382,52 +394,52 @@ enum tl_msg_id_aux {
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END, \
"KCPU Queue ends a Signal on Fence", \
- "@p", \
- "kcpu_queue") \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_START, \
"KCPU Queue starts a Wait on Fence", \
"@p", \
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END, \
"KCPU Queue ends a Wait on Fence", \
- "@p", \
- "kcpu_queue") \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_START, \
"KCPU Queue starts a Wait on an array of Cross Queue Sync Objects", \
"@p", \
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END, \
"KCPU Queue ends a Wait on an array of Cross Queue Sync Objects", \
- "@p", \
- "kcpu_queue") \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET, \
"KCPU Queue executes a Set on an array of Cross Queue Sync Objects", \
- "@p", \
- "kcpu_queue") \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_START, \
"KCPU Queue starts a Map Import", \
"@p", \
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END, \
"KCPU Queue ends a Map Import", \
- "@p", \
- "kcpu_queue") \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_START, \
"KCPU Queue starts an Unmap Import", \
"@p", \
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END, \
"KCPU Queue ends an Unmap Import", \
- "@p", \
- "kcpu_queue") \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_START, \
"KCPU Queue starts an Unmap Import ignoring reference count", \
"@p", \
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END, \
"KCPU Queue ends an Unmap Import ignoring reference count", \
- "@p", \
- "kcpu_queue") \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_ALLOC_START, \
"KCPU Queue starts an array of JIT Allocs", \
"@p", \
@@ -438,8 +450,8 @@ enum tl_msg_id_aux {
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
"Array item of KCPU Queue ends an array of JIT Allocs", \
- "@pLL", \
- "kcpu_queue,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \
+ "@pILL", \
+ "kcpu_queue,execute_error,jit_alloc_gpu_alloc_addr,jit_alloc_mmu_flags") \
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_ALLOC_END, \
"End array of KCPU Queue ends an array of JIT Allocs", \
"@p", \
@@ -454,16 +466,24 @@ enum tl_msg_id_aux {
"kcpu_queue") \
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
"Array item of KCPU Queue ends an array of JIT Frees", \
- "@pL", \
- "kcpu_queue,jit_free_pages_used") \
+ "@pIL", \
+ "kcpu_queue,execute_error,jit_free_pages_used") \
TRACEPOINT_DESC(KBASE_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END, \
"End array of KCPU Queue ends an array of JIT Frees", \
"@p", \
"kcpu_queue") \
- TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER, \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER, \
"KCPU Queue executes an Error Barrier", \
"@p", \
"kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START, \
+ "KCPU Queue starts a group suspend", \
+ "@p", \
+ "kcpu_queue") \
+ TRACEPOINT_DESC(KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END, \
+ "KCPU Queue ends a group suspend", \
+ "@pI", \
+ "kcpu_queue,execute_error") \
TRACEPOINT_DESC(KBASE_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW, \
"An overflow has happened with the CSFFW Timeline stream", \
"@LL", \
@@ -2125,13 +2145,15 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value)
+ u32 cqs_obj_compare_value,
+ u32 cqs_obj_inherit_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ sizeof(cqs_obj_gpu_addr)
+ sizeof(cqs_obj_compare_value)
+ + sizeof(cqs_obj_inherit_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2147,6 +2169,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
pos, &cqs_obj_gpu_addr, sizeof(cqs_obj_gpu_addr));
pos = kbasep_serialize_bytes(buffer,
pos, &cqs_obj_compare_value, sizeof(cqs_obj_compare_value));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &cqs_obj_inherit_error, sizeof(cqs_obj_inherit_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2255,6 +2279,58 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ const void *group_suspend_buf,
+ u32 gpu_cmdq_grp_handle)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ + sizeof(group_suspend_buf)
+ + sizeof(gpu_cmdq_grp_handle)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &group_suspend_buf, sizeof(group_suspend_buf));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &gpu_cmdq_grp_handle, sizeof(gpu_cmdq_grp_handle));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc(
struct kbase_tlstream *stream,
const void *kcpu_queue)
@@ -2451,11 +2527,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue,
+ u32 execute_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2467,6 +2545,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2495,11 +2575,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue,
+ u32 execute_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2511,6 +2593,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2539,11 +2623,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue,
+ u32 execute_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2555,17 +2641,21 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue,
+ u32 execute_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2577,6 +2667,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2605,11 +2697,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue,
+ u32 execute_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2621,6 +2715,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2649,11 +2745,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue,
+ u32 execute_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2665,6 +2763,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2693,11 +2793,13 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start(
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue)
+ const void *kcpu_queue,
+ u32 execute_error)
{
const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2709,6 +2811,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
@@ -2760,12 +2864,14 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end(
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
+ u32 execute_error,
u64 jit_alloc_gpu_alloc_addr,
u64 jit_alloc_mmu_flags)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ sizeof(jit_alloc_gpu_alloc_addr)
+ sizeof(jit_alloc_mmu_flags)
;
@@ -2780,6 +2886,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+ pos = kbasep_serialize_bytes(buffer,
pos, &jit_alloc_gpu_alloc_addr, sizeof(jit_alloc_gpu_alloc_addr));
pos = kbasep_serialize_bytes(buffer,
pos, &jit_alloc_mmu_flags, sizeof(jit_alloc_mmu_flags));
@@ -2856,11 +2964,13 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end(
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
+ u32 execute_error,
u64 jit_free_pages_used)
{
const u32 msg_id = KBASE_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
+ sizeof(jit_free_pages_used)
;
char *buffer;
@@ -2874,6 +2984,8 @@ void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
+ pos = kbasep_serialize_bytes(buffer,
pos, &jit_free_pages_used, sizeof(jit_free_pages_used));
kbase_tlstream_msgbuf_release(stream, acq_flags);
@@ -2901,13 +3013,59 @@ void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(
struct kbase_tlstream *stream,
const void *kcpu_queue)
{
- const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER;
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START;
+ const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ + sizeof(kcpu_queue)
+ ;
+ char *buffer;
+ unsigned long acq_flags;
+ size_t pos = 0;
+
+ buffer = kbase_tlstream_msgbuf_acquire(stream, msg_size, &acq_flags);
+
+ pos = kbasep_serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
+ pos = kbasep_serialize_timestamp(buffer, pos);
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &kcpu_queue, sizeof(kcpu_queue));
+
+ kbase_tlstream_msgbuf_release(stream, acq_flags);
+}
+
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error)
+{
+ const u32 msg_id = KBASE_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END;
const size_t msg_size = sizeof(msg_id) + sizeof(u64)
+ sizeof(kcpu_queue)
+ + sizeof(execute_error)
;
char *buffer;
unsigned long acq_flags;
@@ -2919,6 +3077,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
pos = kbasep_serialize_timestamp(buffer, pos);
pos = kbasep_serialize_bytes(buffer,
pos, &kcpu_queue, sizeof(kcpu_queue));
+ pos = kbasep_serialize_bytes(buffer,
+ pos, &execute_error, sizeof(execute_error));
kbase_tlstream_msgbuf_release(stream, acq_flags);
}
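
All of the tracepoint writers above follow one shape: sum the field sizes into msg_size, acquire a message buffer, then serialize the message id, a timestamp and each argument back to back. A compilable sketch of that byte-packing pattern, with serialize_bytes() standing in for kbasep_serialize_bytes() and the timestamp step omitted.

/* Sketch: packing a tracepoint message field by field. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy a field into the message buffer at `pos` and return the new offset,
 * the contract kbasep_serialize_bytes() provides in the driver.
 */
static size_t serialize_bytes(char *buffer, size_t pos, const void *bytes,
                              size_t len)
{
    memcpy(buffer + pos, bytes, len);
    return pos + len;
}

int main(void)
{
    const uint32_t msg_id = 42;              /* stand-in for the enum value */
    const void *kcpu_queue = (void *)0x1234; /* stand-in queue pointer */
    const uint32_t execute_error = 0;

    char buffer[sizeof(msg_id) + sizeof(kcpu_queue) + sizeof(execute_error)];
    size_t pos = 0;

    pos = serialize_bytes(buffer, pos, &msg_id, sizeof(msg_id));
    pos = serialize_bytes(buffer, pos, &kcpu_queue, sizeof(kcpu_queue));
    pos = serialize_bytes(buffer, pos, &execute_error, sizeof(execute_error));

    printf("packed %zu bytes\n", pos); /* 16 on a typical 64-bit build */
    return 0;
}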
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index 01f7710..3cd94e2 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -332,7 +332,8 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 cqs_obj_gpu_addr,
- u32 cqs_obj_compare_value);
+ u32 cqs_obj_compare_value,
+ u32 cqs_obj_inherit_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_set(
struct kbase_tlstream *stream,
const void *kcpu_queue,
@@ -349,6 +350,14 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_unmap_import_force(
struct kbase_tlstream *stream,
const void *kcpu_queue,
u64 map_import_buf_gpu_addr);
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue);
+void __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ const void *group_suspend_buf,
+ u32 gpu_cmdq_grp_handle);
void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_enqueue_jit_alloc(
struct kbase_tlstream *stream,
const void *kcpu_queue);
@@ -382,40 +391,47 @@ void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_start(
const void *kcpu_queue);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_start(
struct kbase_tlstream *stream,
const void *kcpu_queue);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_start(
struct kbase_tlstream *stream,
const void *kcpu_queue);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_start(
struct kbase_tlstream *stream,
const void *kcpu_queue);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_start(
struct kbase_tlstream *stream,
const void *kcpu_queue);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end(
struct kbase_tlstream *stream,
- const void *kcpu_queue);
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_kcpuqueue_execute_jit_alloc_start(
struct kbase_tlstream *stream,
const void *kcpu_queue);
@@ -425,6 +441,7 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_alloc_end(
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
+ u32 execute_error,
u64 jit_alloc_gpu_alloc_addr,
u64 jit_alloc_mmu_flags);
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_alloc_end(
@@ -439,13 +456,21 @@ void __kbase_tlstream_tl_kbase_array_begin_kcpuqueue_execute_jit_free_end(
void __kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
const void *kcpu_queue,
+ u32 execute_error,
u64 jit_free_pages_used);
void __kbase_tlstream_tl_kbase_array_end_kcpuqueue_execute_jit_free_end(
struct kbase_tlstream *stream,
const void *kcpu_queue);
-void __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier(
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier(
struct kbase_tlstream *stream,
const void *kcpu_queue);
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue);
+void __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end(
+ struct kbase_tlstream *stream,
+ const void *kcpu_queue,
+ u32 execute_error);
void __kbase_tlstream_tl_kbase_csffw_tlstream_overflow(
struct kbase_tlstream *stream,
u64 csffw_timestamp,
@@ -1842,27 +1867,30 @@ struct kbase_tlstream;
* @cqs_obj_gpu_addr: CQS Object GPU ptr
* @cqs_obj_compare_value: Semaphore value that should be exceeded
* for the WAIT to pass
+ * @cqs_obj_inherit_error: Indicates whether the error state should be inherited into the queue
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value \
+ cqs_obj_compare_value, \
+ cqs_obj_inherit_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_enqueue_cqs_wait( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value); \
+ kcpu_queue, cqs_obj_gpu_addr, cqs_obj_compare_value, cqs_obj_inherit_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_CQS_WAIT( \
kbdev, \
kcpu_queue, \
cqs_obj_gpu_addr, \
- cqs_obj_compare_value \
+ cqs_obj_compare_value, \
+ cqs_obj_inherit_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
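
Each KBASE_TLSTREAM_* macro in this header comes as a pair: the MALI_USE_CSF build expands to a timeline-flag check plus a call into the writer, while the other build expands to do { } while (0) so call sites compile to nothing. A minimal sketch of that feature-gated no-op macro pattern; FEATURE_X, TRACE_EVENT and emit_event are illustrative stand-ins.

/* Sketch: a tracepoint macro that compiles away when the feature is off. */
#include <stdio.h>

#define FEATURE_X 1 /* flip to 0 and the call site compiles to nothing */

void emit_event(int value)
{
    printf("event %d\n", value);
}

#if FEATURE_X
#define TRACE_EVENT(value) \
    do { \
        emit_event(value); \
    } while (0)
#else
#define TRACE_EVENT(value) \
    do { } while (0)
#endif

int main(void)
{
    TRACE_EVENT(7); /* prints when FEATURE_X is 1, a no-op otherwise */
    return 0;
}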
@@ -1988,6 +2016,66 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER -
+ * KCPU Queue enqueues Error Barrier
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_error_barrier( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_ERROR_BARRIER( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND -
+ * KCPU Queue enqueues Group Suspend
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @group_suspend_buf: Pointer to the suspend buffer structure
+ * @gpu_cmdq_grp_handle: GPU Command Queue Group handle, matching the handle used by userspace
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \
+ kbdev, \
+ kcpu_queue, \
+ group_suspend_buf, \
+ gpu_cmdq_grp_handle \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_enqueue_group_suspend( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, group_suspend_buf, gpu_cmdq_grp_handle); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_ENQUEUE_GROUP_SUSPEND( \
+ kbdev, \
+ kcpu_queue, \
+ group_suspend_buf, \
+ gpu_cmdq_grp_handle \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
* KBASE_TLSTREAM_TL_KBASE_ARRAY_BEGIN_KCPUQUEUE_ENQUEUE_JIT_ALLOC -
* Begin array of KCPU Queue enqueues JIT Alloc
*
@@ -2223,23 +2311,26 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_signal_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue, execute_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_SIGNAL_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2277,23 +2368,26 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_fence_wait_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue, execute_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_FENCE_WAIT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2331,23 +2425,26 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_wait_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue, execute_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_WAIT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2358,23 +2455,26 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_cqs_set( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue, execute_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_CQS_SET( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2412,23 +2512,26 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_map_import_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue, execute_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_MAP_IMPORT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2466,23 +2569,26 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue, execute_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2520,23 +2626,26 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
*/
#if MALI_USE_CSF
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_kcpuqueue_execute_unmap_import_force_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue); \
+ kcpu_queue, execute_error); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_UNMAP_IMPORT_FORCE_END( \
kbdev, \
- kcpu_queue \
+ kcpu_queue, \
+ execute_error \
) \
do { } while (0)
#endif /* MALI_USE_CSF */
@@ -2601,6 +2710,7 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
* @jit_alloc_gpu_alloc_addr: The JIT allocated GPU virtual address
* @jit_alloc_mmu_flags: The MMU flags for the JIT allocation
*/
@@ -2608,6 +2718,7 @@ struct kbase_tlstream;
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
kbdev, \
kcpu_queue, \
+ execute_error, \
jit_alloc_gpu_alloc_addr, \
jit_alloc_mmu_flags \
) \
@@ -2616,12 +2727,13 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_alloc_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags); \
+ kcpu_queue, execute_error, jit_alloc_gpu_alloc_addr, jit_alloc_mmu_flags); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_ALLOC_END( \
kbdev, \
kcpu_queue, \
+ execute_error, \
jit_alloc_gpu_alloc_addr, \
jit_alloc_mmu_flags \
) \
@@ -2715,6 +2827,7 @@ struct kbase_tlstream;
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
* @jit_free_pages_used: The actual number of pages used by the JIT
* allocation
*/
@@ -2722,6 +2835,7 @@ struct kbase_tlstream;
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
kbdev, \
kcpu_queue, \
+ execute_error, \
jit_free_pages_used \
) \
do { \
@@ -2729,12 +2843,13 @@ struct kbase_tlstream;
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
__kbase_tlstream_tl_kbase_array_item_kcpuqueue_execute_jit_free_end( \
__TL_DISPATCH_STREAM(kbdev, obj), \
- kcpu_queue, jit_free_pages_used); \
+ kcpu_queue, execute_error, jit_free_pages_used); \
} while (0)
#else
#define KBASE_TLSTREAM_TL_KBASE_ARRAY_ITEM_KCPUQUEUE_EXECUTE_JIT_FREE_END( \
kbdev, \
kcpu_queue, \
+ execute_error, \
jit_free_pages_used \
) \
do { } while (0)
@@ -2768,26 +2883,53 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
- * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER -
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER -
* KCPU Queue executes an Error Barrier
*
* @kbdev: Kbase device
* @kcpu_queue: KCPU queue
*/
#if MALI_USE_CSF
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_error_barrier( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERROR_BARRIER( \
+ kbdev, \
+ kcpu_queue \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START -
+ * KCPU Queue starts a group suspend
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \
kbdev, \
kcpu_queue \
) \
do { \
int enabled = atomic_read(&kbdev->timeline_flags); \
if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
- __kbase_tlstream_tl_kbase_kcpuqueue_execute_errorbarrier( \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_start( \
__TL_DISPATCH_STREAM(kbdev, obj), \
kcpu_queue); \
} while (0)
#else
-#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_ERRORBARRIER( \
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_START( \
kbdev, \
kcpu_queue \
) \
@@ -2795,6 +2937,36 @@ struct kbase_tlstream;
#endif /* MALI_USE_CSF */
/**
+ * KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END -
+ * KCPU Queue ends a group suspend
+ *
+ * @kbdev: Kbase device
+ * @kcpu_queue: KCPU queue
+ * @execute_error: Non-zero error code if KCPU Queue item completed with error, else zero
+ */
+#if MALI_USE_CSF
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { \
+ int enabled = atomic_read(&kbdev->timeline_flags); \
+ if (enabled & BASE_TLSTREAM_ENABLE_CSF_TRACEPOINTS) \
+ __kbase_tlstream_tl_kbase_kcpuqueue_execute_group_suspend_end( \
+ __TL_DISPATCH_STREAM(kbdev, obj), \
+ kcpu_queue, execute_error); \
+ } while (0)
+#else
+#define KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_GROUP_SUSPEND_END( \
+ kbdev, \
+ kcpu_queue, \
+ execute_error \
+ ) \
+ do { } while (0)
+#endif /* MALI_USE_CSF */
+
+/**
* KBASE_TLSTREAM_TL_KBASE_CSFFW_TLSTREAM_OVERFLOW -
* An overflow has happened with the CSFFW Timeline stream
*