author:    Sidath Senanayake <sidaths@google.com>  2020-09-11 16:44:12 +0100
committer: Sidath Senanayake <sidaths@google.com>  2020-09-11 16:44:12 +0100
commit:    d4ca6eb7268ee2db9deabd1745b505c6e1c162f9
tree:      64058c324e9e6adb30e8689d17f0a2e2b27636bc
parent:    bc3c01e61c8ce9783a8ab091053905effcae12de
Mali Valhall DDK r26p0 KMD
Provenance:
009a7d86a (collaborate/EAC/v_r26p0)
VX504X08X-BU-00000-r26p0-01eac0 - Android DDK
VX504X08X-BU-60000-r26p0-01eac0 - Android Document Bundle
Signed-off-by: Sidath Senanayake <sidaths@google.com>
Change-Id: Ic3671bdc454b706b6f98a9d1a615d1886da0c3e8
72 files changed, 6232 insertions, 1039 deletions
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 192ac06..06dda9c 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -21,9 +21,12 @@
 # Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= "r25p0-01eac0"
+MALI_RELEASE_NAME ?= "r26p0-01eac0"
 
 # Paths required for build
+
+# make $(src) as absolute path if it isn't already, by prefixing $(srctree)
+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
 KBASE_PATH = $(src)
 KBASE_PLATFORM_PATH = $(KBASE_PATH)/platform_dummy
 UMP_PATH = $(src)/../../../base
@@ -34,6 +37,7 @@ MALI_USE_CSF ?= 0
 MALI_UNIT_TEST ?= 0
 MALI_KERNEL_TEST_API ?= 0
 MALI_COVERAGE ?= 0
+MALI_JIT_PRESSURE_LIMIT_BASE ?= 1
 CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
 
 # Experimental features (corresponding -D definition should be appended to
 # DEFINES below, e.g. for MALI_EXPERIMENTAL_FEATURE,
@@ -41,7 +45,6 @@ CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
 #
 # Experimental features must default to disabled, e.g.:
 # MALI_EXPERIMENTAL_FEATURE ?= 0
-MALI_JIT_PRESSURE_LIMIT ?= 0
 MALI_INCREMENTAL_RENDERING ?= 0
 
 # Set up our defines, which will be passed to gcc
@@ -52,7 +55,7 @@ DEFINES = \
 	-DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \
 	-DMALI_COVERAGE=$(MALI_COVERAGE) \
 	-DMALI_RELEASE_NAME=\"$(MALI_RELEASE_NAME)\" \
-	-DMALI_JIT_PRESSURE_LIMIT=$(MALI_JIT_PRESSURE_LIMIT) \
+	-DMALI_JIT_PRESSURE_LIMIT_BASE=$(MALI_JIT_PRESSURE_LIMIT_BASE) \
 	-DMALI_INCREMENTAL_RENDERING=$(MALI_INCREMENTAL_RENDERING)
 
 ifeq ($(KBUILD_EXTMOD),)
@@ -76,6 +79,7 @@ SRC := \
 	debug/mali_kbase_debug_ktrace.c \
 	device/mali_kbase_device.c \
 	mali_kbase_cache_policy.c \
+	mali_kbase_ccswe.c \
 	mali_kbase_mem.c \
 	mali_kbase_mem_pool_group.c \
 	mali_kbase_native_mgm.c \
@@ -86,7 +90,7 @@ SRC := \
 	mali_kbase_config.c \
 	mali_kbase_vinstr.c \
 	mali_kbase_hwcnt.c \
-	mali_kbase_hwcnt_backend_gpu.c \
+	mali_kbase_hwcnt_backend_jm.c \
 	mali_kbase_hwcnt_gpu.c \
 	mali_kbase_hwcnt_legacy.c \
 	mali_kbase_hwcnt_types.c \
@@ -111,12 +115,14 @@ SRC := \
 	mali_kbase_strings.c \
 	mali_kbase_as_fault_debugfs.c \
 	mali_kbase_regs_history_debugfs.c \
+	mali_power_gpu_frequency_trace.c \
 	thirdparty/mali_kbase_mmap.c \
 	tl/mali_kbase_timeline.c \
 	tl/mali_kbase_timeline_io.c \
 	tl/mali_kbase_tlstream.c \
 	tl/mali_kbase_tracepoints.c \
-	gpu/mali_kbase_gpu.c
+	gpu/mali_kbase_gpu.c \
+	mali_kbase_trace_gpu_mem.c
 
 ifeq ($(MALI_USE_CSF),1)
 	SRC += \
@@ -135,6 +141,7 @@ else
 	mali_kbase_jd_debugfs.c \
 	mali_kbase_js.c \
 	mali_kbase_js_ctx_attr.c \
+	mali_kbase_kinstr_jm.c \
 	debug/backend/mali_kbase_debug_ktrace_jm.c \
 	device/backend/mali_kbase_device_jm.c \
 	gpu/backend/mali_kbase_gpu_fault_jm.c \

diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 58a5b0b..ca59dbb 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -230,6 +230,10 @@ config MALI_DMA_BUF_LEGACY_COMPAT
 	  maintenance where MALI_DMA_BUF_MAP_ON_DEMAND would do the mapping,
 	  including a cache flush.
 
+	  This option might work around issues related to missing cache
+	  flushes in other drivers. This only has an effect for clients using
+	  UK 11.18 or older. For later UK versions it is not possible.
+
 config MALI_HW_ERRATA_1485982_NOT_AFFECTED
 	bool "Disable workaround for BASE_HW_ISSUE_GPU2017_1336"
 	depends on MALI_MIDGARD && MALI_EXPERT

diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index 2449e80..0b3e073 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -1,5 +1,5 @@
 #
-# (C) COPYRIGHT 2014-2019 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2020 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -34,7 +34,8 @@ BACKEND += \
 	backend/gpu/mali_kbase_pm_coarse_demand.c \
 	backend/gpu/mali_kbase_pm_policy.c \
 	backend/gpu/mali_kbase_time.c \
-	backend/gpu/mali_kbase_l2_mmu_config.c
+	backend/gpu/mali_kbase_l2_mmu_config.c \
+	backend/gpu/mali_kbase_clk_rate_trace_mgr.c
 
 ifeq ($(MALI_USE_CSF),1)
 # empty

diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
new file mode 100644
index 0000000..18bb117
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -0,0 +1,280 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Implementation of the GPU clock rate trace manager.
+ */
+
+#include <mali_kbase.h>
+#include <mali_kbase_config_defaults.h>
+#include <linux/clk.h>
+#include <asm/div64.h>
+#include "mali_kbase_clk_rate_trace_mgr.h"
+
+#ifdef CONFIG_TRACE_POWER_GPU_FREQUENCY
+#include <trace/events/power_gpu_frequency.h>
+#else
+#include "mali_power_gpu_frequency_trace.h"
+#endif
+
+#ifndef CLK_RATE_TRACE_OPS
+#define CLK_RATE_TRACE_OPS (NULL)
+#endif
+
+static int gpu_clk_rate_change_notifier(struct notifier_block *nb,
+			unsigned long event, void *data)
+{
+	struct kbase_gpu_clk_notifier_data *ndata = data;
+	struct kbase_clk_data *clk_data =
+		container_of(nb, struct kbase_clk_data, clk_rate_change_nb);
+	struct kbase_clk_rate_trace_manager *clk_rtm = clk_data->clk_rtm;
+	unsigned long flags;
+
+	if (WARN_ON_ONCE(clk_data->gpu_clk_handle != ndata->gpu_clk_handle))
+		return NOTIFY_BAD;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+	if (event == POST_RATE_CHANGE) {
+		if (!clk_rtm->gpu_idle &&
+		    (clk_data->clock_val != ndata->new_rate)) {
+			kbase_clk_rate_trace_manager_notify_all(
+				clk_rtm, clk_data->index, ndata->new_rate);
+		}
+
+		clk_data->clock_val = ndata->new_rate;
+	}
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+
+	return NOTIFY_DONE;
+}
+
+static int gpu_clk_data_init(struct kbase_device *kbdev,
+		void *gpu_clk_handle, unsigned int index)
+{
+	struct kbase_clk_rate_trace_op_conf *callbacks =
+		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+	struct kbase_clk_data *clk_data;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	int ret = 0;
+
+	if (WARN_ON(!callbacks) ||
+	    WARN_ON(!gpu_clk_handle) ||
+	    WARN_ON(index >= BASE_MAX_NR_CLOCKS_REGULATORS))
+		return -EINVAL;
+
+	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
+	if (!clk_data) {
+		dev_err(kbdev->dev, "Failed to allocate data for clock enumerated at index %u", index);
+		return -ENOMEM;
+	}
+
+	clk_data->index = (u8)index;
+	clk_data->gpu_clk_handle = gpu_clk_handle;
+	/* Store the initial value of clock */
+	clk_data->clock_val =
+		callbacks->get_gpu_clk_rate(kbdev, gpu_clk_handle);
+
+	{
+		/* At the initialization time, GPU is powered off. */
+		unsigned long flags;
+
+		spin_lock_irqsave(&clk_rtm->lock, flags);
+		kbase_clk_rate_trace_manager_notify_all(
+			clk_rtm, clk_data->index, 0);
+		spin_unlock_irqrestore(&clk_rtm->lock, flags);
+	}
+
+	clk_data->clk_rtm = clk_rtm;
+	clk_rtm->clks[index] = clk_data;
+
+	clk_data->clk_rate_change_nb.notifier_call =
+		gpu_clk_rate_change_notifier;
+
+	ret = callbacks->gpu_clk_notifier_register(kbdev, gpu_clk_handle,
+			&clk_data->clk_rate_change_nb);
+	if (ret) {
+		dev_err(kbdev->dev, "Failed to register notifier for clock enumerated at index %u", index);
+		kfree(clk_data);
+	}
+
+	return ret;
+}
+
+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_op_conf *callbacks =
+		(struct kbase_clk_rate_trace_op_conf *)CLK_RATE_TRACE_OPS;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+	int ret = 0;
+
+	/* Return early if no callbacks provided for clock rate tracing */
+	if (!callbacks)
+		return 0;
+
+	spin_lock_init(&clk_rtm->lock);
+	INIT_LIST_HEAD(&clk_rtm->listeners);
+
+	clk_rtm->gpu_idle = true;
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		void *gpu_clk_handle =
+			callbacks->enumerate_gpu_clk(kbdev, i);
+
+		if (!gpu_clk_handle)
+			break;
+
+		ret = gpu_clk_data_init(kbdev, gpu_clk_handle, i);
+		if (ret)
+			goto error;
+	}
+
+	/* Activate clock rate trace manager if at least one GPU clock was
+	 * enumerated.
+	 */
+	if (i)
+		WRITE_ONCE(clk_rtm->clk_rate_trace_ops, callbacks);
+	else
+		dev_info(kbdev->dev, "No clock(s) available for rate tracing");
+
+	return 0;
+
+error:
+	while (i--) {
+		clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
+			kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+			&clk_rtm->clks[i]->clk_rate_change_nb);
+		kfree(clk_rtm->clks[i]);
+	}
+
+	return ret;
+}
+
+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+
+	WARN_ON(!list_empty(&clk_rtm->listeners));
+
+	if (!clk_rtm->clk_rate_trace_ops)
+		return;
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (!clk_rtm->clks[i])
+			break;
+
+		clk_rtm->clk_rate_trace_ops->gpu_clk_notifier_unregister(
+			kbdev, clk_rtm->clks[i]->gpu_clk_handle,
+			&clk_rtm->clks[i]->clk_rate_change_nb);
+		kfree(clk_rtm->clks[i]);
+	}
+
+	WRITE_ONCE(clk_rtm->clk_rate_trace_ops, NULL);
+}
+
+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+	unsigned long flags;
+
+	if (!clk_rtm->clk_rate_trace_ops)
+		return;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		struct kbase_clk_data *clk_data = clk_rtm->clks[i];
+
+		if (!clk_data)
+			break;
+
+		if (unlikely(!clk_data->clock_val))
+			continue;
+
+		kbase_clk_rate_trace_manager_notify_all(
+			clk_rtm, clk_data->index, clk_data->clock_val);
+	}
+
+	clk_rtm->gpu_idle = false;
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev)
+{
+	struct kbase_clk_rate_trace_manager *clk_rtm = &kbdev->pm.clk_rtm;
+	unsigned int i;
+	unsigned long flags;
+
+	if (!clk_rtm->clk_rate_trace_ops)
+		return;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		struct kbase_clk_data *clk_data = clk_rtm->clks[i];
+
+		if (!clk_data)
+			break;
+
+		if (unlikely(!clk_data->clock_val))
+			continue;
+
+		kbase_clk_rate_trace_manager_notify_all(
+			clk_rtm, clk_data->index, 0);
+	}
+
+	clk_rtm->gpu_idle = true;
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+void kbase_clk_rate_trace_manager_notify_all(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	u32 clk_index,
+	unsigned long new_rate)
+{
+	struct kbase_clk_rate_listener *pos;
+	struct kbase_device *kbdev;
+
+	lockdep_assert_held(&clk_rtm->lock);
+
+	kbdev = container_of(clk_rtm, struct kbase_device, pm.clk_rtm);
+
+	dev_dbg(kbdev->dev, "GPU clock %u rate changed to %lu",
+		clk_index, new_rate);
+
+	/* Raise standard `power/gpu_frequency` ftrace event */
+	{
+		unsigned long new_rate_khz = new_rate;
+
+		do_div(new_rate_khz, 1000);
+		trace_gpu_frequency(new_rate_khz, clk_index);
+	}
+
+	/* Notify the listeners. */
+	list_for_each_entry(pos, &clk_rtm->listeners, node) {
+		pos->notify(pos, clk_index, new_rate);
+	}
+}
+KBASE_EXPORT_TEST_API(kbase_clk_rate_trace_manager_notify_all);

diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
new file mode 100644
index 0000000..dcafb26
--- /dev/null
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.h
@@ -0,0 +1,155 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CLK_RATE_TRACE_MGR_
+#define _KBASE_CLK_RATE_TRACE_MGR_
+
+/** The index of top clock domain in kbase_clk_rate_trace_manager:clks. */
+#define KBASE_CLOCK_DOMAIN_TOP (0)
+
+/** The index of shader-cores clock domain in
+ * kbase_clk_rate_trace_manager:clks.
+ */
+#define KBASE_CLOCK_DOMAIN_SHADER_CORES (1)
+
+/**
+ * struct kbase_clk_data - Data stored per enumerated GPU clock.
+ *
+ * @clk_rtm:            Pointer to clock rate trace manager object.
+ * @gpu_clk_handle:     Handle unique to the enumerated GPU clock.
+ * @plat_private:       Private data for the platform to store into
+ * @clk_rate_change_nb: Notifier block containing the pointer to callback
+ *                      function that is invoked whenever the rate of
+ *                      enumerated GPU clock changes.
+ * @clock_val:          Current rate of the enumerated GPU clock.
+ * @index:              Index at which the GPU clock was enumerated.
+ */
+struct kbase_clk_data {
+	struct kbase_clk_rate_trace_manager *clk_rtm;
+	void *gpu_clk_handle;
+	void *plat_private;
+	struct notifier_block clk_rate_change_nb;
+	unsigned long clock_val;
+	u8 index;
+};
+
+/**
+ * kbase_clk_rate_trace_manager_init - Initialize GPU clock rate trace manager.
+ *
+ * @kbdev: Device pointer
+ *
+ * Return: 0 if success, or an error code on failure.
+ */
+int kbase_clk_rate_trace_manager_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_term - Terminate GPU clock rate trace manager.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_term(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_gpu_active - Inform GPU clock rate trace
+ *                                           manager of GPU becoming active.
+ *
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_gpu_idle - Inform GPU clock rate trace
+ *                                         manager of GPU becoming idle.
+ * @kbdev: Device pointer
+ */
+void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev);
+
+/**
+ * kbase_clk_rate_trace_manager_subscribe_no_lock() - Add freq change listener.
+ *
+ * @clk_rtm:  Clock rate manager instance.
+ * @listener: Listener handle
+ *
+ * kbase_clk_rate_trace_manager:lock must be held by the caller.
+ */
+static inline void kbase_clk_rate_trace_manager_subscribe_no_lock(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	struct kbase_clk_rate_listener *listener)
+{
+	lockdep_assert_held(&clk_rtm->lock);
+	list_add(&listener->node, &clk_rtm->listeners);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_subscribe() - Add freq change listener.
+ *
+ * @clk_rtm:  Clock rate manager instance.
+ * @listener: Listener handle
+ */
+static inline void kbase_clk_rate_trace_manager_subscribe(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	struct kbase_clk_rate_listener *listener)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+	kbase_clk_rate_trace_manager_subscribe_no_lock(
+		clk_rtm, listener);
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_unsubscribe() - Remove freq change listener.
+ *
+ * @clk_rtm:  Clock rate manager instance.
+ * @listener: Listener handle
+ */
+static inline void kbase_clk_rate_trace_manager_unsubscribe(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	struct kbase_clk_rate_listener *listener)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&clk_rtm->lock, flags);
+	list_del(&listener->node);
+	spin_unlock_irqrestore(&clk_rtm->lock, flags);
+}
+
+/**
+ * kbase_clk_rate_trace_manager_notify_all() - Notify all clock
+ *                                             rate listeners.
+ *
+ * @clk_rtm:   Clock rate manager instance.
+ * @clk_index: Clock index.
+ * @new_rate:  New clock frequency (Hz)
+ *
+ * kbase_clk_rate_trace_manager:lock must be locked.
+ * This function is exported to be used by the clock rate trace test
+ * portal.
+ */
+void kbase_clk_rate_trace_manager_notify_all(
+	struct kbase_clk_rate_trace_manager *clk_rtm,
+	u32 clk_index,
+	unsigned long new_rate);
+
+#endif /* _KBASE_CLK_RATE_TRACE_MGR_ */
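
[Editorial note] The subscribe/unsubscribe helpers above are the whole in-kernel listener API: a client embeds a struct kbase_clk_rate_listener and is called back under clk_rtm->lock (a spinlock, IRQs off) on every rate change, so the callback must not sleep. The listener type itself is declared elsewhere in the driver; the sketch below infers its node/notify members from the list handling in kbase_clk_rate_trace_manager_notify_all(), and the my_* names are illustrative only.

/* Hedged sketch: a rate observer built on the API above. */
struct my_freq_observer {
    struct kbase_clk_rate_listener listener; /* embedded handle */
    unsigned long last_top_rate_hz;
};

/* Prototype inferred from the call site pos->notify(pos, clk_index, new_rate) */
static void my_freq_notify(struct kbase_clk_rate_listener *listener,
                           u32 clk_index, unsigned long new_rate_hz)
{
    struct my_freq_observer *obs =
        container_of(listener, struct my_freq_observer, listener);

    /* Runs under clk_rtm->lock: keep it short, no sleeping. */
    if (clk_index == KBASE_CLOCK_DOMAIN_TOP)
        obs->last_top_rate_hz = new_rate_hz;
}

static void my_freq_observer_start(struct kbase_device *kbdev,
                                   struct my_freq_observer *obs)
{
    obs->listener.notify = my_freq_notify;
    kbase_clk_rate_trace_manager_subscribe(&kbdev->pm.clk_rtm,
                                           &obs->listener);
}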
diff --git a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
index 8b320c7..f9c2ec7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_instr_backend.c
@@ -87,7 +87,7 @@ int kbase_instr_hwcnt_enable_internal(struct kbase_device *kbdev,
 					enable->dump_buffer >> 32);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_JM_EN),
-					enable->jm_bm);
+					enable->fe_bm);
 
 	kbase_reg_write(kbdev, GPU_CONTROL_REG(PRFCNT_SHADER_EN),
 					enable->shader_bm);

diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
index 21b2aa2..8696c6a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
@@ -79,8 +79,6 @@ static irqreturn_t kbase_job_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-KBASE_EXPORT_TEST_API(kbase_job_irq_handler);
-
 static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
 {
 	unsigned long flags;
@@ -177,7 +175,7 @@ static irq_handler_t kbase_handler_table[] = {
  * Return: IRQ_HANDLED if the requests are from the GPU device,
  *         IRQ_NONE otherwise
  */
-static irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
+irqreturn_t kbase_gpu_irq_test_handler(int irq, void *data, u32 val)
 {
 	struct kbase_device *kbdev = kbase_untag(data);

diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index fa6bc83..73c4f6b 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -33,6 +33,7 @@
 #include <mali_kbase_hwaccess_jm.h>
 #include <mali_kbase_reset_gpu.h>
 #include <mali_kbase_ctx_sched.h>
+#include <mali_kbase_kinstr_jm.h>
 #include <mali_kbase_hwcnt_context.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_irq_internal.h>
@@ -277,6 +278,7 @@ void kbase_job_hw_submit(struct kbase_device *kbdev,
 			katom,
 			&kbdev->gpu_props.props.raw_props.js_features[js],
 			"ctx_nr,atom_nr");
+	kbase_kinstr_jm_atom_hw_submit(katom);
 #ifdef CONFIG_GPU_TRACEPOINTS
 	if (!kbase_backend_nr_atoms_submitted(kbdev, js)) {
 		/* If this is the only job on the slot, trace it as starting */
@@ -692,12 +694,40 @@ void kbase_backend_jm_kill_running_jobs_from_kctx(struct kbase_context *kctx)
 		kbase_job_slot_hardstop(kctx, i, NULL);
 }
 
+/**
+ * kbase_is_existing_atom_submitted_later_than_ready
+ * @ready: sequence number of the ready atom
+ * @existing: sequence number of the existing atom
+ *
+ * Returns true if the existing atom has been submitted later than the
+ * ready atom. It is used to understand if an atom that is ready has been
+ * submitted earlier than the currently running atom, so that the currently
+ * running atom should be preempted to allow the ready atom to run.
+ */
+static inline bool kbase_is_existing_atom_submitted_later_than_ready(u64 ready, u64 existing)
+{
+	/* No seq_nr set? */
+	if (!ready || !existing)
+		return false;
+
+	/* Efficiently handle the unlikely case of wrapping.
+	 * The following code assumes that the delta between the sequence
+	 * numbers of the two atoms is less than INT64_MAX.
+	 * In the extremely unlikely case where the delta is higher, the
+	 * comparison defaults to no preemption.
+	 * The code also assumes that the conversion from unsigned to signed
+	 * types works because the signed integers are 2's complement.
+	 */
+	return (s64)(ready - existing) < 0;
+}
+
 void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 				struct kbase_jd_atom *target_katom)
 {
 	struct kbase_device *kbdev;
 	int js = target_katom->slot_nr;
 	int priority = target_katom->sched_priority;
+	int seq_nr = target_katom->seq_nr;
 	int i;
 	bool stop_sent = false;
 
@@ -719,7 +749,8 @@ void kbase_job_slot_ctx_priority_check_locked(struct kbase_context *kctx,
 		    (katom->kctx != kctx))
 			continue;
 
-		if (katom->sched_priority > priority) {
+		if ((katom->sched_priority > priority) ||
+		    (katom->kctx == kctx && kbase_is_existing_atom_submitted_later_than_ready(seq_nr, katom->seq_nr))) {
 			if (!stop_sent)
 				KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITIZED(
 						kbdev,
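
[Editorial note] The cast-and-compare in kbase_is_existing_atom_submitted_later_than_ready() is standard serial-number arithmetic: subtracting the two u64 counters and reinterpreting the difference as s64 keeps the ordering correct even across a wrap, as long as the two values are within INT64_MAX of each other. A standalone demonstration of the same comparison (userspace C, mirroring the function above):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool existing_submitted_later(uint64_t ready, uint64_t existing)
{
    if (!ready || !existing)    /* seq_nr of 0 means "not set" */
        return false;
    return (int64_t)(ready - existing) < 0;
}

int main(void)
{
    /* Plain ordering: the existing atom (9) was submitted after the
     * ready one (5), so the ready atom should win. */
    printf("%d\n", existing_submitted_later(5, 9));          /* 1 */
    printf("%d\n", existing_submitted_later(9, 5));          /* 0 */

    /* Wrapped ordering: the unsigned subtraction keeps the small signed
     * delta, so seq_nr 1 (just past the wrap) still counts as later
     * than UINT64_MAX. */
    printf("%d\n", existing_submitted_later(UINT64_MAX, 1)); /* 1 */
    return 0;
}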
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index ec7bcb1..8b409a0 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -33,6 +33,7 @@
 #include <tl/mali_kbase_tracepoints.h>
 #include <mali_kbase_hwcnt_context.h>
 #include <mali_kbase_reset_gpu.h>
+#include <mali_kbase_kinstr_jm.h>
 #include <backend/gpu/mali_kbase_cache_policy_backend.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_jm_internal.h>
@@ -278,6 +279,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
 		break;
 
 	case KBASE_ATOM_GPU_RB_SUBMITTED:
+		kbase_kinstr_jm_atom_hw_release(katom);
 		/* Inform power management at start/finish of atom so it can
 		 * update its GPU utilisation metrics. Mark atom as not
 		 * submitted beforehand. */

diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index fcc0437..d2d11a3 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -37,7 +37,7 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev)
 {
 	struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
-	s8 nr_running_ctxs;
+	int nr_running_ctxs;
 
 	lockdep_assert_held(&kbdev->js_data.runpool_mutex);
 
@@ -69,10 +69,10 @@ static inline bool timer_callback_should_run(struct kbase_device *kbdev)
 	 * don't check KBASEP_JS_CTX_ATTR_NON_COMPUTE).
 	 */
 	{
-		s8 nr_compute_ctxs =
+		int nr_compute_ctxs =
 			kbasep_js_ctx_attr_count_on_runpool(kbdev,
 					KBASEP_JS_CTX_ATTR_COMPUTE);
-		s8 nr_noncompute_ctxs = nr_running_ctxs -
+		int nr_noncompute_ctxs = nr_running_ctxs -
 					nr_compute_ctxs;
 
 		return (bool) (nr_compute_ctxs >= 2 ||

diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index cb10518..a9c33e2 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -25,13 +25,13 @@
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
-void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
-				u64 *system_time, struct timespec64 *ts)
+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
+					  u64 *cycle_counter,
+					  u64 *system_time,
+					  struct timespec64 *ts)
 {
 	u32 hi1, hi2;
 
-	kbase_pm_request_gpu_cycle_counter(kbdev);
-
 	if (cycle_counter) {
 		/* Read hi, lo, hi to ensure a coherent u64 */
 		do {
@@ -65,6 +65,13 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
 #else
 	ktime_get_raw_ts64(ts);
 #endif
+}
+
+void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
+				u64 *system_time, struct timespec64 *ts)
+{
+	kbase_pm_request_gpu_cycle_counter(kbdev);
+	kbase_backend_get_gpu_time_norequest(
+			kbdev, cycle_counter, system_time, ts);
 	kbase_pm_release_gpu_cycle_counter(kbdev);
 }
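
[Editorial note] The cycle-counter path above depends on the hi/lo/hi idiom named in its comment: read the high half, then the low half, then the high half again, and retry until the two high reads agree, so that a carry out of the low word can never be observed halfway. A generic standalone sketch of the pattern (read_hi()/read_lo() are hypothetical stand-ins for the CYCLE_COUNT register reads):

#include <stdint.h>

/* Hypothetical 32-bit accessors for a split 64-bit hardware counter. */
extern uint32_t read_hi(void);
extern uint32_t read_lo(void);

static uint64_t read_counter64(void)
{
    uint32_t hi1, hi2, lo;

    do {
        hi1 = read_hi();
        lo = read_lo();
        hi2 = read_hi();
    } while (hi1 != hi2);   /* a carry crossed us: retry */

    return ((uint64_t)hi1 << 32) | lo;
}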
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 2cd2551..5d5b639 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -30,6 +30,7 @@
 #include <mali_kbase.h>
 #include <mali_kbase_ctx_sched.h>
 #include <mali_kbase_dma_fence.h>
+#include <mali_kbase_kinstr_jm.h>
 #include <mali_kbase_mem_linux.h>
 #include <mali_kbase_mem_pool_group.h>
 #include <mmu/mali_kbase_mmu.h>
@@ -70,6 +71,21 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx)
 KBASE_EXPORT_SYMBOL(kbase_context_debugfs_term);
 #endif /* CONFIG_DEBUG_FS */
 
+static int kbase_context_kbase_kinstr_jm_init(struct kbase_context *kctx)
+{
+	int ret = kbase_kinstr_jm_init(&kctx->kinstr_jm);
+
+	if (!ret)
+		return ret;
+
+	return 0;
+}
+
+static void kbase_context_kbase_kinstr_jm_term(struct kbase_context *kctx)
+{
+	kbase_kinstr_jm_term(kctx->kinstr_jm);
+}
+
 static int kbase_context_kbase_timer_setup(struct kbase_context *kctx)
 {
 	kbase_timer_setup(&kctx->soft_job_timeout,
@@ -122,6 +138,8 @@ static const struct kbase_context_init context_init[] = {
 	 "Sticky resource initialization failed"},
 	{kbase_jit_init, kbase_jit_term,
 	 "JIT initialization failed"},
+	{kbase_context_kbase_kinstr_jm_init, kbase_context_kbase_kinstr_jm_term,
+	 "JM instrumentation initialization failed"},
 	{kbase_context_kbase_timer_setup, NULL, NULL},
 	{kbase_context_submit_check, NULL, NULL},
 };

diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 93fe431..5c27224 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -36,9 +36,99 @@
 #include <mmu/mali_kbase_mmu.h>
 #include <context/mali_kbase_context_internal.h>
 
+/**
+ * find_process_node - Used to traverse the process rb_tree to find if
+ *                     the process exists already in the process rb_tree.
+ *
+ * @node: Pointer to root node to start search.
+ * @tgid: Thread group PID to search for.
+ *
+ * Return: Pointer to kbase_process if it exists, otherwise NULL.
+ */
+static struct kbase_process *find_process_node(struct rb_node *node, pid_t tgid)
+{
+	struct kbase_process *kprcs = NULL;
+
+	/* Check if the kctx creation request is from an existing process. */
+	while (node) {
+		struct kbase_process *prcs_node =
+			rb_entry(node, struct kbase_process, kprcs_node);
+		if (prcs_node->tgid == tgid) {
+			kprcs = prcs_node;
+			break;
+		}
+
+		if (tgid < prcs_node->tgid)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
+	}
+
+	return kprcs;
+}
+
+/**
+ * kbase_insert_kctx_to_process - Initialise kbase process context.
+ *
+ * @kctx: Pointer to kbase context.
+ *
+ * Here we initialise the per-process rb_tree managed by kbase_device.
+ * We maintain an rb_tree of each unique process that gets created,
+ * and each process maintains a list of kbase contexts.
+ * This setup is currently used by the kernel trace functionality
+ * to trace and visualise GPU memory consumption.
+ *
+ * Return: 0 on success and error number on failure.
+ */
+static int kbase_insert_kctx_to_process(struct kbase_context *kctx)
+{
+	struct rb_root *const prcs_root = &kctx->kbdev->process_root;
+	const pid_t tgid = kctx->tgid;
+	struct kbase_process *kprcs = NULL;
+
+	lockdep_assert_held(&kctx->kbdev->kctx_list_lock);
+
+	kprcs = find_process_node(prcs_root->rb_node, tgid);
+
+	/* if the kctx is from a new process then create a new kbase_process
+	 * and add it to the &kbase_device->rb_tree
+	 */
+	if (!kprcs) {
+		struct rb_node **new = &prcs_root->rb_node, *parent = NULL;
+
+		kprcs = kzalloc(sizeof(*kprcs), GFP_KERNEL);
+		if (kprcs == NULL)
+			return -ENOMEM;
+		kprcs->tgid = tgid;
+		INIT_LIST_HEAD(&kprcs->kctx_list);
+		kprcs->dma_buf_root = RB_ROOT;
+		kprcs->total_gpu_pages = 0;
+
+		while (*new) {
+			struct kbase_process *prcs_node;
+
+			parent = *new;
+			prcs_node = rb_entry(parent, struct kbase_process,
+					     kprcs_node);
+			if (tgid < prcs_node->tgid)
+				new = &(*new)->rb_left;
+			else
+				new = &(*new)->rb_right;
+		}
+		rb_link_node(&kprcs->kprcs_node, parent, new);
+		rb_insert_color(&kprcs->kprcs_node, prcs_root);
+	}
+
+	kctx->kprcs = kprcs;
+	list_add(&kctx->kprcs_link, &kprcs->kctx_list);
+
+	return 0;
+}
+
 int kbase_context_common_init(struct kbase_context *kctx)
 {
 	const unsigned long cookies_mask = KBASE_COOKIE_MASK;
+	int err = 0;
 
 	/* creating a context is considered a disjoint event */
 	kbase_disjoint_event(kctx->kbdev);
@@ -81,13 +171,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
 	list_add(&kctx->kctx_list_link, &kctx->kbdev->kctx_list);
+
+	err = kbase_insert_kctx_to_process(kctx);
+	if (err)
+		dev_err(kctx->kbdev->dev,
+			"(err:%d) failed to insert kctx to kbase_process\n", err);
+
 	KBASE_TLSTREAM_TL_KBASE_NEW_CTX(kctx->kbdev, kctx->id,
 			kctx->kbdev->gpu_props.props.raw_props.gpu_id);
 	KBASE_TLSTREAM_TL_NEW_CTX(kctx->kbdev, kctx, kctx->id,
 			(u32)(kctx->tgid));
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 
-	return 0;
+	return err;
+}
+
+/**
+ * kbase_remove_kctx_from_process - remove a terminating context from
+ *                                  the process list.
+ *
+ * @kctx: Pointer to kbase context.
+ *
+ * Remove the tracking of a context from the list of contexts maintained under
+ * the kbase process. If the list is then empty, there are no outstanding
+ * contexts and we can remove the process node as well.
+ */
+static void kbase_remove_kctx_from_process(struct kbase_context *kctx)
+{
+	struct kbase_process *kprcs = kctx->kprcs;
+
+	lockdep_assert_held(&kctx->kbdev->kctx_list_lock);
+	list_del(&kctx->kprcs_link);
+
+	/* if there are no outstanding contexts in current process node,
+	 * we can remove it from the process rb_tree.
+	 */
+	if (list_empty(&kprcs->kctx_list)) {
+		rb_erase(&kprcs->kprcs_node, &kctx->kbdev->process_root);
+		/* Add checks, so that the terminating process should not
+		 * hold any gpu_memory.
+		 */
+		WARN_ON(kprcs->total_gpu_pages);
+		WARN_ON(!RB_EMPTY_ROOT(&kprcs->dma_buf_root));
+		kfree(kprcs);
+	}
+}
 
 void kbase_context_common_term(struct kbase_context *kctx)
@@ -109,6 +236,7 @@ void kbase_context_common_term(struct kbase_context *kctx)
 	WARN_ON(atomic_read(&kctx->nonmapped_pages) != 0);
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
+	kbase_remove_kctx_from_process(kctx);
 	KBASE_TLSTREAM_TL_KBASE_DEL_CTX(kctx->kbdev, kctx->id);

diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index fbba2e7..2a45a33 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -43,6 +43,7 @@
 #include <backend/gpu/mali_kbase_js_internal.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 #include <mali_kbase_dummy_job_wa.h>
+#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
 
 /**
  * kbase_backend_late_init - Perform any backend-specific initialization.
@@ -178,8 +179,11 @@ static const struct kbase_device_init dev_init[] = {
 	 "Job JS devdata initialization failed"},
 	{kbase_device_timeline_init, kbase_device_timeline_term,
 	 "Timeline stream initialization failed"},
-	{kbase_device_hwcnt_backend_gpu_init,
-	 kbase_device_hwcnt_backend_gpu_term,
+	{kbase_clk_rate_trace_manager_init,
+	 kbase_clk_rate_trace_manager_term,
+	 "Clock rate trace manager initialization failed"},
+	{kbase_device_hwcnt_backend_jm_init,
+	 kbase_device_hwcnt_backend_jm_term,
 	 "GPU hwcnt backend creation failed"},
 	{kbase_device_hwcnt_context_init, kbase_device_hwcnt_context_term,
 	 "GPU hwcnt context initialization failed"},

diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index 76f14e5..d0b85ba 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -271,14 +271,14 @@ void kbase_increment_device_id(void)
 	kbase_dev_nr++;
 }
 
-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev)
+int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev)
 {
-	return kbase_hwcnt_backend_gpu_create(kbdev, &kbdev->hwcnt_gpu_iface);
+	return kbase_hwcnt_backend_jm_create(kbdev, &kbdev->hwcnt_gpu_iface);
 }
 
-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev)
+void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev)
 {
-	kbase_hwcnt_backend_gpu_destroy(&kbdev->hwcnt_gpu_iface);
+	kbase_hwcnt_backend_jm_destroy(&kbdev->hwcnt_gpu_iface);
 }
 
 int kbase_device_hwcnt_context_init(struct kbase_device *kbdev)

diff --git a/mali_kbase/device/mali_kbase_device_internal.h b/mali_kbase/device/mali_kbase_device_internal.h
index 9f96db0..5464458 100644
--- a/mali_kbase/device/mali_kbase_device_internal.h
+++ b/mali_kbase/device/mali_kbase_device_internal.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -43,8 +43,8 @@ void kbase_device_vinstr_term(struct kbase_device *kbdev);
 int kbase_device_timeline_init(struct kbase_device *kbdev);
 void kbase_device_timeline_term(struct kbase_device *kbdev);
 
-int kbase_device_hwcnt_backend_gpu_init(struct kbase_device *kbdev);
-void kbase_device_hwcnt_backend_gpu_term(struct kbase_device *kbdev);
+int kbase_device_hwcnt_backend_jm_init(struct kbase_device *kbdev);
+void kbase_device_hwcnt_backend_jm_term(struct kbase_device *kbdev);
 
 int kbase_device_hwcnt_context_init(struct kbase_device *kbdev);
 void kbase_device_hwcnt_context_term(struct kbase_device *kbdev);

diff --git a/mali_kbase/jm/mali_base_jm_kernel.h b/mali_kbase/jm/mali_base_jm_kernel.h
index 879a436..ce36020 100644
--- a/mali_kbase/jm/mali_base_jm_kernel.h
+++ b/mali_kbase/jm/mali_base_jm_kernel.h
@@ -155,18 +155,23 @@
 /* Use the GPU VA chosen by the kernel client */
 #define BASE_MEM_FLAG_MAP_FIXED ((base_mem_alloc_flags)1 << 27)
 
+/* Bit 28 reserved for Kernel side cache sync ops flag */
+
+/* Force trimming of JIT allocations when creating a new allocation */
+#define BASEP_MEM_PERFORM_JIT_TRIM ((base_mem_alloc_flags)1 << 29)
+
 /* Number of bits used as flags for base memory management
  *
  * Must be kept in sync with the base_mem_alloc_flags flags
  */
-#define BASE_MEM_FLAGS_NR_BITS 28
+#define BASE_MEM_FLAGS_NR_BITS 30
 
 /* A mask of all the flags which are only valid for allocations within kbase,
  * and may not be passed from user space.
  */
 #define BASEP_MEM_FLAGS_KERNEL_ONLY \
 	(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | \
-	 BASE_MEM_FLAG_MAP_FIXED)
+	 BASE_MEM_FLAG_MAP_FIXED | BASEP_MEM_PERFORM_JIT_TRIM)
 
 /* A mask for all output bits, excluding IN/OUT bits.
  */
@@ -192,6 +197,28 @@
 #define BASE_MEM_FIRST_FREE_ADDRESS ((BITS_PER_LONG << 12) + \
 	BASE_MEM_COOKIE_BASE)
 
+/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
+ * initial commit is aligned to 'extent' pages, where 'extent' must be a power
+ * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
+ */
+#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
+
+/**
+ * If set, the heap info address points to a u32 holding the used size in
+ * bytes; otherwise it points to a u64 holding the lowest address of unused
+ * memory.
+ */
+#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
+
+/**
+ * Valid set of just-in-time memory allocation flags
+ *
+ * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
+ * in %base_jit_alloc_info is 0 (an atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE
+ * set and heap_info_gpu_addr being 0 will be rejected).
+ */
+#define BASE_JIT_ALLOC_VALID_FLAGS \
+	(BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
+
 /**
  * typedef base_context_create_flags - Flags to pass to ::base_context_init.
  *
@@ -787,6 +814,54 @@ struct base_jd_atom_v2 {
 	u8 padding[7];
 };
 
+/**
+ * struct base_jd_atom - Same as base_jd_atom_v2, but has an extra seq_nr
+ *                       at the beginning.
+ *
+ * @seq_nr:        Sequence number of logical grouping of atoms.
+ * @jc:            GPU address of a job chain or (if BASE_JD_REQ_END_RENDERPASS
+ *                 is set in the base_jd_core_req) the CPU address of a
+ *                 base_jd_fragment object.
+ * @udata:         User data.
+ * @extres_list:   List of external resources.
+ * @nr_extres:     Number of external resources or JIT allocations.
+ * @jit_id:        Zero-terminated array of IDs of just-in-time memory
+ *                 allocations written to by the atom. When the atom
+ *                 completes, the value stored at the
+ *                 &struct_base_jit_alloc_info.heap_info_gpu_addr of
+ *                 each allocation is read in order to enforce an
+ *                 overall physical memory usage limit.
+ * @pre_dep:       Pre-dependencies. One needs to use the SETTER function to
+ *                 assign this field; this is done in order to reduce the
+ *                 possibility of improper assignment of a dependency field.
+ * @atom_number:   Unique number to identify the atom.
+ * @prio:          Atom priority. Refer to base_jd_prio for more details.
+ * @device_nr:     Core group when BASE_JD_REQ_SPECIFIC_COHERENT_GROUP
+ *                 specified.
+ * @jobslot:       Job slot to use when BASE_JD_REQ_JOB_SLOT is specified.
+ * @core_req:      Core requirements.
+ * @renderpass_id: Renderpass identifier used to associate an atom that has
+ *                 BASE_JD_REQ_START_RENDERPASS set in its core requirements
+ *                 with an atom that has BASE_JD_REQ_END_RENDERPASS set.
+ * @padding:       Unused. Must be zero.
+ */
+typedef struct base_jd_atom {
+	u64 seq_nr;
+	u64 jc;
+	struct base_jd_udata udata;
+	u64 extres_list;
+	u16 nr_extres;
+	u8 jit_id[2];
+	struct base_dependency pre_dep[2];
+	base_atom_id atom_number;
+	base_jd_prio prio;
+	u8 device_nr;
+	u8 jobslot;
+	base_jd_core_req core_req;
+	u8 renderpass_id;
+	u8 padding[7];
+} base_jd_atom;
+
 /* Job chain event code bits
  * Defines the bits used to create ::base_jd_event_code
  */
@@ -982,7 +1057,7 @@ struct base_jd_event_v2 {
  * jobs.
  *
  * This structure is stored into the memory pointed to by the @jc field
- * of &struct base_jd_atom_v2.
+ * of &struct base_jd_atom.
  *
  * It must not occupy the same CPU cache line(s) as any neighboring data.
  * This is to avoid cases where access to pages containing the structure

diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index aac561b..307a342 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -496,9 +496,9 @@ struct kbase_jd_atom {
 	struct list_head jd_item;
 	bool in_jd_list;
 
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 	u8 jit_ids[2];
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 
 	u16 nr_extres;
 	struct kbase_ext_res *extres;
@@ -608,6 +608,9 @@ struct kbase_jd_atom {
 
 	atomic_t blocked;
 
+	/* user-space sequence number, to order atoms in some temporal order */
+	u64 seq_nr;
+
 	struct kbase_jd_atom *pre_dep;
 	struct kbase_jd_atom *post_dep;

diff --git a/mali_kbase/jm/mali_kbase_jm_ioctl.h b/mali_kbase/jm/mali_kbase_jm_ioctl.h
index 408e98e..6dc57d0 100644
--- a/mali_kbase/jm/mali_kbase_jm_ioctl.h
+++ b/mali_kbase/jm/mali_kbase_jm_ioctl.h
@@ -94,16 +94,32 @@
  * - The above changes are checked for safe values in usual builds
  * 11.21:
  * - v2.0 of mali_trace debugfs file, which now versions the file separately
+ * 11.22:
+ * - Added base_jd_atom (v3), which is seq_nr + base_jd_atom_v2.
+ *   KBASE_IOCTL_JOB_SUBMIT supports both in parallel.
+ * 11.23:
+ * - Modified KBASE_IOCTL_MEM_COMMIT behavior to reject requests to modify
+ *   the physical memory backing of JIT allocations. This was not supposed
+ *   to be a valid use case, but it was allowed by the previous
+ *   implementation.
+ * 11.24:
+ * - Added a sysfs file 'serialize_jobs' inside a new sub-directory
+ *   'scheduling'.
+ * 11.25:
+ * - Enabled JIT pressure limit in base/kbase by default
+ * 11.26:
+ * - Added kinstr_jm API
+ * 11.27:
+ * - Backwards compatible extension to HWC ioctl.
 */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 21
+#define BASE_UK_VERSION_MINOR 27
 
 /**
  * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
  *
- * @addr: Memory address of an array of struct base_jd_atom_v2
+ * @addr: Memory address of an array of struct base_jd_atom_v2 or v3
  * @nr_atoms: Number of entries in the array
- * @stride: sizeof(struct base_jd_atom_v2)
+ * @stride: sizeof(struct base_jd_atom_v2) or sizeof(struct base_jd_atom)
  */
 struct kbase_ioctl_job_submit {
 	__u64 addr;
@@ -132,5 +148,47 @@ struct kbase_ioctl_soft_event_update {
 #define KBASE_IOCTL_SOFT_EVENT_UPDATE \
 	_IOW(KBASE_IOCTL_TYPE, 28, struct kbase_ioctl_soft_event_update)
 
+/**
+ * struct kbase_kinstr_jm_fd_out - Explains the compatibility information for
+ * the `struct kbase_kinstr_jm_atom_state_change` structure returned from the
+ * kernel
+ *
+ * @size:    The size of the `struct kbase_kinstr_jm_atom_state_change`
+ * @version: Represents a breaking change in the
+ *           `struct kbase_kinstr_jm_atom_state_change`
+ * @padding: Explicit padding to get the structure up to 64 bits. See
+ *           https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ *
+ * The `struct kbase_kinstr_jm_atom_state_change` may have extra members at the
+ * end of the structure that older user space might not understand. If the
+ * `version` is the same, the structure is still compatible with newer kernels.
+ * The `size` can be used to cast the opaque memory returned from the kernel.
+ */
+struct kbase_kinstr_jm_fd_out {
+	__u16 size;
+	__u8 version;
+	__u8 padding[5];
+};
+
+/**
+ * struct kbase_kinstr_jm_fd_in - Options when creating the file descriptor
+ *
+ * @count:   Number of atom states that can be stored in the kernel circular
+ *           buffer. Must be a power of two
+ * @padding: Explicit padding to get the structure up to 64 bits. See
+ *           https://www.kernel.org/doc/Documentation/ioctl/botching-up-ioctls.rst
+ */
+struct kbase_kinstr_jm_fd_in {
+	__u16 count;
+	__u8 padding[6];
+};
+
+union kbase_kinstr_jm_fd {
+	struct kbase_kinstr_jm_fd_in in;
+	struct kbase_kinstr_jm_fd_out out;
+};
+
+#define KBASE_IOCTL_KINSTR_JM_FD \
+	_IOWR(KBASE_IOCTL_TYPE, 51, union kbase_kinstr_jm_fd)
 
 #endif /* _KBASE_JM_IOCTL_H_ */
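
[Editorial note] Because kbase_ioctl_job_submit carries the stride explicitly, the kernel can tell a v2 atom array from a v3 one without a new ioctl number. A hedged userspace sketch of a v3 submission; it assumes the uapi structures above plus KBASE_IOCTL_JOB_SUBMIT from mali_kbase_ioctl.h (not part of this hunk), and that 'fd' is an open kbase device file with a context already created:

#include <stdint.h>
#include <sys/ioctl.h>

static int submit_one_atom(int fd, struct base_jd_atom *atom)
{
    struct kbase_ioctl_job_submit job = { 0 };

    job.addr = (__u64)(uintptr_t)atom;
    job.nr_atoms = 1;
    /* v3 stride: seq_nr included. Legacy clients pass
     * sizeof(struct base_jd_atom_v2) here instead. */
    job.stride = sizeof(struct base_jd_atom);

    return ioctl(fd, KBASE_IOCTL_JOB_SUBMIT, &job);
}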
diff --git a/mali_kbase/mali_base_kernel.h b/mali_kbase/mali_base_kernel.h
index 1e2744d..d45092f 100644
--- a/mali_kbase/mali_base_kernel.h
+++ b/mali_kbase/mali_base_kernel.h
@@ -213,28 +213,6 @@ struct base_mem_aliasing_info {
  */
 #define BASE_JIT_ALLOC_COUNT (255)
 
-/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
- * initial commit is aligned to 'extent' pages, where 'extent' must be a power
- * of 2 and no more than BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES
- */
-#define BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP (1 << 0)
-
-/**
- * If set, the heap info address points to a u32 holding the used size in bytes;
- * otherwise it points to a u64 holding the lowest address of unused memory.
- */
-#define BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE (1 << 1)
-
-/**
- * Valid set of just-in-time memory allocation flags
- *
- * Note: BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE cannot be set if heap_info_gpu_addr
- * in %base_jit_alloc_info is 0 (atom with BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE set
- * and heap_info_gpu_addr being 0 will be rejected).
- */
-#define BASE_JIT_ALLOC_VALID_FLAGS \
-	(BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP | BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
-
 /* base_jit_alloc_info in use for kernel driver versions 10.2 to early 11.5
  *
  * jit_version is 1

diff --git a/mali_kbase/mali_gpu_mem_trace.h b/mali_kbase/mali_gpu_mem_trace.h
new file mode 100644
index 0000000..183e6c4
--- /dev/null
+++ b/mali_kbase/mali_gpu_mem_trace.h
@@ -0,0 +1,73 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gpu_mem
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_gpu_mem_trace
+
+#if !defined(_TRACE_MALI_GPU_MEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MALI_GPU_MEM_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * trace_gpu_mem_total
+ *
+ * The gpu_mem_total event indicates that there's an update to either the
+ * global or process total GPU memory counters.
+ *
+ * This event should be emitted whenever the kernel device driver allocates,
+ * frees, imports, or unimports memory in the GPU addressable space.
+ *
+ * @gpu_id: Kbase device id.
+ * @pid: This is either the thread group ID of the process for which there was
+ *       an update in the GPU memory usage or 0 so as to indicate an update in
+ *       the device wide GPU memory usage.
+ * @size: GPU memory usage in bytes.
+ */
+TRACE_EVENT(gpu_mem_total,
+	TP_PROTO(uint32_t gpu_id, uint32_t pid, uint64_t size),
+
+	TP_ARGS(gpu_id, pid, size),
+
+	TP_STRUCT__entry(
+		__field(uint32_t, gpu_id)
+		__field(uint32_t, pid)
+		__field(uint64_t, size)
+	),
+
+	TP_fast_assign(
+		__entry->gpu_id = gpu_id;
+		__entry->pid = pid;
+		__entry->size = size;
+	),
+
+	TP_printk("gpu_id=%u pid=%u size=%llu",
+		__entry->gpu_id,
+		__entry->pid,
+		__entry->size)
+);
+#endif /* _TRACE_MALI_GPU_MEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
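
[Editorial note] TRACE_EVENT(gpu_mem_total, ...) generates a trace_gpu_mem_total() emitter once some translation unit defines CREATE_TRACE_POINTS before including the header. A sketch of a driver-side helper raising the event for both the device-wide (pid == 0) and per-process totals; only the tracepoint signature comes from the header above, the helper and its parameters are hypothetical:

#define CREATE_TRACE_POINTS
#include "mali_gpu_mem_trace.h"

/* Hypothetical helper: called whenever GPU pages are (un)mapped. The
 * driver keeps page counts, the tracepoint wants bytes, hence PAGE_SHIFT. */
static void report_gpu_mem_usage(u32 gpu_id, u64 device_total_pages,
                                 struct kbase_process *kprcs)
{
    trace_gpu_mem_total(gpu_id, 0, device_total_pages << PAGE_SHIFT);
    trace_gpu_mem_total(gpu_id, kprcs->tgid,
                        kprcs->total_gpu_pages << PAGE_SHIFT);
}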
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 0445e0c..c623e7e 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -213,9 +213,9 @@ void kbase_jd_exit(struct kbase_context *kctx);
  * kbase_jd_submit - Submit atoms to the job dispatcher
  *
  * @kctx: The kbase context to submit to
- * @user_addr: The address in user space of the struct base_jd_atom_v2 array
+ * @user_addr: The address in user space of the struct base_jd_atom array
  * @nr_atoms: The number of atoms in the array
- * @stride: sizeof(struct base_jd_atom_v2)
+ * @stride: sizeof(struct base_jd_atom)
  * @uk6_atom: true if the atoms are legacy atoms (struct base_jd_atom_v2_uk6)
  *
  * Return: 0 on success or error code
@@ -457,7 +457,7 @@ void kbase_pm_metrics_stop(struct kbase_device *kbdev);
 
 /**
  * Return the atom's ID, as was originally supplied by userspace in
- * base_jd_atom_v2::atom_number
+ * base_jd_atom::atom_number
  */
 static inline int kbase_jd_atom_id(struct kbase_context *kctx,
 		struct kbase_jd_atom *katom)
 {

diff --git a/mali_kbase/mali_kbase_caps.h b/mali_kbase/mali_kbase_caps.h
new file mode 100644
index 0000000..b201a60
--- /dev/null
+++ b/mali_kbase/mali_kbase_caps.h
@@ -0,0 +1,65 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/**
+ * @file mali_kbase_caps.h
+ *
+ * Driver Capability Queries.
+ */
+
+#ifndef _KBASE_CAPS_H_
+#define _KBASE_CAPS_H_
+
+#include <linux/types.h>
+
+typedef enum mali_kbase_cap {
+	MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
+	MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
+	MALI_KBASE_CAP_MEM_GROW_ON_GPF,
+	MALI_KBASE_CAP_MEM_PROTECTED,
+	MALI_KBASE_NUM_CAPS
+} mali_kbase_cap;
+
+extern bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap);
+
+static inline bool mali_kbase_supports_system_monitor(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_SYSTEM_MONITOR);
+}
+
+static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT);
+}
+
+static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF);
+}
+
+static inline bool mali_kbase_supports_mem_protected(unsigned long api_version)
+{
+	return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED);
+}
+
+#endif /* _KBASE_CAPS_H_ */

diff --git a/mali_kbase/mali_kbase_ccswe.c b/mali_kbase/mali_kbase_ccswe.c
new file mode 100644
index 0000000..87d5aaa
--- /dev/null
+++ b/mali_kbase/mali_kbase_ccswe.c
@@ -0,0 +1,105 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_ccswe.h"
+#include "mali_kbase_linux.h"
+
+#include <linux/math64.h>
+#include <linux/time.h>
+
+static u64 kbasep_ccswe_cycle_at_no_lock(
+	struct kbase_ccswe *self, u64 timestamp_ns)
+{
+	s64 diff_s, diff_ns;
+	u32 gpu_freq;
+
+	lockdep_assert_held(&self->access);
+
+	diff_ns = timestamp_ns - self->timestamp_ns;
+	gpu_freq = diff_ns > 0 ? self->gpu_freq : self->prev_gpu_freq;
+
+	diff_s = div_s64(diff_ns, NSEC_PER_SEC);
+	diff_ns -= diff_s * NSEC_PER_SEC;
+
+	return self->cycles_elapsed + diff_s * gpu_freq
+		+ div_s64(diff_ns * gpu_freq, NSEC_PER_SEC);
+}
+
+void kbase_ccswe_init(struct kbase_ccswe *self)
+{
+	memset(self, 0, sizeof(*self));
+
+	spin_lock_init(&self->access);
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_init);
+
+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns)
+{
+	unsigned long flags;
+	u64 result;
+
+	spin_lock_irqsave(&self->access, flags);
+	result = kbasep_ccswe_cycle_at_no_lock(self, timestamp_ns);
+	spin_unlock_irqrestore(&self->access, flags);
+
+	return result;
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_cycle_at);
+
+void kbase_ccswe_freq_change(
+	struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&self->access, flags);
+
+	/* The time must go only forward. */
+	if (WARN_ON(timestamp_ns < self->timestamp_ns))
+		goto exit;
+
+	/* If this is the first frequency change, cycles_elapsed is zero. */
+	if (self->timestamp_ns)
+		self->cycles_elapsed = kbasep_ccswe_cycle_at_no_lock(
+			self, timestamp_ns);
+
+	self->timestamp_ns = timestamp_ns;
+	self->prev_gpu_freq = self->gpu_freq;
+	self->gpu_freq = gpu_freq;
+exit:
+	spin_unlock_irqrestore(&self->access, flags);
+}
+KBASE_EXPORT_TEST_API(kbase_ccswe_freq_change);
+
+void kbase_ccswe_reset(struct kbase_ccswe *self)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&self->access, flags);
+
+	self->timestamp_ns = 0;
+	self->cycles_elapsed = 0;
+	self->gpu_freq = 0;
+	self->prev_gpu_freq = 0;
+
+	spin_unlock_irqrestore(&self->access, flags);
+}

diff --git a/mali_kbase/mali_kbase_ccswe.h b/mali_kbase/mali_kbase_ccswe.h
new file mode 100644
index 0000000..3a7cf73
--- /dev/null
+++ b/mali_kbase/mali_kbase_ccswe.h
@@ -0,0 +1,97 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KBASE_CCSWE_H_
+#define _KBASE_CCSWE_H_
+
+#include <linux/spinlock.h>
+
+/**
+ * struct kbase_ccswe - Cycle count software estimator.
+ *
+ * @access:         Spinlock protecting this structure access.
+ * @timestamp_ns:   Timestamp (ns) when the last frequency change occurred.
+ * @cycles_elapsed: Number of cycles elapsed before the last frequency change.
+ * @gpu_freq:       Current GPU frequency (Hz) value.
+ * @prev_gpu_freq:  Previous GPU frequency (Hz) before the last frequency
+ *                  change.
+ */
+struct kbase_ccswe {
+	spinlock_t access;
+	u64 timestamp_ns;
+	u64 cycles_elapsed;
+	u32 gpu_freq;
+	u32 prev_gpu_freq;
+};
+
+/**
+ * kbase_ccswe_init() - initialize the cycle count estimator.
+ *
+ * @self: Cycles count software estimator instance.
+ */
+void kbase_ccswe_init(struct kbase_ccswe *self);
+
+/**
+ * kbase_ccswe_cycle_at() - Estimate cycle count at given timestamp.
+ *
+ * @self:         Cycles count software estimator instance.
+ * @timestamp_ns: The timestamp (ns) for cycle count estimation.
+ *
+ * The timestamp must be bigger than the timestamp of the penultimate
+ * frequency change. If only one frequency change occurred, the
+ * timestamp must be bigger than the timestamp of the frequency change.
+ * This is to allow the following code to be executed w/o synchronization.
+ * If the lines below are executed atomically, it is safe to assume that
+ * only one frequency change may happen in between.
+ *
+ *   u64 ts = ktime_get_raw_ns();
+ *   u64 cycle = kbase_ccswe_cycle_at(&ccswe, ts)
+ *
+ * Return: estimated value of cycle count at a given time.
+ */
+u64 kbase_ccswe_cycle_at(struct kbase_ccswe *self, u64 timestamp_ns);
+
+/**
+ * kbase_ccswe_freq_change() - update GPU frequency.
+ *
+ * @self:         Cycles count software estimator instance.
+ * @timestamp_ns: Timestamp (ns) when frequency change occurred.
+ * @gpu_freq:     New GPU frequency value.
+ *
+ * The timestamp must be bigger than the timestamp of the previous
+ * frequency change. The function is to be called at the frequency
+ * change moment (not later).
+ */
+void kbase_ccswe_freq_change(
+	struct kbase_ccswe *self, u64 timestamp_ns, u32 gpu_freq);
+
+/**
+ * kbase_ccswe_reset() - reset estimator state
+ *
+ * @self: Cycles count software estimator instance.
+ */
+void kbase_ccswe_reset(struct kbase_ccswe *self);
+
+#endif /* _KBASE_CCSWE_H_ */
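
[Editorial note] The estimator is a piecewise-linear extrapolation: cycles(t) = cycles_elapsed + (t - timestamp_ns) * gpu_freq / NSEC_PER_SEC, with prev_gpu_freq covering queries that land just before the latest change point. A worked sketch using only the API above (the numbers are invented for illustration):

static void ccswe_example(void)
{
    struct kbase_ccswe ccswe;

    kbase_ccswe_init(&ccswe);

    /* GPU comes up at 100 MHz at t = 1 s (times in ns). */
    kbase_ccswe_freq_change(&ccswe, 1000000000ull, 100000000u);

    /* 0.5 s later the clock rises to 200 MHz. At this point
     * 0.5 s * 100 MHz = 50M cycles are banked in cycles_elapsed. */
    kbase_ccswe_freq_change(&ccswe, 1500000000ull, 200000000u);

    /* Query at t = 2 s: 50M + 0.5 s * 200 MHz = 150M cycles. */
    WARN_ON(kbase_ccswe_cycle_at(&ccswe, 2000000000ull) != 150000000ull);
}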
+	 */
+	int (*gpu_clk_notifier_register)(struct kbase_device *kbdev,
+				void *gpu_clk_handle, struct notifier_block *nb);
+
+	/**
+	 * gpu_clk_notifier_unregister - Unregister clock rate change notifier
+	 * @kbdev          - kbase_device pointer
+	 * @gpu_clk_handle - Handle unique to the enumerated GPU clock
+	 * @nb             - notifier block containing the callback function
+	 *                   pointer
+	 *
+	 * This function pointer is used to unregister a callback function
+	 * that was previously registered to get notified of a change in the
+	 * rate of the clock corresponding to @gpu_clk_handle.
+	 */
+	void (*gpu_clk_notifier_unregister)(struct kbase_device *kbdev,
+				void *gpu_clk_handle, struct notifier_block *nb);
+};
+
 #ifdef CONFIG_OF
 struct kbase_platform_config {
 };
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index fb2353e..83a22d9 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -55,6 +55,7 @@
 #include <mali_kbase_reset_gpu.h>
 #include <backend/gpu/mali_kbase_device_internal.h>
 #include "mali_kbase_ioctl.h"
+#include "mali_kbase_kinstr_jm.h"
 #include "mali_kbase_hwcnt_context.h"
 #include "mali_kbase_hwcnt_virtualizer.h"
 #include "mali_kbase_hwcnt_legacy.h"
@@ -114,6 +115,8 @@
 #include <device/mali_kbase_device.h>
 #include <context/mali_kbase_context.h>
 
+#include <mali_kbase_caps.h>
+
 /* GPU IRQ Tags */
 #define JOB_IRQ_TAG	0
 #define MMU_IRQ_TAG	1
@@ -122,6 +125,82 @@
 
 #define KERNEL_SIDE_DDK_VERSION_STRING "K:" MALI_RELEASE_NAME "(GPL)"
 
 /**
+ * Kernel min/maj <=> API Version
+ */
+#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20)  | \
+					 (((minor) & 0xFFF) << 8) | \
+					 ((0 & 0xFF) << 0))
+
+#define KBASE_API_MIN(api_version) ((api_version >> 8) & 0xFFF)
+#define KBASE_API_MAJ(api_version) ((api_version >> 20) & 0xFFF)
+
+/**
+ * mali_kbase_api_version_to_maj_min - convert an api_version to a maj/min pair
+ *
+ * @api_version: API version to convert
+ * @maj:         Major version number (must not exceed 12 bits)
+ * @min:         Minor version number (must not exceed 12 bits)
+ */
+void mali_kbase_api_version_to_maj_min(unsigned long api_version, u16 *maj, u16 *min)
+{
+	if (WARN_ON(!maj))
+		return;
+
+	if (WARN_ON(!min))
+		return;
+
+	*maj = KBASE_API_MAJ(api_version);
+	*min = KBASE_API_MIN(api_version);
+}
+
+/**
+ * kbase capabilities table
+ */
+typedef struct mali_kbase_capability_def {
+	u16 required_major;
+	u16 required_minor;
+} mali_kbase_capability_def;
+
+/**
+ * This must be kept in-sync with mali_kbase_cap
+ *
+ * TODO: The alternative approach would be to embed the cap enum values
+ * in the table. Less efficient but potentially safer.
+ */ +static mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CAPS] = { + { 11, 15 }, /* SYSTEM_MONITOR */ + { 11, 25 }, /* JIT_PRESSURE_LIMIT */ + { 11, 2 }, /* MEM_GROW_ON_GPF */ + { 11, 2 } /* MEM_PROTECTED */ +}; + +/** + * mali_kbase_supports_cap - Query whether a kbase capability is supported + * + * @api_version: API version to convert + * @cap: Capability to query for - see mali_kbase_caps.h + */ +bool mali_kbase_supports_cap(unsigned long api_version, mali_kbase_cap cap) +{ + bool supported = false; + unsigned long required_ver; + + mali_kbase_capability_def const *cap_def; + + if (WARN_ON(cap < 0)) + return false; + + if (WARN_ON(cap >= MALI_KBASE_NUM_CAPS)) + return false; + + cap_def = &kbase_caps_table[(int)cap]; + required_ver = KBASE_API_VERSION(cap_def->required_major, cap_def->required_minor); + supported = (api_version >= required_ver); + + return supported; +} + +/** * kbase_file_new - Create an object representing a device file * * @kbdev: An instance of the GPU platform device, allocated from the probe @@ -152,7 +231,7 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, } /** - * kbase_file_get_api_version - Set the application programmer interface version + * kbase_file_set_api_version - Set the application programmer interface version * * @kfile: A device file created by kbase_file_new() * @major: Major version number (must not exceed 12 bits) @@ -326,7 +405,7 @@ static int kbase_api_handshake(struct kbase_file *kfile, * the flags have been set. Originally it was created on file open * (with job submission disabled) but we don't support that usage. */ - if (kbase_file_get_api_version(kfile) < KBASE_API_VERSION(11, 15)) + if (!mali_kbase_supports_system_monitor(kbase_file_get_api_version(kfile))) err = kbase_file_create_kctx(kfile, BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED); @@ -663,7 +742,7 @@ static int kbase_api_set_flags(struct kbase_file *kfile, /* For backward compatibility, the context may have been created before * the flags were set. */ - if (api_version >= KBASE_API_VERSION(11, 15)) { + if (mali_kbase_supports_system_monitor(api_version)) { err = kbase_file_create_kctx(kfile, flags->create_flags); } else { struct kbasep_js_kctx_info *js_kctx_info = NULL; @@ -790,6 +869,12 @@ static int kbase_api_mem_free(struct kbase_context *kctx, return kbase_mem_free(kctx, free->gpu_addr); } +static int kbase_api_kinstr_jm_fd(struct kbase_context *kctx, + union kbase_kinstr_jm_fd *arg) +{ + return kbase_kinstr_jm_get_fd(kctx->kinstr_jm, arg); +} + static int kbase_api_hwcnt_reader_setup(struct kbase_context *kctx, struct kbase_ioctl_hwcnt_reader_setup *setup) { @@ -1536,6 +1621,12 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) break; /* Instrumentation. */ + case KBASE_IOCTL_KINSTR_JM_FD: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_KINSTR_JM_FD, + kbase_api_kinstr_jm_fd, + union kbase_kinstr_jm_fd, + kctx); + break; case KBASE_IOCTL_HWCNT_READER_SETUP: KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_HWCNT_READER_SETUP, kbase_api_hwcnt_reader_setup, @@ -1890,7 +1981,7 @@ static ssize_t show_core_mask(struct device *dev, struct device_attribute *attr, * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. 
*/ @@ -1985,7 +2076,7 @@ static DEVICE_ATTR(core_mask, S_IRUGO | S_IWUSR, show_core_mask, set_core_mask); * @dev: The device this sysfs file is for. * @attr: The attributes of the sysfs file. * @buf: The value written to the sysfs file. - * @count: The number of bytes written to the sysfs file. + * @count: The number of bytes to write to the sysfs file. * * This allows setting the timeout for software jobs. Waiting soft event wait * jobs will be cancelled after this period expires, while soft fence wait jobs @@ -2078,7 +2169,7 @@ static u32 timeout_ms_to_ticks(struct kbase_device *kbdev, long timeout_ms, * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. */ @@ -2255,7 +2346,7 @@ static u32 get_new_js_timeout( * @dev: The device the sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * This function is called when the js_scheduling_period sysfs file is written * to. It checks the data written, and if valid updates the js_scheduling_period @@ -2495,7 +2586,7 @@ static ssize_t show_debug(struct device *dev, struct device_attribute *attr, cha * @dev: The device with sysfs file is for * @attr: The attributes of the sysfs file * @buf: The value written to the sysfs file - * @count: The number of bytes written to the sysfs file + * @count: The number of bytes to write to the sysfs file * * Return: @count if the function succeeded. An error code on failure. */ @@ -3096,7 +3187,6 @@ static DEVICE_ATTR(js_ctx_scheduling_mode, S_IRUGO | S_IWUSR, set_js_ctx_scheduling_mode); #ifdef MALI_KBASE_BUILD -#ifdef CONFIG_DEBUG_FS /* Number of entries in serialize_jobs_settings[] */ #define NR_SERIALIZE_JOBS_SETTINGS 5 @@ -3117,8 +3207,47 @@ static struct }; /** - * kbasep_serialize_jobs_seq_show - Show callback for the serialize_jobs debugfs - * file + * update_serialize_jobs_setting - Update the serialization setting for the + * submission of GPU jobs. + * + * This function is called when the serialize_jobs sysfs/debugfs file is + * written to. It matches the requested setting against the available settings + * and if a matching setting is found updates kbdev->serialize_jobs. + * + * @kbdev: An instance of the GPU platform device, allocated from the probe + * method of the driver. + * @buf: Buffer containing the value written to the sysfs/debugfs file. + * @count: The number of bytes to write to the sysfs/debugfs file. + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t update_serialize_jobs_setting(struct kbase_device *kbdev, + const char *buf, size_t count) +{ + int i; + bool valid = false; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { + kbdev->serialize_jobs = + serialize_jobs_settings[i].setting; + valid = true; + break; + } + } + + if (!valid) { + dev_err(kbdev->dev, "serialize_jobs: invalid setting"); + return -EINVAL; + } + + return count; +} + +#ifdef CONFIG_DEBUG_FS +/** + * kbasep_serialize_jobs_seq_debugfs_show - Show callback for the serialize_jobs + * debugfs file * @sfile: seq_file pointer * @data: Private callback data * @@ -3128,7 +3257,8 @@ static struct * * Return: 0 on success, or an error code on error */ -static int kbasep_serialize_jobs_seq_show(struct seq_file *sfile, void *data) +static int kbasep_serialize_jobs_seq_debugfs_show(struct seq_file *sfile, + void *data) { struct kbase_device *kbdev = sfile->private; int i; @@ -3169,8 +3299,6 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, struct seq_file *s = file->private_data; struct kbase_device *kbdev = s->private; char buf[MAX_SERIALIZE_JOBS_NAME_LEN]; - int i; - bool valid = false; CSTD_UNUSED(ppos); @@ -3180,21 +3308,7 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, buf[count] = 0; - for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { - if (sysfs_streq(serialize_jobs_settings[i].name, buf)) { - kbdev->serialize_jobs = - serialize_jobs_settings[i].setting; - valid = true; - break; - } - } - - if (!valid) { - dev_err(kbdev->dev, "serialize_jobs: invalid setting\n"); - return -EINVAL; - } - - return count; + return update_serialize_jobs_setting(kbdev, buf, count); } /** @@ -3208,7 +3322,8 @@ static ssize_t kbasep_serialize_jobs_debugfs_write(struct file *file, static int kbasep_serialize_jobs_debugfs_open(struct inode *in, struct file *file) { - return single_open(file, kbasep_serialize_jobs_seq_show, in->i_private); + return single_open(file, kbasep_serialize_jobs_seq_debugfs_show, + in->i_private); } static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { @@ -3221,6 +3336,72 @@ static const struct file_operations kbasep_serialize_jobs_debugfs_fops = { }; #endif /* CONFIG_DEBUG_FS */ + +/** + * show_serialize_jobs_sysfs - Show callback for serialize_jobs sysfs file. + * + * This function is called to get the contents of the serialize_jobs sysfs + * file. This is a list of the available settings with the currently active + * one surrounded by square brackets. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The output buffer for the sysfs file contents + * + * Return: The number of bytes output to @buf. 
+ */ +static ssize_t show_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kbase_device *kbdev = to_kbase_device(dev); + ssize_t ret = 0; + int i; + + for (i = 0; i < NR_SERIALIZE_JOBS_SETTINGS; i++) { + if (kbdev->serialize_jobs == + serialize_jobs_settings[i].setting) + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "[%s]", + serialize_jobs_settings[i].name); + else + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s ", + serialize_jobs_settings[i].name); + } + + if (ret < PAGE_SIZE - 1) { + ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + } else { + buf[PAGE_SIZE - 2] = '\n'; + buf[PAGE_SIZE - 1] = '\0'; + ret = PAGE_SIZE - 1; + } + + return ret; +} + +/** + * store_serialize_jobs_sysfs - Store callback for serialize_jobs sysfs file. + * + * This function is called when the serialize_jobs sysfs file is written to. + * It matches the requested setting against the available settings and if a + * matching setting is found updates kbdev->serialize_jobs. + * + * @dev: The device this sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t store_serialize_jobs_sysfs(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + return update_serialize_jobs_setting(to_kbase_device(dev), buf, count); +} + +static DEVICE_ATTR(serialize_jobs, 0600, show_serialize_jobs_sysfs, + store_serialize_jobs_sysfs); #endif /* MALI_KBASE_BUILD */ static void kbasep_protected_mode_hwcnt_disable_worker(struct work_struct *data) @@ -4019,6 +4200,11 @@ void buslog_term(struct kbase_device *kbdev) } #endif +static struct attribute *kbase_scheduling_attrs[] = { + &dev_attr_serialize_jobs.attr, + NULL +}; + static struct attribute *kbase_attrs[] = { #ifdef CONFIG_MALI_DEBUG &dev_attr_debug_command.attr, @@ -4041,6 +4227,12 @@ static struct attribute *kbase_attrs[] = { NULL }; +#define SYSFS_SCHEDULING_GROUP "scheduling" +static const struct attribute_group kbase_scheduling_attr_group = { + .name = SYSFS_SCHEDULING_GROUP, + .attrs = kbase_scheduling_attrs, +}; + static const struct attribute_group kbase_attr_group = { .attrs = kbase_attrs, }; @@ -4056,11 +4248,23 @@ int kbase_sysfs_init(struct kbase_device *kbdev) kbdev->mdev.mode = 0666; err = sysfs_create_group(&kbdev->dev->kobj, &kbase_attr_group); + if (!err) { + err = sysfs_create_group(&kbdev->dev->kobj, + &kbase_scheduling_attr_group); + if (err) { + dev_err(kbdev->dev, "Creation of %s sysfs group failed", + SYSFS_SCHEDULING_GROUP); + sysfs_remove_group(&kbdev->dev->kobj, + &kbase_attr_group); + } + } + return err; } void kbase_sysfs_term(struct kbase_device *kbdev) { + sysfs_remove_group(&kbdev->dev->kobj, &kbase_scheduling_attr_group); sysfs_remove_group(&kbdev->dev->kobj, &kbase_attr_group); put_device(kbdev->dev); } diff --git a/mali_kbase/mali_kbase_cs_experimental.h b/mali_kbase/mali_kbase_cs_experimental.h index e1fffc3..caba2cd 100644 --- a/mali_kbase/mali_kbase_cs_experimental.h +++ b/mali_kbase/mali_kbase_cs_experimental.h @@ -41,9 +41,6 @@ */ static inline void mali_kbase_print_cs_experimental(void) { -#if MALI_JIT_PRESSURE_LIMIT - pr_info("mali_kbase: JIT_PRESSURE_LIMIT (experimental) enabled"); -#endif /* MALI_JIT_PRESSURE_LIMIT */ #if MALI_INCREMENTAL_RENDERING pr_info("mali_kbase: INCREMENTAL_RENDERING (experimental) enabled"); #endif /* 
MALI_INCREMENTAL_RENDERING */
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 7056d80..5cbe6a9 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -40,7 +40,7 @@
 #include <mali_kbase_instr_defs.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_gpuprops_types.h>
-#include <mali_kbase_hwcnt_backend_gpu.h>
+#include <mali_kbase_hwcnt_backend_jm.h>
 #include <protected_mode_switcher.h>
 
 #include <linux/atomic.h>
@@ -156,6 +156,7 @@ struct kbase_device;
 struct kbase_as;
 struct kbase_mmu_setup;
 struct kbase_ipa_model_vinstr_data;
+struct kbase_kinstr_jm;
 
 /**
  * struct kbase_io_access - holds information about 1 register access
@@ -320,6 +321,58 @@ struct kbasep_mem_device {
 	atomic_t ir_threshold;
 };
 
+struct kbase_clk_rate_listener;
+
+/**
+ * kbase_clk_rate_listener_on_change_t() - Frequency change callback
+ *
+ * @listener:    Clock frequency change listener.
+ * @clk_index:   Index of the clock for which the change has occurred.
+ * @clk_rate_hz: Clock frequency(Hz).
+ *
+ * A callback invoked when the clock rate changes. The function must not
+ * sleep, and no clock rate trace manager functions may be called from it,
+ * as the manager's lock is held.
+ */
+typedef void (*kbase_clk_rate_listener_on_change_t)(
+	struct kbase_clk_rate_listener *listener,
+	u32 clk_index,
+	u32 clk_rate_hz);
+
+/**
+ * struct kbase_clk_rate_listener - Clock frequency listener
+ *
+ * @node:   List node.
+ * @notify: Callback to be called when GPU frequency changes.
+ */
+struct kbase_clk_rate_listener {
+	struct list_head node;
+	kbase_clk_rate_listener_on_change_t notify;
+};
+
+/**
+ * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock
+ *                                       rate trace manager.
+ *
+ * @gpu_idle:           Tracks the idle state of the GPU.
+ * @clks:               Array of pointers to structures storing data for every
+ *                      enumerated GPU clock.
+ * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
+ *                      operations.
+ * @gpu_clk_rate_trace_write: Pointer to the function that would emit the
+ *                      tracepoint for the clock rate change.
+ * @listeners:          List of attached listeners.
+ * @lock:               Lock to serialize the actions of the GPU clock rate
+ *                      trace manager.
+ */
+struct kbase_clk_rate_trace_manager {
+	bool gpu_idle;
+	struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS];
+	struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops;
+	struct list_head listeners;
+	spinlock_t lock;
+};
+
 /**
  * Data stored per device for power management.
  *
@@ -385,6 +438,11 @@
 	 */
 	struct kbase_arbiter_vm_state *arb_vm_state;
 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
+
+	/**
+	 * The state of the GPU clock rate trace manager
+	 */
+	struct kbase_clk_rate_trace_manager clk_rtm;
 };
 
 /**
@@ -560,6 +618,32 @@ struct kbase_devfreq_queue_info {
 };
 
 /**
+ * struct kbase_process - Object representing a kbase process, instantiated
+ *                        when the first kbase context is created under it.
+ * @tgid:            Thread group ID.
+ * @total_gpu_pages: Total GPU pages allocated across all the contexts
+ *                   of this process; it accounts for both native allocations
+ *                   and dma_buf imported allocations.
+ * @kctx_list:       List of kbase contexts created for the process.
+ * @kprcs_node:      Node to a rb_tree, kbase_device will maintain a rb_tree
+ *                   based on key tgid, kprcs_node is the node link to
+ *                   &struct_kbase_device.process_root.
+ * @dma_buf_root:    RB tree of the dma-buf imported allocations, imported
+ *                   across all the contexts created for this process.
+ *                   Used to ensure that pages of allocation are accounted
+ *                   only once for the process, even if the allocation gets
+ *                   imported multiple times for the process.
+ */
+struct kbase_process {
+	pid_t tgid;
+	size_t total_gpu_pages;
+	struct list_head kctx_list;
+
+	struct rb_node kprcs_node;
+	struct rb_root dma_buf_root;
+};
+
+/**
 * struct kbase_device - Object representing an instance of GPU platform device,
 *                       allocated from the probe method of mali driver.
 * @hw_quirks_sc:   Configuration to be used for the shader cores as per
@@ -806,6 +890,20 @@
 *                  Job Scheduler
 * @l2_size_override: Used to set L2 cache size via device tree blob
 * @l2_hash_override: Used to set L2 cache hash via device tree blob
+ * @process_root:   rb_tree root node for maintaining a rb_tree of
+ *                  kbase_process based on key tgid (thread group ID).
+ * @dma_buf_root:   rb_tree root node for maintaining a rb_tree of
+ *                  &struct kbase_dma_buf based on key dma_buf.
+ *                  We maintain a rb_tree of dma_buf mappings under
+ *                  kbase_device and kbase_process; one tracks mappings and
+ *                  GPU memory usage at the device level, the other at the
+ *                  process level.
+ * @total_gpu_pages: Total GPU pages used for the complete GPU device.
+ * @dma_buf_lock:   This mutex should be held while accounting for
+ *                  @total_gpu_pages from imported dma buffers.
+ * @gpu_mem_usage_lock: This spinlock should be held while accounting
+ *                  @total_gpu_pages for both native and dma-buf imported
+ *                  allocations.
 */
 struct kbase_device {
 	u32 hw_quirks_sc;
@@ -1043,6 +1141,13 @@
 #endif /* CONFIG_MALI_CINSTR_GWT */
 
+	struct rb_root process_root;
+	struct rb_root dma_buf_root;
+
+	size_t total_gpu_pages;
+	struct mutex dma_buf_lock;
+	spinlock_t gpu_mem_usage_lock;
+
 	struct {
 		struct kbase_context *ctx;
 		u64 jc;
@@ -1056,10 +1161,6 @@
 #endif
 };
 
-#define KBASE_API_VERSION(major, minor) ((((major) & 0xFFF) << 20) | \
-					 (((minor) & 0xFFF) << 8) | \
-					 ((0 & 0xFF) << 0))
-
 /**
 * enum kbase_file_state - Initialization state of a file opened by @kbase_open
 *
@@ -1189,6 +1290,13 @@ enum kbase_context_flags {
 	KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
 	KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
 	KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+	/*
+	 * Set when the JIT physical page limit is less than the JIT virtual
+	 * address page limit, so we must take care not to exceed the
+	 * physical limit.
+	 */
+	KCTX_JPL_ENABLED = 1U << 16,
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 };
 
 struct kbase_sub_alloc {
@@ -1399,6 +1507,16 @@
 *                      that were used (i.e. the
 *                      &struct_kbase_va_region.used_pages for regions
 *                      that have had a usage report).
+ * @jit_phys_pages_to_be_allocated: Count of the physical pages that are now
+ *                      being allocated for just-in-time memory
+ *                      allocations of a context (across all the
+ *                      threads). This is supposed to be updated
+ *                      with @reg_lock held before allocating
+ *                      the backing pages. This helps ensure that
+ *                      the total physical memory usage for just-in-time
+ *                      memory allocations remains within the
+ *                      @jit_phys_pages_limit in multi-threaded
+ *                      scenarios.
 * @jit_active_head:     List containing the just-in-time memory allocations
 *                       which are in use.
 * @jit_pool_head:       List containing the just-in-time memory allocations
@@ -1425,6 +1543,10 @@
 *                       is used to determine the atom's age when it is added to
 *                       the runnable RB-tree.
* @trim_level:          Level of JIT allocation trimming to perform on free (0-100%)
+ * @kprcs:               Reference to &struct kbase_process that the current
+ *                       kbase_context belongs to.
+ * @kprcs_link:          List link for the list of kbase contexts maintained
+ *                       under kbase_process.
 * @gwt_enabled:         Indicates if tracking of GPU writes is enabled, protected by
 *                       kbase_context.reg_lock.
 * @gwt_was_enabled:     Simple sticky bit flag to know if GWT was ever enabled.
@@ -1435,6 +1557,7 @@
 *                       for context scheduling, protected by hwaccess_lock.
 * @atoms_count:         Number of GPU atoms currently in use, per priority
 * @create_flags:        Flags used in context creation.
+ * @kinstr_jm:           Kernel job manager instrumentation context handle
 *
 * A kernel base context is an entity among which the GPU is scheduled.
 * Each context has its own GPU address space.
@@ -1545,10 +1668,11 @@ struct kbase_context {
 	u8 jit_current_allocations_per_bin[256];
 	u8 jit_version;
 	u8 jit_group_id;
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 	u64 jit_phys_pages_limit;
 	u64 jit_current_phys_pressure;
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+	u64 jit_phys_pages_to_be_allocated;
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 	struct list_head jit_active_head;
 	struct list_head jit_pool_head;
 	struct list_head jit_destroy_head;
@@ -1559,6 +1683,9 @@
 	u8 trim_level;
 
+	struct kbase_process *kprcs;
+	struct list_head kprcs_link;
+
 #ifdef CONFIG_MALI_CINSTR_GWT
 	bool gwt_enabled;
 	bool gwt_was_enabled;
@@ -1567,6 +1694,8 @@
 #endif
 
 	base_context_create_flags create_flags;
+
+	struct kbase_kinstr_jm *kinstr_jm;
 };
 
 #ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/mali_kbase/mali_kbase_hwaccess_instr.h b/mali_kbase/mali_kbase_hwaccess_instr.h
index be85491..4fd2e35 100644
--- a/mali_kbase/mali_kbase_hwaccess_instr.h
+++ b/mali_kbase/mali_kbase_hwaccess_instr.h
@@ -35,7 +35,7 @@
 * struct kbase_instr_hwcnt_enable - Enable hardware counter collection.
 * @dump_buffer:       GPU address to write counters to.
 * @dump_buffer_bytes: Size in bytes of the buffer pointed to by dump_buffer.
- * @jm_bm:             counters selection bitmask (JM).
+ * @fe_bm:             counters selection bitmask (Front End).
 * @shader_bm:         counters selection bitmask (Shader).
 * @tiler_bm:          counters selection bitmask (Tiler).
 * @mmu_l2_bm:         counters selection bitmask (MMU_L2).
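
The jm_bm to fe_bm rename above is mechanical: the field still carries the front-end counter selection bitmask taken from the physical enable map, as the new JM backend later in this patch does. A minimal sketch of how a caller might fill the renamed struct; the helper name fill_hwcnt_enable and the all-ones front-end mask are illustrative only, not part of the driver:

	static void fill_hwcnt_enable(struct kbase_instr_hwcnt_enable *enable,
				      const struct kbase_hwcnt_physical_enable_map *phys)
	{
		/* Previously this was enable->jm_bm = phys->jm_bm; only the
		 * name changed, to match the Front End counter block.
		 */
		enable->fe_bm = phys->fe_bm;
		enable->shader_bm = phys->shader_bm;
		enable->tiler_bm = phys->tiler_bm;
		enable->mmu_l2_bm = phys->mmu_l2_bm;
	}

	/* e.g. fill_hwcnt_enable(&enable, &phys); with phys filled in by
	 * kbase_hwcnt_gpu_enable_map_to_physical(), as done in this patch.
	 */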
@@ -45,7 +45,7 @@
 struct kbase_instr_hwcnt_enable {
 	u64 dump_buffer;
 	u64 dump_buffer_bytes;
-	u32 jm_bm;
+	u32 fe_bm;
 	u32 shader_bm;
 	u32 tiler_bm;
 	u32 mmu_l2_bm;
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index a61e5b9..94b7551 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -39,4 +39,18 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev,
 				u64 *cycle_counter,
 				u64 *system_time, struct timespec64 *ts);
 
+/**
+ * kbase_backend_get_gpu_time_norequest() - Get current GPU time without
+ *                                          request/release cycle counter
+ * @kbdev:         Device pointer
+ * @cycle_counter: Pointer to u64 to store cycle counter in
+ * @system_time:   Pointer to u64 to store system time in
+ * @ts:            Pointer to struct timespec64 to store current monotonic
+ *                 time in
+ */
+void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
+					  u64 *cycle_counter,
+					  u64 *system_time,
+					  struct timespec64 *ts);
+
 #endif /* _KBASE_BACKEND_TIME_H_ */
diff --git a/mali_kbase/mali_kbase_hwcnt.c b/mali_kbase/mali_kbase_hwcnt.c
index 14ec5cb..2708af7 100644
--- a/mali_kbase/mali_kbase_hwcnt.c
+++ b/mali_kbase/mali_kbase_hwcnt.c
@@ -242,6 +242,7 @@ static void kbasep_hwcnt_accumulator_disable(
 	bool backend_enabled = false;
 	struct kbase_hwcnt_accumulator *accum;
 	unsigned long flags;
+	u64 dump_time_ns;
 
 	WARN_ON(!hctx);
 	lockdep_assert_held(&hctx->accum_lock);
@@ -271,7 +272,7 @@
 		goto disable;
 
 	/* Try and accumulate before disabling */
-	errcode = hctx->iface->dump_request(accum->backend);
+	errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
 	if (errcode)
 		goto disable;
 
@@ -419,23 +420,16 @@
 	/* Initiate the dump if the backend is enabled. */
 	if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
-		/* Disable pre-emption, to make the timestamp as accurate as
-		 * possible.
-		 */
-		preempt_disable();
-		{
+		if (dump_buf) {
+			errcode = hctx->iface->dump_request(
+					accum->backend, &dump_time_ns);
+			dump_requested = true;
+		} else {
 			dump_time_ns = hctx->iface->timestamp_ns(
-				accum->backend);
-			if (dump_buf) {
-				errcode = hctx->iface->dump_request(
 					accum->backend);
-				dump_requested = true;
-			} else {
-				errcode = hctx->iface->dump_clear(
-					accum->backend);
-			}
+			errcode = hctx->iface->dump_clear(accum->backend);
 		}
-		preempt_enable();
+
 		if (errcode)
 			goto error;
 	} else {
diff --git a/mali_kbase/mali_kbase_hwcnt_backend.h b/mali_kbase/mali_kbase_hwcnt_backend.h
index b7aa0e1..3a921b7 100644
--- a/mali_kbase/mali_kbase_hwcnt_backend.h
+++ b/mali_kbase/mali_kbase_hwcnt_backend.h
@@ -1,6 +1,6 @@
 /*
 *
- * (C) COPYRIGHT 2018 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -137,6 +137,8 @@ typedef int (*kbase_hwcnt_backend_dump_clear_fn)(
 * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
 *                                               dump.
 * @backend:      Non-NULL pointer to backend.
+ * @dump_time_ns: Non-NULL pointer to where the timestamp of the dump
+ *                request is written on success.
 *
 * If the backend is not enabled or another dump is already in progress,
 * returns an error.
@@ -144,7 +146,8 @@
 * Return: 0 on success, else error code.
*/ typedef int (*kbase_hwcnt_backend_dump_request_fn)( - struct kbase_hwcnt_backend *backend); + struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns); /** * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c b/mali_kbase/mali_kbase_hwcnt_backend_gpu.c deleted file mode 100644 index 407c768..0000000 --- a/mali_kbase/mali_kbase_hwcnt_backend_gpu.c +++ /dev/null @@ -1,510 +0,0 @@ -/* - * - * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - * SPDX-License-Identifier: GPL-2.0 - * - */ - -#include "mali_kbase_hwcnt_backend_gpu.h" -#include "mali_kbase_hwcnt_gpu.h" -#include "mali_kbase_hwcnt_types.h" -#include "mali_kbase.h" -#include "mali_kbase_pm_ca.h" -#include "mali_kbase_hwaccess_instr.h" -#ifdef CONFIG_MALI_NO_MALI -#include "backend/gpu/mali_kbase_model_dummy.h" -#endif - - -/** - * struct kbase_hwcnt_backend_gpu_info - Information used to create an instance - * of a GPU hardware counter backend. - * @kbdev: KBase device. - * @use_secondary: True if secondary performance counters should be used, - * else false. Ignored if secondary counters are not supported. - * @metadata: Hardware counter metadata. - * @dump_bytes: Bytes of GPU memory required to perform a - * hardware counter dump. - */ -struct kbase_hwcnt_backend_gpu_info { - struct kbase_device *kbdev; - bool use_secondary; - const struct kbase_hwcnt_metadata *metadata; - size_t dump_bytes; -}; - -/** - * struct kbase_hwcnt_backend_gpu - Instance of a GPU hardware counter backend. - * @info: Info used to create the backend. - * @kctx: KBase context used for GPU memory allocation and - * counter dumping. - * @gpu_dump_va: GPU hardware counter dump buffer virtual address. - * @cpu_dump_va: CPU mapping of gpu_dump_va. - * @vmap: Dump buffer vmap. - * @enabled: True if dumping has been enabled, else false. - * @pm_core_mask: PM state sync-ed shaders core mask for the enabled dumping. 
- */ -struct kbase_hwcnt_backend_gpu { - const struct kbase_hwcnt_backend_gpu_info *info; - struct kbase_context *kctx; - u64 gpu_dump_va; - void *cpu_dump_va; - struct kbase_vmap_struct *vmap; - bool enabled; - u64 pm_core_mask; -}; - -/* GPU backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ -static u64 kbasep_hwcnt_backend_gpu_timestamp_ns( - struct kbase_hwcnt_backend *backend) -{ - (void)backend; - return ktime_get_raw_ns(); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ -static int kbasep_hwcnt_backend_gpu_dump_enable_nolock( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - struct kbase_context *kctx; - struct kbase_device *kbdev; - struct kbase_hwcnt_physical_enable_map phys; - struct kbase_instr_hwcnt_enable enable; - - if (!backend_gpu || !enable_map || backend_gpu->enabled || - (enable_map->metadata != backend_gpu->info->metadata)) - return -EINVAL; - - kctx = backend_gpu->kctx; - kbdev = backend_gpu->kctx->kbdev; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map); - - enable.jm_bm = phys.jm_bm; - enable.shader_bm = phys.shader_bm; - enable.tiler_bm = phys.tiler_bm; - enable.mmu_l2_bm = phys.mmu_l2_bm; - enable.use_secondary = backend_gpu->info->use_secondary; - enable.dump_buffer = backend_gpu->gpu_dump_va; - enable.dump_buffer_bytes = backend_gpu->info->dump_bytes; - - errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); - if (errcode) - goto error; - - backend_gpu->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); - backend_gpu->enabled = true; - - return 0; -error: - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_enable_fn */ -static int kbasep_hwcnt_backend_gpu_dump_enable( - struct kbase_hwcnt_backend *backend, - const struct kbase_hwcnt_enable_map *enable_map) -{ - unsigned long flags; - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - struct kbase_device *kbdev; - - if (!backend_gpu) - return -EINVAL; - - kbdev = backend_gpu->kctx->kbdev; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - - errcode = kbasep_hwcnt_backend_gpu_dump_enable_nolock( - backend, enable_map); - - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void kbasep_hwcnt_backend_gpu_dump_disable( - struct kbase_hwcnt_backend *backend) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (WARN_ON(!backend_gpu) || !backend_gpu->enabled) - return; - - errcode = kbase_instr_hwcnt_disable_internal(backend_gpu->kctx); - WARN_ON(errcode); - - backend_gpu->enabled = false; -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_clear_fn */ -static int kbasep_hwcnt_backend_gpu_dump_clear( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_clear(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_request_fn */ -static int kbasep_hwcnt_backend_gpu_dump_request( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - 
(struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_request_dump(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_wait_fn */ -static int kbasep_hwcnt_backend_gpu_dump_wait( - struct kbase_hwcnt_backend *backend) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !backend_gpu->enabled) - return -EINVAL; - - return kbase_instr_hwcnt_wait_for_dump(backend_gpu->kctx); -} - -/* GPU backend implementation of kbase_hwcnt_backend_dump_get_fn */ -static int kbasep_hwcnt_backend_gpu_dump_get( - struct kbase_hwcnt_backend *backend, - struct kbase_hwcnt_dump_buffer *dst, - const struct kbase_hwcnt_enable_map *dst_enable_map, - bool accumulate) -{ - struct kbase_hwcnt_backend_gpu *backend_gpu = - (struct kbase_hwcnt_backend_gpu *)backend; - - if (!backend_gpu || !dst || !dst_enable_map || - (backend_gpu->info->metadata != dst->metadata) || - (dst_enable_map->metadata != dst->metadata)) - return -EINVAL; - - /* Invalidate the kernel buffer before reading from it. */ - kbase_sync_mem_regions( - backend_gpu->kctx, backend_gpu->vmap, KBASE_SYNC_TO_CPU); - - return kbase_hwcnt_gpu_dump_get( - dst, backend_gpu->cpu_dump_va, dst_enable_map, - backend_gpu->pm_core_mask, accumulate); -} - -/** - * kbasep_hwcnt_backend_gpu_dump_alloc() - Allocate a GPU dump buffer. - * @info: Non-NULL pointer to GPU backend info. - * @kctx: Non-NULL pointer to kbase context. - * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address - * is stored on success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_dump_alloc( - const struct kbase_hwcnt_backend_gpu_info *info, - struct kbase_context *kctx, - u64 *gpu_dump_va) -{ - struct kbase_va_region *reg; - u64 flags; - u64 nr_pages; - - WARN_ON(!info); - WARN_ON(!kctx); - WARN_ON(!gpu_dump_va); - - flags = BASE_MEM_PROT_CPU_RD | - BASE_MEM_PROT_GPU_WR | - BASEP_MEM_PERMANENT_KERNEL_MAPPING | - BASE_MEM_CACHED_CPU; - - if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) - flags |= BASE_MEM_UNCACHED_GPU; - - nr_pages = PFN_UP(info->dump_bytes); - - reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); - - if (!reg) - return -ENOMEM; - - return 0; -} - -/** - * kbasep_hwcnt_backend_gpu_dump_free() - Free an allocated GPU dump buffer. - * @kctx: Non-NULL pointer to kbase context. - * @gpu_dump_va: GPU dump buffer virtual address. - */ -static void kbasep_hwcnt_backend_gpu_dump_free( - struct kbase_context *kctx, - u64 gpu_dump_va) -{ - WARN_ON(!kctx); - if (gpu_dump_va) - kbase_mem_free(kctx, gpu_dump_va); -} - -/** - * kbasep_hwcnt_backend_gpu_destroy() - Destroy a GPU backend. - * @backend: Pointer to GPU backend to destroy. - * - * Can be safely called on a backend in any state of partial construction. - */ -static void kbasep_hwcnt_backend_gpu_destroy( - struct kbase_hwcnt_backend_gpu *backend) -{ - if (!backend) - return; - - if (backend->kctx) { - struct kbase_context *kctx = backend->kctx; - struct kbase_device *kbdev = kctx->kbdev; - - if (backend->cpu_dump_va) - kbase_phy_alloc_mapping_put(kctx, backend->vmap); - - if (backend->gpu_dump_va) - kbasep_hwcnt_backend_gpu_dump_free( - kctx, backend->gpu_dump_va); - - kbasep_js_release_privileged_ctx(kbdev, kctx); - kbase_destroy_context(kctx); - } - - kfree(backend); -} - -/** - * kbasep_hwcnt_backend_gpu_create() - Create a GPU backend. 
- * @info: Non-NULL pointer to backend info. - * @out_backend: Non-NULL pointer to where backend is stored on success. - * - * Return: 0 on success, else error code. - */ -static int kbasep_hwcnt_backend_gpu_create( - const struct kbase_hwcnt_backend_gpu_info *info, - struct kbase_hwcnt_backend_gpu **out_backend) -{ - - int errcode; - struct kbase_device *kbdev; - struct kbase_hwcnt_backend_gpu *backend = NULL; - - WARN_ON(!info); - WARN_ON(!out_backend); - - kbdev = info->kbdev; - - backend = kzalloc(sizeof(*backend), GFP_KERNEL); - if (!backend) - goto alloc_error; - - backend->info = info; - - backend->kctx = kbase_create_context(kbdev, true, - BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); - if (!backend->kctx) - goto alloc_error; - - kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); - - errcode = kbasep_hwcnt_backend_gpu_dump_alloc( - info, backend->kctx, &backend->gpu_dump_va); - if (errcode) - goto error; - - backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, - backend->gpu_dump_va, &backend->vmap); - if (!backend->cpu_dump_va) - goto alloc_error; - -#ifdef CONFIG_MALI_NO_MALI - /* The dummy model needs the CPU mapping. */ - gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); -#endif - - *out_backend = backend; - return 0; - -alloc_error: - errcode = -ENOMEM; -error: - kbasep_hwcnt_backend_gpu_destroy(backend); - return errcode; -} - -/* GPU backend implementation of kbase_hwcnt_backend_init_fn */ -static int kbasep_hwcnt_backend_gpu_init( - const struct kbase_hwcnt_backend_info *info, - struct kbase_hwcnt_backend **out_backend) -{ - int errcode; - struct kbase_hwcnt_backend_gpu *backend = NULL; - - if (!info || !out_backend) - return -EINVAL; - - errcode = kbasep_hwcnt_backend_gpu_create( - (const struct kbase_hwcnt_backend_gpu_info *) info, &backend); - if (errcode) - return errcode; - - *out_backend = (struct kbase_hwcnt_backend *)backend; - - return 0; -} - -/* GPU backend implementation of kbase_hwcnt_backend_term_fn */ -static void kbasep_hwcnt_backend_gpu_term(struct kbase_hwcnt_backend *backend) -{ - if (!backend) - return; - - kbasep_hwcnt_backend_gpu_dump_disable(backend); - kbasep_hwcnt_backend_gpu_destroy( - (struct kbase_hwcnt_backend_gpu *)backend); -} - -/** - * kbasep_hwcnt_backend_gpu_info_destroy() - Destroy a GPU backend info. - * @info: Pointer to info to destroy. - * - * Can be safely called on a backend info in any state of partial construction. - */ -static void kbasep_hwcnt_backend_gpu_info_destroy( - const struct kbase_hwcnt_backend_gpu_info *info) -{ - if (!info) - return; - - kbase_hwcnt_gpu_metadata_destroy(info->metadata); - kfree(info); -} - -/** - * kbasep_hwcnt_backend_gpu_info_create() - Create a GPU backend info. - * @kbdev: Non_NULL pointer to kbase device. - * @out_info: Non-NULL pointer to where info is stored on success. - * - * Return 0 on success, else error code. 
- */ -static int kbasep_hwcnt_backend_gpu_info_create( - struct kbase_device *kbdev, - const struct kbase_hwcnt_backend_gpu_info **out_info) -{ - int errcode = -ENOMEM; - struct kbase_hwcnt_gpu_info hwcnt_gpu_info; - struct kbase_hwcnt_backend_gpu_info *info = NULL; - - WARN_ON(!kbdev); - WARN_ON(!out_info); - - errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); - if (errcode) - return errcode; - - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) - goto error; - - info->kbdev = kbdev; - -#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY - info->use_secondary = true; -#else - info->use_secondary = false; -#endif - - errcode = kbase_hwcnt_gpu_metadata_create( - &hwcnt_gpu_info, info->use_secondary, - &info->metadata, - &info->dump_bytes); - if (errcode) - goto error; - - *out_info = info; - - return 0; -error: - kbasep_hwcnt_backend_gpu_info_destroy(info); - return errcode; -} - -int kbase_hwcnt_backend_gpu_create( - struct kbase_device *kbdev, - struct kbase_hwcnt_backend_interface *iface) -{ - int errcode; - const struct kbase_hwcnt_backend_gpu_info *info = NULL; - - if (!kbdev || !iface) - return -EINVAL; - - errcode = kbasep_hwcnt_backend_gpu_info_create(kbdev, &info); - - if (errcode) - return errcode; - - iface->metadata = info->metadata; - iface->info = (struct kbase_hwcnt_backend_info *)info; - iface->init = kbasep_hwcnt_backend_gpu_init; - iface->term = kbasep_hwcnt_backend_gpu_term; - iface->timestamp_ns = kbasep_hwcnt_backend_gpu_timestamp_ns; - iface->dump_enable = kbasep_hwcnt_backend_gpu_dump_enable; - iface->dump_enable_nolock = kbasep_hwcnt_backend_gpu_dump_enable_nolock; - iface->dump_disable = kbasep_hwcnt_backend_gpu_dump_disable; - iface->dump_clear = kbasep_hwcnt_backend_gpu_dump_clear; - iface->dump_request = kbasep_hwcnt_backend_gpu_dump_request; - iface->dump_wait = kbasep_hwcnt_backend_gpu_dump_wait; - iface->dump_get = kbasep_hwcnt_backend_gpu_dump_get; - - return 0; -} - -void kbase_hwcnt_backend_gpu_destroy( - struct kbase_hwcnt_backend_interface *iface) -{ - if (!iface) - return; - - kbasep_hwcnt_backend_gpu_info_destroy( - (const struct kbase_hwcnt_backend_gpu_info *)iface->info); - memset(iface, 0, sizeof(*iface)); -} diff --git a/mali_kbase/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/mali_kbase_hwcnt_backend_jm.c new file mode 100644 index 0000000..02a42bf --- /dev/null +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.c @@ -0,0 +1,707 @@ +/* + * + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#include "mali_kbase_hwcnt_backend_jm.h"
+#include "mali_kbase_hwcnt_gpu.h"
+#include "mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "mali_kbase_pm_ca.h"
+#include "mali_kbase_hwaccess_instr.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "mali_kbase_ccswe.h"
+
+#ifdef CONFIG_MALI_NO_MALI
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+/**
+ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance
+ *                                      of a JM hardware counter backend.
+ * @kbdev:         KBase device.
+ * @use_secondary: True if secondary performance counters should be used,
+ *                 else false. Ignored if secondary counters are not supported.
+ * @metadata:      Hardware counter metadata.
+ * @dump_bytes:    Bytes of GPU memory required to perform a
+ *                 hardware counter dump.
+ */
+struct kbase_hwcnt_backend_jm_info {
+	struct kbase_device *kbdev;
+	bool use_secondary;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
+ * @info:            Info used to create the backend.
+ * @kctx:            KBase context used for GPU memory allocation and
+ *                   counter dumping.
+ * @gpu_dump_va:     GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va:     CPU mapping of gpu_dump_va.
+ * @vmap:            Dump buffer vmap.
+ * @enabled:         True if dumping has been enabled, else false.
+ * @pm_core_mask:    PM state sync-ed shaders core mask for the enabled
+ *                   dumping.
+ * @clk_enable_map:  The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed:
+ *                   Cycle count elapsed for a given sample period.
+ *                   The top clock domain, index 0, is read directly from
+ *                   hardware; the other clock domains are calculated by
+ *                   software estimation.
+ * @prev_cycle_count: Previous cycle count used to calculate the cycle count
+ *                   for the sample period.
+ * @rate_listener:   Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ */
+struct kbase_hwcnt_backend_jm {
+	const struct kbase_hwcnt_backend_jm_info *info;
+	struct kbase_context *kctx;
+	u64 gpu_dump_va;
+	void *cpu_dump_va;
+	struct kbase_vmap_struct *vmap;
+	bool enabled;
+	u64 pm_core_mask;
+	u64 clk_enable_map;
+	u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+	u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+	struct kbase_clk_rate_listener rate_listener;
+	struct kbase_ccswe ccswe_shader_cores;
+};
+
+/**
+ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
+ *
+ * @rate_listener: Callback state
+ * @clk_index:     Clock index
+ * @clk_rate_hz:   Clock frequency(Hz)
+ */
+static void kbasep_hwcnt_backend_jm_on_freq_change(
+	struct kbase_clk_rate_listener *rate_listener,
+	u32 clk_index,
+	u32 clk_rate_hz)
+{
+	struct kbase_hwcnt_backend_jm *backend_jm = container_of(
+		rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
+	u64 timestamp_ns;
+
+	if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+		return;
+
+	timestamp_ns = ktime_get_raw_ns();
+	kbase_ccswe_freq_change(
+		&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking
+ *
+ * @backend_jm:   Non-NULL pointer to JM backend.
+ * @enable_map:   Non-NULL pointer to enable map specifying enabled counters.
+ * @timestamp_ns: Timestamp(ns) when HWCNTs were enabled.
+ */
+static void kbasep_hwcnt_backend_jm_cc_enable(
+	struct kbase_hwcnt_backend_jm *backend_jm,
+	const struct kbase_hwcnt_enable_map *enable_map,
+	u64 timestamp_ns)
+{
+	struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+	u64 clk_enable_map = enable_map->clk_enable_map;
+	u64 cycle_count;
+
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+		/* turn on the cycle counter */
+		kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+		/* Read cycle count for top clock domain. */
+		kbase_backend_get_gpu_time_norequest(
+			kbdev, &cycle_count, NULL, NULL);
+
+		backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] =
+			cycle_count;
+	}
+
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+		/* software estimation for non-top clock domains */
+		struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+		const struct kbase_clk_data *clk_data =
+			rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+		u32 cur_freq;
+		unsigned long flags;
+
+		spin_lock_irqsave(&rtm->lock, flags);
+
+		cur_freq = (u32) clk_data->clock_val;
+		kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
+		kbase_ccswe_freq_change(
+			&backend_jm->ccswe_shader_cores,
+			timestamp_ns,
+			cur_freq);
+
+		kbase_clk_rate_trace_manager_subscribe_no_lock(
+			rtm, &backend_jm->rate_listener);
+
+		spin_unlock_irqrestore(&rtm->lock, flags);
+
+		/* ccswe was reset. The estimated cycle is zero. */
+		backend_jm->prev_cycle_count[
+			KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
+	}
+
+	/* Keep clk_enable_map for dump_request. */
+	backend_jm->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to JM backend.
+ */
+static void kbasep_hwcnt_backend_jm_cc_disable(
+	struct kbase_hwcnt_backend_jm *backend_jm)
+{
+	struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+	struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+	u64 clk_enable_map = backend_jm->clk_enable_map;
+
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+		/* turn off the cycle counter */
+		kbase_pm_release_gpu_cycle_counter(backend_jm->kctx->kbdev);
+	}
+	if (kbase_hwcnt_clk_enable_map_enabled(
+		clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+
+		kbase_clk_rate_trace_manager_unsubscribe(
+			rtm, &backend_jm->rate_listener);
+	}
+}
+
+
+/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_jm_timestamp_ns(
+	struct kbase_hwcnt_backend *backend)
+{
+	(void)backend;
+	return ktime_get_raw_ns();
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int kbasep_hwcnt_backend_jm_dump_enable_nolock(
+	struct kbase_hwcnt_backend *backend,
+	const struct kbase_hwcnt_enable_map *enable_map)
+{
+	int errcode;
+	struct kbase_hwcnt_backend_jm *backend_jm =
+		(struct kbase_hwcnt_backend_jm *)backend;
+	struct kbase_context *kctx;
+	struct kbase_device *kbdev;
+	struct kbase_hwcnt_physical_enable_map phys;
+	struct kbase_instr_hwcnt_enable enable;
+	u64 timestamp_ns;
+
+	if (!backend_jm || !enable_map || backend_jm->enabled ||
+	    (enable_map->metadata != backend_jm->info->metadata))
+		return -EINVAL;
+
+	kctx = backend_jm->kctx;
+	kbdev = backend_jm->kctx->kbdev;
+
+	lockdep_assert_held(&kbdev->hwaccess_lock);
+
+	kbase_hwcnt_gpu_enable_map_to_physical(&phys, enable_map);
+
+	enable.fe_bm = phys.fe_bm;
+	enable.shader_bm = phys.shader_bm;
+	enable.tiler_bm = phys.tiler_bm;
+	enable.mmu_l2_bm =
phys.mmu_l2_bm; + enable.use_secondary = backend_jm->info->use_secondary; + enable.dump_buffer = backend_jm->gpu_dump_va; + enable.dump_buffer_bytes = backend_jm->info->dump_bytes; + + timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); + if (errcode) + goto error; + + backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + backend_jm->enabled = true; + + kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); + + return 0; +error: + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_jm_dump_enable( + struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + unsigned long flags; + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + + if (!backend_jm) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock( + backend, enable_map); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_jm_dump_disable( + struct kbase_hwcnt_backend *backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (WARN_ON(!backend_jm) || !backend_jm->enabled) + return; + + kbasep_hwcnt_backend_jm_cc_disable(backend_jm); + + errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); + WARN_ON(errcode); + + backend_jm->enabled = false; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_jm_dump_clear( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_clear(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_jm_dump_request( + struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + const struct kbase_hwcnt_metadata *metadata; + u64 current_cycle_count; + size_t clk; + int ret; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + metadata = backend_jm->info->metadata; + + /* Disable pre-emption, to make the timestamp as accurate as possible */ + preempt_disable(); + { + *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + backend_jm->clk_enable_map, clk)) + continue; + + if (clk == KBASE_CLOCK_DOMAIN_TOP) { + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest( + kbdev, ¤t_cycle_count, + NULL, NULL); + } else { + /* + * Estimate cycle count for non-top clock + * domain. + */ + current_cycle_count = kbase_ccswe_cycle_at( + &backend_jm->ccswe_shader_cores, + *dump_time_ns); + } + backend_jm->cycle_count_elapsed[clk] = + current_cycle_count - + backend_jm->prev_cycle_count[clk]; + + /* + * Keep the current cycle count for later calculation. 
+ */ + backend_jm->prev_cycle_count[clk] = current_cycle_count; + } + } + preempt_enable(); + + return ret; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_jm_dump_wait( + struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_jm_dump_get( + struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + (struct kbase_hwcnt_backend_jm *)backend; + size_t clk; + + if (!backend_jm || !dst || !dst_enable_map || + (backend_jm->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Invalidate the kernel buffer before reading from it. */ + kbase_sync_mem_regions( + backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); + + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { + if (!kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + continue; + + /* Extract elapsed cycle count for each clock domain. */ + dst->clk_cnt_buf[clk] = backend_jm->cycle_count_elapsed[clk]; + } + + return kbase_hwcnt_gpu_dump_get( + dst, backend_jm->cpu_dump_va, dst_enable_map, + backend_jm->pm_core_mask, accumulate); +} + +/** + * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. + * @info: Non-NULL pointer to JM backend info. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address + * is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_dump_alloc( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_context *kctx, + u64 *gpu_dump_va) +{ + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + + WARN_ON(!info); + WARN_ON(!kctx); + WARN_ON(!gpu_dump_va); + + flags = BASE_MEM_PROT_CPU_RD | + BASE_MEM_PROT_GPU_WR | + BASEP_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU; + + if (kctx->kbdev->mmu_mode->flags & KBASE_MMU_MODE_HAS_NON_CACHEABLE) + flags |= BASE_MEM_UNCACHED_GPU; + + nr_pages = PFN_UP(info->dump_bytes); + + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va); + + if (!reg) + return -ENOMEM; + + return 0; +} + +/** + * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: GPU dump buffer virtual address. + */ +static void kbasep_hwcnt_backend_jm_dump_free( + struct kbase_context *kctx, + u64 gpu_dump_va) +{ + WARN_ON(!kctx); + if (gpu_dump_va) + kbase_mem_free(kctx, gpu_dump_va); +} + +/** + * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. + * @backend: Pointer to JM backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. 
+ */ +static void kbasep_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_jm *backend) +{ + if (!backend) + return; + + if (backend->kctx) { + struct kbase_context *kctx = backend->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + if (backend->cpu_dump_va) + kbase_phy_alloc_mapping_put(kctx, backend->vmap); + + if (backend->gpu_dump_va) + kbasep_hwcnt_backend_jm_dump_free( + kctx, backend->gpu_dump_va); + + kbasep_js_release_privileged_ctx(kbdev, kctx); + kbase_destroy_context(kctx); + } + + kfree(backend); +} + +/** + * kbasep_hwcnt_backend_jm_create() - Create a JM backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_create( + const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_hwcnt_backend_jm **out_backend) +{ + + int errcode; + struct kbase_device *kbdev; + struct kbase_hwcnt_backend_jm *backend = NULL; + + WARN_ON(!info); + WARN_ON(!out_backend); + + kbdev = info->kbdev; + + backend = kzalloc(sizeof(*backend), GFP_KERNEL); + if (!backend) + goto alloc_error; + + backend->info = info; + + backend->kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); + if (!backend->kctx) + goto alloc_error; + + kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + + errcode = kbasep_hwcnt_backend_jm_dump_alloc( + info, backend->kctx, &backend->gpu_dump_va); + if (errcode) + goto error; + + backend->cpu_dump_va = kbase_phy_alloc_mapping_get(backend->kctx, + backend->gpu_dump_va, &backend->vmap); + if (!backend->cpu_dump_va) + goto alloc_error; + + kbase_ccswe_init(&backend->ccswe_shader_cores); + backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; + +#ifdef CONFIG_MALI_NO_MALI + /* The dummy model needs the CPU mapping. */ + gpu_model_set_dummy_prfcnt_base_cpu(backend->cpu_dump_va); +#endif + + *out_backend = backend; + return 0; + +alloc_error: + errcode = -ENOMEM; +error: + kbasep_hwcnt_backend_jm_destroy(backend); + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_jm_init( + const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend = NULL; + + if (!info || !out_backend) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_create( + (const struct kbase_hwcnt_backend_jm_info *) info, &backend); + if (errcode) + return errcode; + + *out_backend = (struct kbase_hwcnt_backend *)backend; + + return 0; +} + +/* JM backend implementation of kbase_hwcnt_backend_term_fn */ +static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) +{ + if (!backend) + return; + + kbasep_hwcnt_backend_jm_dump_disable(backend); + kbasep_hwcnt_backend_jm_destroy( + (struct kbase_hwcnt_backend_jm *)backend); +} + +/** + * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info. + * @info: Pointer to info to destroy. + * + * Can be safely called on a backend info in any state of partial construction. + */ +static void kbasep_hwcnt_backend_jm_info_destroy( + const struct kbase_hwcnt_backend_jm_info *info) +{ + if (!info) + return; + + kbase_hwcnt_gpu_metadata_destroy(info->metadata); + kfree(info); +} + +/** + * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info. + * @kbdev: Non-NULL pointer to kbase device. + * @out_info: Non-NULL pointer to where info is stored on success.
+ * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_info_create( + struct kbase_device *kbdev, + const struct kbase_hwcnt_backend_jm_info **out_info) +{ + int errcode = -ENOMEM; + struct kbase_hwcnt_gpu_info hwcnt_gpu_info; + struct kbase_hwcnt_backend_jm_info *info = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_info); + + errcode = kbase_hwcnt_gpu_info_init(kbdev, &hwcnt_gpu_info); + if (errcode) + return errcode; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + goto error; + + info->kbdev = kbdev; + +#ifdef CONFIG_MALI_PRFCNT_SET_SECONDARY + info->use_secondary = true; +#else + info->use_secondary = false; +#endif + + errcode = kbase_hwcnt_gpu_metadata_create( + &hwcnt_gpu_info, info->use_secondary, + &info->metadata, + &info->dump_bytes); + if (errcode) + goto error; + + *out_info = info; + + return 0; +error: + kbasep_hwcnt_backend_jm_info_destroy(info); + return errcode; +} + +int kbase_hwcnt_backend_jm_create( + struct kbase_device *kbdev, + struct kbase_hwcnt_backend_interface *iface) +{ + int errcode; + const struct kbase_hwcnt_backend_jm_info *info = NULL; + + if (!kbdev || !iface) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info); + + if (errcode) + return errcode; + + iface->metadata = info->metadata; + iface->info = (struct kbase_hwcnt_backend_info *)info; + iface->init = kbasep_hwcnt_backend_jm_init; + iface->term = kbasep_hwcnt_backend_jm_term; + iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns; + iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable; + iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock; + iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable; + iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear; + iface->dump_request = kbasep_hwcnt_backend_jm_dump_request; + iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait; + iface->dump_get = kbasep_hwcnt_backend_jm_dump_get; + + return 0; +} + +void kbase_hwcnt_backend_jm_destroy( + struct kbase_hwcnt_backend_interface *iface) +{ + if (!iface) + return; + + kbasep_hwcnt_backend_jm_info_destroy( + (const struct kbase_hwcnt_backend_jm_info *)iface->info); + memset(iface, 0, sizeof(*iface)); +} diff --git a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h b/mali_kbase/mali_kbase_hwcnt_backend_jm.h index 7712f14..f15faeb 100644 --- a/mali_kbase/mali_kbase_hwcnt_backend_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_backend_jm.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,19 +21,19 @@ */ /** - * Concrete implementation of mali_kbase_hwcnt_backend interface for GPU + * Concrete implementation of mali_kbase_hwcnt_backend interface for JM * backend. */ -#ifndef _KBASE_HWCNT_BACKEND_GPU_H_ -#define _KBASE_HWCNT_BACKEND_GPU_H_ +#ifndef _KBASE_HWCNT_BACKEND_JM_H_ +#define _KBASE_HWCNT_BACKEND_JM_H_ #include "mali_kbase_hwcnt_backend.h" struct kbase_device; /** - * kbase_hwcnt_backend_gpu_create() - Create a GPU hardware counter backend + * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend * interface. * @kbdev: Non-NULL pointer to kbase device. * @iface: Non-NULL pointer to backend interface structure that is filled in @@ -43,19 +43,19 @@ struct kbase_device; * * Return: 0 on success, else error code.
*/ -int kbase_hwcnt_backend_gpu_create( +int kbase_hwcnt_backend_jm_create( struct kbase_device *kbdev, struct kbase_hwcnt_backend_interface *iface); /** - * kbase_hwcnt_backend_gpu_destroy() - Destroy a GPU hardware counter backend + * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend * interface. * @iface: Pointer to interface to destroy. * * Can be safely called on an all-zeroed interface, or on an already destroyed * interface. */ -void kbase_hwcnt_backend_gpu_destroy( +void kbase_hwcnt_backend_jm_destroy( struct kbase_hwcnt_backend_interface *iface); -#endif /* _KBASE_HWCNT_BACKEND_GPU_H_ */ +#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.c b/mali_kbase/mali_kbase_hwcnt_gpu.c index 095c765..1034328 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.c +++ b/mali_kbase/mali_kbase_hwcnt_gpu.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018-2019 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -138,6 +138,8 @@ static int kbasep_hwcnt_backend_gpu_metadata_v4_create( } } + desc.clk_cnt = v4_info->clk_cnt; + errcode = kbase_hwcnt_metadata_create(&desc, metadata); /* Always clean up, as metadata will make a copy of the input args */ @@ -258,6 +260,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_v5_create( desc.grp_cnt = 1; desc.grps = &group; + desc.clk_cnt = v5_info->clk_cnt; /* The JM, Tiler, and L2s are always available, and are before cores */ desc.avail_mask = (1ull << non_sc_block_count) - 1; @@ -287,6 +290,8 @@ int kbase_hwcnt_gpu_info_init( struct kbase_device *kbdev, struct kbase_hwcnt_gpu_info *info) { + size_t clk; + if (!kbdev || !info) return -EINVAL; @@ -307,6 +312,14 @@ int kbase_hwcnt_gpu_info_init( info->v5.core_mask = core_mask; } #endif + + /* Determine the number of available clock domains. 
*/ + for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { + if (kbdev->pm.clk_rtm.clks[clk] == NULL) + break; + } + info->v5.clk_cnt = clk; + return 0; } @@ -563,7 +576,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( { const struct kbase_hwcnt_metadata *metadata; - u64 jm_bm = 0; + u64 fe_bm = 0; u64 shader_bm = 0; u64 tiler_bm = 0; u64 mmu_l2_bm = 0; @@ -601,7 +614,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( mmu_l2_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - jm_bm |= *blk_map; + fe_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: break; @@ -613,7 +626,7 @@ void kbase_hwcnt_gpu_enable_map_to_physical( WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - jm_bm |= *blk_map; + fe_bm |= *blk_map; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: tiler_bm |= *blk_map; @@ -635,8 +648,8 @@ void kbase_hwcnt_gpu_enable_map_to_physical( } } - dst->jm_bm = - kbasep_hwcnt_backend_gpu_block_map_to_physical(jm_bm, 0); + dst->fe_bm = + kbasep_hwcnt_backend_gpu_block_map_to_physical(fe_bm, 0); dst->shader_bm = kbasep_hwcnt_backend_gpu_block_map_to_physical(shader_bm, 0); dst->tiler_bm = @@ -653,7 +666,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( const struct kbase_hwcnt_metadata *metadata; u64 ignored_hi; - u64 jm_bm; + u64 fe_bm; u64 shader_bm; u64 tiler_bm; u64 mmu_l2_bm; @@ -665,7 +678,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( metadata = dst->metadata; kbasep_hwcnt_backend_gpu_block_map_from_physical( - src->jm_bm, &jm_bm, &ignored_hi); + src->fe_bm, &fe_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( src->shader_bm, &shader_bm, &ignored_hi); kbasep_hwcnt_backend_gpu_block_map_from_physical( @@ -698,7 +711,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( *blk_map = mmu_l2_bm; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_JM: - *blk_map = jm_bm; + *blk_map = fe_bm; break; case KBASE_HWCNT_GPU_V4_BLOCK_TYPE_RESERVED: break; @@ -710,7 +723,7 @@ void kbase_hwcnt_gpu_enable_map_from_physical( WARN_ON(blk_val_cnt != KBASE_HWCNT_V5_VALUES_PER_BLOCK); switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_JM: - *blk_map = jm_bm; + *blk_map = fe_bm; break; case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: *blk_map = tiler_bm; diff --git a/mali_kbase/mali_kbase_hwcnt_gpu.h b/mali_kbase/mali_kbase_hwcnt_gpu.h index 12891e0..13c1af3 100644 --- a/mali_kbase/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/mali_kbase_hwcnt_gpu.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,13 +80,13 @@ enum kbase_hwcnt_gpu_v5_block_type { /** * struct kbase_hwcnt_physical_enable_map - Representation of enable map * directly used by GPU. - * @jm_bm: Job Manager counters selection bitmask. + * @fe_bm: Front end (JM/CSHW) counters selection bitmask. * @shader_bm: Shader counters selection bitmask. * @tiler_bm: Tiler counters selection bitmask. * @mmu_l2_bm: MMU_L2 counters selection bitmask. */ struct kbase_hwcnt_physical_enable_map { - u32 jm_bm; + u32 fe_bm; u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; @@ -96,6 +96,7 @@ struct kbase_hwcnt_physical_enable_map { * struct kbase_hwcnt_gpu_v4_info - Information about hwcnt blocks on v4 GPUs. 
* @cg_count: Core group count. * @cgs: Non-NULL pointer to array of cg_count coherent group structures. + * @clk_cnt: Number of clock domains available. * * V4 devices are Mali-T6xx or Mali-T72x, and have one or more core groups, * where each core group may have a physically different layout. @@ -103,16 +104,19 @@ struct kbase_hwcnt_physical_enable_map { struct kbase_hwcnt_gpu_v4_info { size_t cg_count; const struct mali_base_gpu_coherent_group *cgs; + u8 clk_cnt; }; /** * struct kbase_hwcnt_gpu_v5_info - Information about hwcnt blocks on v5 GPUs. * @l2_count: L2 cache count. * @core_mask: Shader core mask. May be sparse. + * @clk_cnt: Number of clock domains available. */ struct kbase_hwcnt_gpu_v5_info { size_t l2_count; u64 core_mask; + u8 clk_cnt; }; /** diff --git a/mali_kbase/mali_kbase_hwcnt_legacy.c b/mali_kbase/mali_kbase_hwcnt_legacy.c index b0e6aee..794ef39 100644 --- a/mali_kbase/mali_kbase_hwcnt_legacy.c +++ b/mali_kbase/mali_kbase_hwcnt_legacy.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -69,7 +69,7 @@ int kbase_hwcnt_legacy_client_create( goto error; /* Translate from the ioctl enable map to the internal one */ - phys_em.jm_bm = enable->jm_bm; + phys_em.fe_bm = enable->fe_bm; phys_em.shader_bm = enable->shader_bm; phys_em.tiler_bm = enable->tiler_bm; phys_em.mmu_l2_bm = enable->mmu_l2_bm; diff --git a/mali_kbase/mali_kbase_hwcnt_reader.h b/mali_kbase/mali_kbase_hwcnt_reader.h index 10706b8..8cd3835 100644 --- a/mali_kbase/mali_kbase_hwcnt_reader.h +++ b/mali_kbase/mali_kbase_hwcnt_reader.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2015 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2015, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,31 +23,53 @@ #ifndef _KBASE_HWCNT_READER_H_ #define _KBASE_HWCNT_READER_H_ +#include <stddef.h> + /* The ids of ioctl commands. 
*/ #define KBASE_HWCNT_READER 0xBE #define KBASE_HWCNT_READER_GET_HWVER _IOR(KBASE_HWCNT_READER, 0x00, u32) #define KBASE_HWCNT_READER_GET_BUFFER_SIZE _IOR(KBASE_HWCNT_READER, 0x01, u32) #define KBASE_HWCNT_READER_DUMP _IOW(KBASE_HWCNT_READER, 0x10, u32) #define KBASE_HWCNT_READER_CLEAR _IOW(KBASE_HWCNT_READER, 0x11, u32) -#define KBASE_HWCNT_READER_GET_BUFFER _IOR(KBASE_HWCNT_READER, 0x20,\ +#define KBASE_HWCNT_READER_GET_BUFFER _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES _IOR(KBASE_HWCNT_READER, 0x20,\ struct kbase_hwcnt_reader_metadata) -#define KBASE_HWCNT_READER_PUT_BUFFER _IOW(KBASE_HWCNT_READER, 0x21,\ +#define KBASE_HWCNT_READER_PUT_BUFFER _IOC(_IOC_WRITE, KBASE_HWCNT_READER, 0x21,\ + offsetof(struct kbase_hwcnt_reader_metadata, cycles)) +#define KBASE_HWCNT_READER_PUT_BUFFER_WITH_CYCLES _IOW(KBASE_HWCNT_READER, 0x21,\ struct kbase_hwcnt_reader_metadata) #define KBASE_HWCNT_READER_SET_INTERVAL _IOW(KBASE_HWCNT_READER, 0x30, u32) #define KBASE_HWCNT_READER_ENABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x40, u32) #define KBASE_HWCNT_READER_DISABLE_EVENT _IOW(KBASE_HWCNT_READER, 0x41, u32) #define KBASE_HWCNT_READER_GET_API_VERSION _IOW(KBASE_HWCNT_READER, 0xFF, u32) +#define KBASE_HWCNT_READER_GET_API_VERSION_WITH_FEATURES \ + _IOW(KBASE_HWCNT_READER, 0xFF, \ + struct kbase_hwcnt_reader_api_version) + +/** + * struct kbase_hwcnt_reader_metadata_cycles - GPU clock cycles + * @top: the number of cycles associated with the main clock for the + * GPU + * @shader_cores: the cycles that have elapsed on the GPU shader cores + */ +struct kbase_hwcnt_reader_metadata_cycles { + u64 top; + u64 shader_cores; +}; /** * struct kbase_hwcnt_reader_metadata - hwcnt reader sample buffer metadata * @timestamp: time when sample was collected * @event_id: id of an event that triggered sample collection * @buffer_idx: position in sampling area where sample buffer was stored + * @cycles: the GPU cycles that occurred since the last sample */ struct kbase_hwcnt_reader_metadata { u64 timestamp; u32 event_id; u32 buffer_idx; + struct kbase_hwcnt_reader_metadata_cycles cycles; }; /** @@ -67,5 +89,18 @@ enum base_hwcnt_reader_event { BASE_HWCNT_READER_EVENT_COUNT }; +/** + * struct kbase_hwcnt_reader_api_version - hwcnt reader API version + * @version: API version + * @features: available features in this API version + */ +#define KBASE_HWCNT_READER_API_VERSION_NO_FEATURE (0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP (1 << 0) +#define KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES (1 << 1) +struct kbase_hwcnt_reader_api_version { + u32 version; + u32 features; +}; + #endif /* _KBASE_HWCNT_READER_H_ */ diff --git a/mali_kbase/mali_kbase_hwcnt_types.c b/mali_kbase/mali_kbase_hwcnt_types.c index 1e9efde..73ea609 100644 --- a/mali_kbase/mali_kbase_hwcnt_types.c +++ b/mali_kbase/mali_kbase_hwcnt_types.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,6 +55,10 @@ int kbase_hwcnt_metadata_create( if (!desc || !out_metadata) return -EINVAL; + /* The maximum number of clock domains is 64.
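The redefinition of KBASE_HWCNT_READER_GET_BUFFER above exploits the fact that an ioctl command number encodes its argument size: building the command from offsetof(struct kbase_hwcnt_reader_metadata, cycles) reproduces the command value that pre-cycles clients were compiled against, while the _WITH_CYCLES variants encode the full extended struct. A small userspace program demonstrating the size encoding (struct layout and defines copied from the header above; a sketch, not a complete reader client):

#include <linux/ioctl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;
typedef uint32_t u32;

/* Layout copied from mali_kbase_hwcnt_reader.h */
struct kbase_hwcnt_reader_metadata_cycles {
        u64 top;
        u64 shader_cores;
};

struct kbase_hwcnt_reader_metadata {
        u64 timestamp;
        u32 event_id;
        u32 buffer_idx;
        struct kbase_hwcnt_reader_metadata_cycles cycles;
};

#define KBASE_HWCNT_READER 0xBE
#define KBASE_HWCNT_READER_GET_BUFFER \
        _IOC(_IOC_READ, KBASE_HWCNT_READER, 0x20, \
             offsetof(struct kbase_hwcnt_reader_metadata, cycles))
#define KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES \
        _IOR(KBASE_HWCNT_READER, 0x20, struct kbase_hwcnt_reader_metadata)

int main(void)
{
        /* Same command number (0x20), different encoded payload sizes:
         * 16 bytes for the legacy layout, 32 once the cycles are appended.
         */
        printf("nr=%u legacy size=%u, with-cycles size=%u\n",
               _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER),
               _IOC_SIZE(KBASE_HWCNT_READER_GET_BUFFER),
               _IOC_SIZE(KBASE_HWCNT_READER_GET_BUFFER_WITH_CYCLES));
        return 0;
}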
*/ + if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) + return -EINVAL; + /* Calculate the bytes needed to tightly pack the metadata */ /* Top level metadata */ @@ -158,6 +162,7 @@ int kbase_hwcnt_metadata_create( enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; metadata->avail_mask = desc->avail_mask; + metadata->clk_cnt = desc->clk_cnt; WARN_ON(size != offset); /* Due to the block alignment, there should be exactly one enable map @@ -187,12 +192,17 @@ int kbase_hwcnt_enable_map_alloc( if (!metadata || !enable_map) return -EINVAL; - enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL); - if (!enable_map_buf) - return -ENOMEM; + if (metadata->enable_map_bytes > 0) { + enable_map_buf = + kzalloc(metadata->enable_map_bytes, GFP_KERNEL); + if (!enable_map_buf) + return -ENOMEM; + } else { + enable_map_buf = NULL; + } enable_map->metadata = metadata; - enable_map->enable_map = enable_map_buf; + enable_map->hwcnt_enable_map = enable_map_buf; return 0; } KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_alloc); @@ -202,8 +212,8 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map) if (!enable_map) return; - kfree(enable_map->enable_map); - enable_map->enable_map = NULL; + kfree(enable_map->hwcnt_enable_map); + enable_map->hwcnt_enable_map = NULL; enable_map->metadata = NULL; } KBASE_EXPORT_TEST_API(kbase_hwcnt_enable_map_free); @@ -212,17 +222,25 @@ int kbase_hwcnt_dump_buffer_alloc( const struct kbase_hwcnt_metadata *metadata, struct kbase_hwcnt_dump_buffer *dump_buf) { - u32 *buf; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; + u8 *buf; if (!metadata || !dump_buf) return -EINVAL; - buf = kmalloc(metadata->dump_buf_bytes, GFP_KERNEL); + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + + /* Make a single allocation for both dump_buf and clk_cnt_buf. */ + buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); if (!buf) return -ENOMEM; dump_buf->metadata = metadata; - dump_buf->dump_buf = buf; + dump_buf->dump_buf = (u32 *)buf; + dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); + return 0; } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_alloc); @@ -246,10 +264,16 @@ int kbase_hwcnt_dump_buffer_array_alloc( size_t buf_idx; unsigned int order; unsigned long addr; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; if (!metadata || !dump_bufs) return -EINVAL; + dump_buf_bytes = metadata->dump_buf_bytes; + clk_cnt_buf_bytes = + sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; + /* Allocate memory for the dump buffer struct array */ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); if (!buffers) @@ -258,7 +282,7 @@ int kbase_hwcnt_dump_buffer_array_alloc( /* Allocate pages for the actual dump buffers, as they tend to be fairly * large. 
*/ - order = get_order(metadata->dump_buf_bytes * n); + order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); addr = __get_free_pages(GFP_KERNEL, order); if (!addr) { @@ -273,10 +297,14 @@ int kbase_hwcnt_dump_buffer_array_alloc( /* Set the buffer of each dump buf */ for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t offset = metadata->dump_buf_bytes * buf_idx; + const size_t dump_buf_offset = dump_buf_bytes * buf_idx; + const size_t clk_cnt_buf_offset = + (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); buffers[buf_idx].metadata = metadata; - buffers[buf_idx].dump_buf = (u32 *)(addr + offset); + buffers[buf_idx].dump_buf = (u32 *)(addr + dump_buf_offset); + buffers[buf_idx].clk_cnt_buf = + (u64 *)(addr + clk_cnt_buf_offset); } return 0; @@ -324,6 +352,9 @@ void kbase_hwcnt_dump_buffer_zero( kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); } + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero); @@ -334,6 +365,9 @@ void kbase_hwcnt_dump_buffer_zero_strict( return; memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); + + memset(dst->clk_cnt_buf, 0, + sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_zero_strict); @@ -384,6 +418,7 @@ void kbase_hwcnt_dump_buffer_copy( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -413,6 +448,12 @@ void kbase_hwcnt_dump_buffer_copy( kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy); @@ -423,6 +464,7 @@ void kbase_hwcnt_dump_buffer_copy_strict( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -451,6 +493,14 @@ void kbase_hwcnt_dump_buffer_copy_strict( kbase_hwcnt_dump_buffer_block_copy_strict( dst_blk, src_blk, blk_em, val_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + bool clk_enabled = + kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk); + + dst->clk_cnt_buf[clk] = clk_enabled ? 
src->clk_cnt_buf[clk] : 0; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_copy_strict); @@ -461,6 +511,7 @@ void kbase_hwcnt_dump_buffer_accumulate( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -494,6 +545,12 @@ void kbase_hwcnt_dump_buffer_accumulate( kbase_hwcnt_dump_buffer_block_accumulate( dst_blk, src_blk, hdr_cnt, ctr_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate); @@ -504,6 +561,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( { const struct kbase_hwcnt_metadata *metadata; size_t grp, blk, blk_inst; + size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || @@ -534,5 +592,13 @@ void kbase_hwcnt_dump_buffer_accumulate_strict( kbase_hwcnt_dump_buffer_block_accumulate_strict( dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { + if (kbase_hwcnt_clk_enable_map_enabled( + dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; + else + dst->clk_cnt_buf[clk] = 0; + } } KBASE_EXPORT_TEST_API(kbase_hwcnt_dump_buffer_accumulate_strict); diff --git a/mali_kbase/mali_kbase_hwcnt_types.h b/mali_kbase/mali_kbase_hwcnt_types.h index 4d78c84..6a2640f 100644 --- a/mali_kbase/mali_kbase_hwcnt_types.h +++ b/mali_kbase/mali_kbase_hwcnt_types.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2018 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -136,11 +136,13 @@ struct kbase_hwcnt_group_description { * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, * describing each Hardware Counter Group in the system. * @avail_mask: Flat Availability Mask for all block instances in the system. + * @clk_cnt: The number of clock domains in the system. The maximum is 64. */ struct kbase_hwcnt_description { size_t grp_cnt; const struct kbase_hwcnt_group_description *grps; u64 avail_mask; + u8 clk_cnt; }; /** @@ -220,6 +222,7 @@ struct kbase_hwcnt_group_metadata { * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. * @avail_mask: The Availability Mask for the system. + * @clk_cnt: The number of clock domains in the system. */ struct kbase_hwcnt_metadata { size_t grp_cnt; @@ -227,6 +230,7 @@ struct kbase_hwcnt_metadata { size_t enable_map_bytes; size_t dump_buf_bytes; u64 avail_mask; + u8 clk_cnt; }; /** @@ -234,13 +238,16 @@ struct kbase_hwcnt_metadata { * bitfields. * @metadata: Non-NULL pointer to metadata used to identify, and to describe * the layout of the enable map. - * @enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an array - * of u64 bitfields, each bit of which enables one hardware + * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an + * array of u64 bitfields, each bit of which enables one hardware * counter. + * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle + * counter for a given clock domain. 
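kbase_hwcnt_dump_buffer_alloc() above carves one allocation into the u32 counter region followed by the u64 cycle-count region, avoiding a second allocation and keeping the two regions adjacent. The standalone sketch below shows the same carving with hypothetical sizes; it assumes dump_buf_bytes is a multiple of 8 so the u64 tail stays naturally aligned, an assumption that holds when the counter area is built from whole counter blocks.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct dump_buffer {
        uint32_t *dump_buf;    /* counter values */
        uint64_t *clk_cnt_buf; /* per-clock cycle counts */
        void *raw;             /* single backing allocation */
};

/* One allocation carved into two regions; dump_buf_bytes must keep
 * the u64 tail correctly aligned (true when it is a multiple of 8).
 */
static int dump_buffer_alloc(struct dump_buffer *buf,
                             size_t dump_buf_bytes, size_t clk_cnt)
{
        uint8_t *raw = malloc(dump_buf_bytes + clk_cnt * sizeof(uint64_t));

        if (!raw)
                return -1;
        buf->raw = raw;
        buf->dump_buf = (uint32_t *)raw;
        buf->clk_cnt_buf = (uint64_t *)(raw + dump_buf_bytes);
        return 0;
}

int main(void)
{
        struct dump_buffer buf;

        if (dump_buffer_alloc(&buf, 256, 2)) /* hypothetical sizes */
                return 1;
        buf.clk_cnt_buf[0] = 12345; /* cycle count for the top clock */
        printf("%p %p\n", (void *)buf.dump_buf, (void *)buf.clk_cnt_buf);
        free(buf.raw);
        return 0;
}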
*/ struct kbase_hwcnt_enable_map { const struct kbase_hwcnt_metadata *metadata; - u64 *enable_map; + u64 *hwcnt_enable_map; + u64 clk_enable_map; }; /** @@ -250,10 +257,13 @@ struct kbase_hwcnt_enable_map { * the layout of the Dump Buffer. * @dump_buf: Non-NULL pointer of size metadata->dump_buf_bytes to an array * of u32 values. + * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed + * for each clock domain. */ struct kbase_hwcnt_dump_buffer { const struct kbase_hwcnt_metadata *metadata; u32 *dump_buf; + u64 *clk_cnt_buf; }; /** @@ -473,7 +483,7 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); * block instance. */ #define kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst) \ - ((map)->enable_map + \ + ((map)->hwcnt_enable_map + \ (map)->metadata->grp_metadata[(grp)].enable_map_index + \ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_index + \ (map)->metadata->grp_metadata[(grp)].blk_metadata[(blk)].enable_map_stride * (blk_inst)) @@ -520,7 +530,11 @@ static inline void kbase_hwcnt_enable_map_block_disable_all( static inline void kbase_hwcnt_enable_map_disable_all( struct kbase_hwcnt_enable_map *dst) { - memset(dst->enable_map, 0, dst->metadata->enable_map_bytes); + if (dst->hwcnt_enable_map != NULL) + memset(dst->hwcnt_enable_map, 0, + dst->metadata->enable_map_bytes); + + dst->clk_enable_map = 0; } /** @@ -569,6 +583,8 @@ static inline void kbase_hwcnt_enable_map_enable_all( kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) kbase_hwcnt_enable_map_block_enable_all( dst, grp, blk, blk_inst); + + dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; } /** @@ -582,9 +598,13 @@ static inline void kbase_hwcnt_enable_map_copy( struct kbase_hwcnt_enable_map *dst, const struct kbase_hwcnt_enable_map *src) { - memcpy(dst->enable_map, - src->enable_map, - dst->metadata->enable_map_bytes); + if (dst->hwcnt_enable_map != NULL) { + memcpy(dst->hwcnt_enable_map, + src->hwcnt_enable_map, + dst->metadata->enable_map_bytes); + } + + dst->clk_enable_map = src->clk_enable_map; } /** @@ -602,8 +622,12 @@ static inline void kbase_hwcnt_enable_map_union( dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; size_t i; - for (i = 0; i < bitfld_count; i++) - dst->enable_map[i] |= src->enable_map[i]; + if (dst->hwcnt_enable_map != NULL) { + for (i = 0; i < bitfld_count; i++) + dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; + } + + dst->clk_enable_map |= src->clk_enable_map; } /** @@ -656,6 +680,12 @@ static inline bool kbase_hwcnt_enable_map_any_enabled( const struct kbase_hwcnt_enable_map *enable_map) { size_t grp, blk, blk_inst; + const u64 clk_enable_map_mask = + (1ull << enable_map->metadata->clk_cnt) - 1; + + if (enable_map->metadata->clk_cnt > 0 && + (enable_map->clk_enable_map & clk_enable_map_mask)) + return true; kbase_hwcnt_metadata_for_each_block( enable_map->metadata, grp, blk, blk_inst) { @@ -1084,4 +1114,29 @@ static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict( } } +/** + * @brief Iterate over each clock domain in the metadata. + * + * @param[in] md Non-NULL pointer to metadata. + * @param[in] clk size_t variable used as clock iterator. + */ +#define kbase_hwcnt_metadata_for_each_clock(md, clk) \ + for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++) + +/** + * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled + * in clk_enable_map. + * @clk_enable_map: An enable map for clock domains. + * @index: Index of the enable map for clock domain. 
+ * + * Return: true if the index of the clock domain is enabled, else false. + */ +static inline bool kbase_hwcnt_clk_enable_map_enabled( + const u64 clk_enable_map, const size_t index) +{ + if (clk_enable_map & (1ull << index)) + return true; + return false; +} + #endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/mali_kbase/mali_kbase_ioctl.h b/mali_kbase/mali_kbase_ioctl.h index 977b194..17e7601 100644 --- a/mali_kbase/mali_kbase_ioctl.h +++ b/mali_kbase/mali_kbase_ioctl.h @@ -166,7 +166,7 @@ struct kbase_ioctl_mem_free { /** * struct kbase_ioctl_hwcnt_reader_setup - Setup HWC dumper/reader * @buffer_count: requested number of dumping buffers - * @jm_bm: counters selection bitmask (JM) + * @fe_bm: counters selection bitmask (Front end) * @shader_bm: counters selection bitmask (Shader) * @tiler_bm: counters selection bitmask (Tiler) * @mmu_l2_bm: counters selection bitmask (MMU_L2) @@ -175,7 +175,7 @@ struct kbase_ioctl_mem_free { */ struct kbase_ioctl_hwcnt_reader_setup { __u32 buffer_count; - __u32 jm_bm; + __u32 fe_bm; __u32 shader_bm; __u32 tiler_bm; __u32 mmu_l2_bm; @@ -187,14 +187,14 @@ struct kbase_ioctl_hwcnt_reader_setup { /** * struct kbase_ioctl_hwcnt_enable - Enable hardware counter collection * @dump_buffer: GPU address to write counters to - * @jm_bm: counters selection bitmask (JM) + * @fe_bm: counters selection bitmask (Front end) * @shader_bm: counters selection bitmask (Shader) * @tiler_bm: counters selection bitmask (Tiler) * @mmu_l2_bm: counters selection bitmask (MMU_L2) */ struct kbase_ioctl_hwcnt_enable { __u64 dump_buffer; - __u32 jm_bm; + __u32 fe_bm; __u32 shader_bm; __u32 tiler_bm; __u32 mmu_l2_bm; diff --git a/mali_kbase/mali_kbase_jd.c b/mali_kbase/mali_kbase_jd.c index 1a830dd..8f22ceb 100644 --- a/mali_kbase/mali_kbase_jd.c +++ b/mali_kbase/mali_kbase_jd.c @@ -32,6 +32,7 @@ #include <linux/ratelimit.h> #include <mali_kbase_jm.h> +#include <mali_kbase_kinstr_jm.h> #include <mali_kbase_hwaccess_jm.h> #include <tl/mali_kbase_tracepoints.h> #include <mali_linux_trace.h> @@ -39,6 +40,8 @@ #include "mali_kbase_dma_fence.h" #include <mali_kbase_cs_experimental.h> +#include <mali_kbase_caps.h> + #define beenthere(kctx, f, a...) dev_dbg(kctx->kbdev->dev, "%s:" f, __func__, ##a) #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) @@ -52,11 +55,6 @@ ((katom->core_req & BASE_JD_REQ_ATOM_TYPE) == \ BASE_JD_REQ_DEP))) -/* Minimum API version that supports the just-in-time memory allocation pressure - * limit feature. - */ -#define MIN_API_VERSION_WITH_JPL KBASE_API_VERSION(11, 20) - /* * This is the kernel side of the API. Only entry points are: * - kbase_jd_submit(): Called from userspace to submit a single bag @@ -77,6 +75,15 @@ get_compat_pointer(struct kbase_context *kctx, const u64 p) return u64_to_user_ptr(p); } +/* Mark an atom as complete, and trace it in kinstr_jm */ +static void jd_mark_atom_complete(struct kbase_jd_atom *katom) +{ + katom->status = KBASE_JD_ATOM_STATE_COMPLETED; + kbase_kinstr_jm_atom_complete(katom); + dev_dbg(katom->kctx->kbdev->dev, "Atom %p status to completed\n", + (void *)katom); +} + /* Runs an atom, either by handing to the JS or by immediately running it in the case of soft-jobs * * Returns whether the JS needs a reschedule. 
@@ -97,24 +104,18 @@ static bool jd_run_atom(struct kbase_jd_atom *katom) /* Dependency only atom */ trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(katom->kctx, katom)); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", - (void *)katom); + jd_mark_atom_complete(katom); return 0; } else if (katom->core_req & BASE_JD_REQ_SOFT_JOB) { /* Soft-job */ if (katom->will_fail_event_code) { kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, - "Atom %p status to completed\n", (void *)katom); + jd_mark_atom_complete(katom); return 0; } if (kbase_process_soft_job(katom) == 0) { kbase_finish_soft_job(katom); - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, - "Atom %p status to completed\n", (void *)katom); + jd_mark_atom_complete(katom); } return 0; } @@ -205,7 +206,7 @@ static void kbase_jd_post_external_resources(struct kbase_jd_atom *katom) * jctx.lock must be held when this is called. */ -static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom_v2 *user_atom) +static int kbase_jd_pre_external_resources(struct kbase_jd_atom *katom, const struct base_jd_atom *user_atom) { int err_ret_val = -EINVAL; u32 res_no; @@ -465,8 +466,6 @@ static inline void jd_resolve_dep(struct list_head *out_list, } } -KBASE_EXPORT_TEST_API(jd_resolve_dep); - /** * is_dep_valid - Validate that a dependency is valid for early dependency * submission @@ -558,7 +557,7 @@ static void jd_try_submitting_deps(struct list_head *out_list, } } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * jd_update_jit_usage - Update just-in-time physical memory usage for an atom. * @@ -698,7 +697,7 @@ static void jd_update_jit_usage(struct kbase_jd_atom *katom) kbase_jit_retry_pending_alloc(kctx); } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* * Perform the necessary handling of an atom that has finished running @@ -723,9 +722,10 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, KBASE_DEBUG_ASSERT(katom->status != KBASE_JD_ATOM_STATE_UNUSED); -#if MALI_JIT_PRESSURE_LIMIT - jd_update_jit_usage(katom); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (kbase_ctx_flag(kctx, KCTX_JPL_ENABLED)) + jd_update_jit_usage(katom); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* This is needed in case an atom is failed due to being invalid, this * can happen *before* the jobs that the atom depends on have completed */ @@ -736,9 +736,7 @@ bool jd_done_nolock(struct kbase_jd_atom *katom, } } - katom->status = KBASE_JD_ATOM_STATE_COMPLETED; - dev_dbg(kctx->kbdev->dev, "Atom %p status to completed\n", - (void *)katom); + jd_mark_atom_complete(katom); list_add_tail(&katom->jd_item, &completed_jobs); while (!list_empty(&completed_jobs)) { @@ -870,8 +868,23 @@ static const char *kbasep_map_core_reqs_to_string(base_jd_core_req core_req) } #endif +/* Trace an atom submission. 
*/ +static void jd_trace_atom_submit(struct kbase_context *const kctx, + struct kbase_jd_atom *const katom, + int *priority) +{ + struct kbase_device *const kbdev = kctx->kbdev; + + KBASE_TLSTREAM_TL_NEW_ATOM(kbdev, katom, kbase_jd_atom_id(kctx, katom)); + KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + if (priority) + KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, *priority); + KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); + kbase_kinstr_jm_atom_queue(katom); +} + static bool jd_submit_atom(struct kbase_context *const kctx, - const struct base_jd_atom_v2 *const user_atom, + const struct base_jd_atom *const user_atom, const struct base_jd_fragment *const user_jc_incr, struct kbase_jd_atom *const katom) { @@ -901,6 +914,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->jc = user_atom->jc; katom->core_req = user_atom->core_req; katom->jobslot = user_atom->jobslot; + katom->seq_nr = user_atom->seq_nr; katom->atom_flags = 0; katom->retry_count = 0; katom->need_cache_flush_cores_retained = 0; @@ -913,19 +927,19 @@ static bool jd_submit_atom(struct kbase_context *const kctx, trace_sysgraph(SGR_ARRIVE, kctx->id, user_atom->atom_number); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Older API version atoms might have random values where jit_id now * lives, but we must maintain backwards compatibility - handle the * issue. */ - if (kctx->api_version < MIN_API_VERSION_WITH_JPL) { + if (!mali_kbase_supports_jit_pressure_limit(kctx->api_version)) { katom->jit_ids[0] = 0; katom->jit_ids[1] = 0; } else { katom->jit_ids[0] = user_atom->jit_id[0]; katom->jit_ids[1] = user_atom->jit_id[1]; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ katom->renderpass_id = user_atom->renderpass_id; @@ -961,17 +975,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* Wrong dependency setup. Atom will be sent * back to user space. Do not record any * dependencies. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX( - kbdev, - katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE( - kbdev, - katom, - TL_ATOM_STATE_IDLE); + jd_trace_atom_submit(kctx, katom, NULL); return jd_done_nolock(katom, NULL); } @@ -1013,13 +1017,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, /* This atom will be sent back to user space. * Do not record any dependencies. */ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, - TL_ATOM_STATE_IDLE); + jd_trace_atom_submit(kctx, katom, NULL); will_fail = true; @@ -1078,13 +1076,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->sched_priority = sched_prio; /* Create a new atom. 
*/ - KBASE_TLSTREAM_TL_NEW_ATOM( - kbdev, - katom, - kbase_jd_atom_id(kctx, katom)); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_STATE(kbdev, katom, TL_ATOM_STATE_IDLE); - KBASE_TLSTREAM_TL_ATTRIB_ATOM_PRIORITY(kbdev, katom, katom->sched_priority); - KBASE_TLSTREAM_TL_RET_ATOM_CTX(kbdev, katom, kctx); + jd_trace_atom_submit(kctx, katom, &katom->sched_priority); #if !MALI_INCREMENTAL_RENDERING /* Reject atoms for incremental rendering if not supported */ @@ -1151,8 +1143,8 @@ static bool jd_submit_atom(struct kbase_context *const kctx, } } -#if !MALI_JIT_PRESSURE_LIMIT - if ((kctx->api_version >= MIN_API_VERSION_WITH_JPL) && +#if !MALI_JIT_PRESSURE_LIMIT_BASE + if (mali_kbase_supports_jit_pressure_limit(kctx->api_version) && (user_atom->jit_id[0] || user_atom->jit_id[1])) { /* JIT pressure limit is disabled, but we are receiving non-0 * JIT IDs - atom is invalid. @@ -1160,7 +1152,7 @@ static bool jd_submit_atom(struct kbase_context *const kctx, katom->event_code = BASE_JD_EVENT_JOB_INVALID; return jd_done_nolock(katom, NULL); } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /* Validate the atom. Function will return error if the atom is * malformed. @@ -1233,6 +1225,9 @@ int kbase_jd_submit(struct kbase_context *kctx, struct kbase_device *kbdev; u32 latest_flush; + bool jd_atom_is_v2 = (stride == sizeof(struct base_jd_atom_v2) || + stride == offsetof(struct base_jd_atom_v2, renderpass_id)); + /* * kbase_jd_submit isn't expected to fail and so all errors with the * jobs are reported by immediately failing them (through event system) @@ -1247,7 +1242,9 @@ int kbase_jd_submit(struct kbase_context *kctx, } if (stride != offsetof(struct base_jd_atom_v2, renderpass_id) && - stride != sizeof(struct base_jd_atom_v2)) { + stride != sizeof(struct base_jd_atom_v2) && + stride != offsetof(struct base_jd_atom, renderpass_id) && + stride != sizeof(struct base_jd_atom)) { dev_err(kbdev->dev, "Stride %u passed to job_submit isn't supported by the kernel\n", stride); @@ -1258,16 +1255,29 @@ int kbase_jd_submit(struct kbase_context *kctx, latest_flush = kbase_backend_get_current_flush_id(kbdev); for (i = 0; i < nr_atoms; i++) { - struct base_jd_atom_v2 user_atom; + struct base_jd_atom user_atom; struct base_jd_fragment user_jc_incr; struct kbase_jd_atom *katom; - if (copy_from_user(&user_atom, user_addr, stride) != 0) { - dev_err(kbdev->dev, - "Invalid atom address %p passed to job_submit\n", - user_addr); - err = -EFAULT; - break; + if (unlikely(jd_atom_is_v2)) { + if (copy_from_user(&user_atom.jc, user_addr, sizeof(struct base_jd_atom_v2)) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } + + /* no seq_nr in v2 */ + user_atom.seq_nr = 0; + } else { + if (copy_from_user(&user_atom, user_addr, stride) != 0) { + dev_err(kbdev->dev, + "Invalid atom address %p passed to job_submit\n", + user_addr); + err = -EFAULT; + break; + } } if (stride == offsetof(struct base_jd_atom_v2, renderpass_id)) { diff --git a/mali_kbase/mali_kbase_kinstr_jm.c b/mali_kbase/mali_kbase_kinstr_jm.c new file mode 100644 index 0000000..1e91a7c --- /dev/null +++ b/mali_kbase/mali_kbase_kinstr_jm.c @@ -0,0 +1,896 @@ +/* + * + * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * mali_kbase_kinstr_jm.c + * Kernel driver public interface to job manager atom tracing + */ + +#include "mali_kbase_kinstr_jm.h" +#include "mali_kbase_kinstr_jm_reader.h" + +#include "mali_kbase.h" +#include "mali_kbase_linux.h" + +#include <mali_kbase_jm_rb.h> + +#include <asm/barrier.h> +#include <linux/anon_inodes.h> +#include <linux/circ_buf.h> +#include <linux/fs.h> +#include <linux/kref.h> +#include <linux/ktime.h> +#include <linux/log2.h> +#include <linux/mutex.h> +#include <linux/rculist_bl.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/version.h> +#include <linux/wait.h> + +#if KERNEL_VERSION(5, 1, 0) <= LINUX_VERSION_CODE +#include <linux/build_bug.h> +#else +// Stringify the expression if no message is given. +#define static_assert(e, ...) __static_assert(e, #__VA_ARGS__, #e) +#define __static_assert(e, msg, ...) _Static_assert(e, msg) +#endif + +#if KERNEL_VERSION(4, 16, 0) >= LINUX_VERSION_CODE +typedef unsigned int __poll_t; +#endif + +#ifndef ENOTSUP +#define ENOTSUP EOPNOTSUPP +#endif + +/* The module printing prefix */ +#define PR_ "mali_kbase_kinstr_jm: " + +/* Allows us to perform ASM goto for the tracing + * https://www.kernel.org/doc/Documentation/static-keys.txt + */ +#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE +DEFINE_STATIC_KEY_FALSE(basep_kinstr_jm_reader_static_key); +#else +struct static_key basep_kinstr_jm_reader_static_key = STATIC_KEY_INIT_FALSE; +#define static_branch_inc(key) static_key_slow_inc(key) +#define static_branch_dec(key) static_key_slow_dec(key) +#endif /* KERNEL_VERSION(4 ,3, 0) <= LINUX_VERSION_CODE */ + +#define KBASE_KINSTR_JM_VERSION 1 + +/** + * struct kbase_kinstr_jm - The context for the kernel job manager atom tracing + * @readers: a bitlocked list of opened readers. Readers are attached to the + * private data of a file descriptor that the user opens with the + * KBASE_IOCTL_KINSTR_JM_FD IO control call. + * @refcount: reference count for the context. Any reader will have a link + * back to the context so that they can remove themselves from the + * list. + * + * This is opaque outside this compilation unit + */ +struct kbase_kinstr_jm { + struct hlist_bl_head readers; + struct kref refcount; +}; + +/** + * struct kbase_kinstr_jm_atom_state_change - Represents an atom changing to a + * new state + * @timestamp: Raw monotonic nanoseconds of the state change + * @state: The state that the atom has moved to + * @atom: The atom number that has changed state + * @flags: Flags associated with the state change. See + * KBASE_KINSTR_JM_ATOM_STATE_FLAG_* defines. + * @reserved: Reserved for future use. + * @data: Extra data for the state change. Active member depends on state. + * + * We can add new fields to the structure and old user code will gracefully + * ignore the new fields. + * + * We can change the size of the structure and old user code will gracefully + * skip over the new size via `struct kbase_kinstr_jm_fd_out->size`. 
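A consequence of this scheme is that a reader must walk the change stream using the record size reported by the kernel rather than its own compiled-in sizeof, interpreting only the leading fields it understands. Below is a hedged userspace sketch of such a parser: the v1 prefix struct mirrors the fields named in this file, the record size would come from the KBASE_IOCTL_KINSTR_JM_FD reply, and the buffer in main() is fabricated purely for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Prefix of the v1 state-change record that this client understands;
 * the kernel's record may be larger in later minor versions.
 */
struct state_change_v1 {
        uint64_t timestamp;
        int8_t state;
        uint8_t atom;
        uint8_t flags;
};

/* Walk a buffer of records using the kernel-reported record size
 * (from the fd setup reply), not sizeof(struct ...), so a newer,
 * bigger record layout is stepped over correctly.
 */
static void parse_changes(const uint8_t *buf, size_t buf_bytes,
                          size_t kernel_record_size)
{
        size_t off;

        if (kernel_record_size < sizeof(struct state_change_v1))
                return; /* kernel older than this client understands */

        for (off = 0; off + kernel_record_size <= buf_bytes;
             off += kernel_record_size) {
                struct state_change_v1 rec;

                memcpy(&rec, buf + off, sizeof(rec));
                printf("atom %u -> state %d at %llu\n", (unsigned)rec.atom,
                       (int)rec.state, (unsigned long long)rec.timestamp);
        }
}

int main(void)
{
        uint8_t buf[32] = { 0 }; /* two fake 16-byte records */

        buf[8] = 1; /* state */
        buf[9] = 7; /* atom number */
        parse_changes(buf, sizeof(buf), 16);
        return 0;
}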
+ * + * If we remove fields, the version field in `struct + * kbase_kinstr_jm_fd_out->version` will be incremented and old user code will + * gracefully fail and tell the user that the kernel API is too new and has + * backwards-incompatible changes. Note that userspace can opt to handle + * multiple kernel major versions of the structure. + * + * If we need to change the _meaning_ of one of the fields, i.e. the state + * machine has had an incompatible change, we can keep the same members in the + * structure and update the version as above. User code will no longer + * recognise that it has the supported field and can gracefully explain to the + * user that the kernel API is no longer supported. + * + * When making changes to this structure, make sure they are either: + * - additions to the end (for minor version bumps (i.e. only a size increase)) + * such that the layout of existing fields doesn't change, or; + * - update the version reported to userspace so that it can fail explicitly. + */ +struct kbase_kinstr_jm_atom_state_change { + u64 timestamp; + s8 state; /* enum kbase_kinstr_jm_reader_atom_state */ + u8 atom; + u8 flags; + u8 reserved[1]; + /* Tagged union based on state. Ensure members are aligned correctly! */ + union { + struct { + u8 slot; + } start; + u8 padding[4]; + } data; +}; +static_assert( + ((1 << 8 * sizeof(((struct kbase_kinstr_jm_atom_state_change *)0)->state)) - 1) >= + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT); + +#define KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW BIT(0) + +/** + * struct reader_changes - The circular buffer of kernel atom state changes + * @data: The allocated buffer. This is allocated when the user requests + * the reader file descriptor. It is released when the user calls + * close() on the fd. When accessing this, lock the producer spin + * lock to prevent races on the allocated memory. The consumer lock + * does not need to be held because newly-inserted data will always + * be outside the currently-read range. + * @producer: The producing spinlock which allows us to push changes into the + * buffer at the same time as a user read occurring. This needs to + * be locked when saving/restoring the IRQ because we can receive an + * interrupt from the GPU when an atom completes. The CPU could have + * a task preempted that is holding this lock. + * @consumer: The consuming mutex which locks around the user read(). + * Must be held when updating the tail of the circular buffer. + * @head: The head of the circular buffer. Can be used with Linux @c CIRC_ + * helpers. The producer should lock and update this with an SMP + * store when a new change lands. The consumer can read with an + * SMP load. This allows the producer to safely insert new changes + * into the circular buffer. + * @tail: The tail of the circular buffer. Can be used with Linux @c CIRC_ + * helpers. The producer should do a READ_ONCE load and the consumer + * should SMP store. + * @size: The number of changes that are allowed in @c data. Can be used + * with Linux @c CIRC_ helpers. Will always be a power of two. The + * producer lock should be held when updating this and stored with + * an SMP release memory barrier. This means that the consumer can + * do an SMP load. + * @threshold: The number of changes above which threads polling on the reader + * file descriptor will be woken up.
+ */ +struct reader_changes { + struct kbase_kinstr_jm_atom_state_change *data; + spinlock_t producer; + struct mutex consumer; + u32 head; + u32 tail; + u32 size; + u32 threshold; +}; + +/** + * reader_changes_is_valid_size() - Determines if requested changes buffer size + * is valid. + * @size: The requested memory size + * + * We have a constraint that the underlying physical buffer must be a + * power of two so that we can use the efficient circular buffer helpers that + * the kernel provides. It also needs to be representable within a u32. + * + * Return: + * * true - the size is valid + * * false - the size is invalid + */ +static inline bool reader_changes_is_valid_size(const size_t size) +{ + typedef struct reader_changes changes_t; + const size_t elem_size = sizeof(*((changes_t *)0)->data); + const size_t size_size = sizeof(((changes_t *)0)->size); + const size_t size_max = (1ull << (size_size * 8)) - 1; + + return is_power_of_2(size) && /* Is a power of two */ + ((size / elem_size) <= size_max); /* Small enough */ +} + +/** + * reader_changes_init() - Initializes the reader changes and allocates the + * changes buffer + * @changes: The context pointer, must point to a zero-inited allocated reader + * changes structure. We may support allocating the structure in the + * future. + * @size: The requested changes buffer size + * + * Return: + * (0, U16_MAX] - the number of data elements allocated + * -EINVAL - a pointer was invalid + * -ENOTSUP - we do not support allocation of the context + * -ERANGE - the requested memory size was invalid + * -ENOMEM - could not allocate the memory + * -EADDRINUSE - the buffer memory was already allocated + */ +static int reader_changes_init(struct reader_changes *const changes, + const size_t size) +{ + BUILD_BUG_ON((PAGE_SIZE % sizeof(*changes->data)) != 0); + + if (!reader_changes_is_valid_size(size)) { + pr_warn(PR_ "invalid size %zu\n", size); + return -ERANGE; + } + + changes->data = vmalloc(size); + if (!changes->data) + return -ENOMEM; + + spin_lock_init(&changes->producer); + mutex_init(&changes->consumer); + + changes->size = size / sizeof(*changes->data); + changes->threshold = min(((size_t)(changes->size)) / 4, + ((size_t)(PAGE_SIZE)) / sizeof(*changes->data)); + + return changes->size; +} + +/** + * reader_changes_term() - Cleans up a reader changes structure + * @changes: The context to clean up + * + * Releases the allocated state changes memory + */ +static void reader_changes_term(struct reader_changes *const changes) +{ + struct kbase_kinstr_jm_atom_state_change *data = NULL; + unsigned long irq; + + /* + * Although changes->data is used on the consumer side, too, no active + * consumer is possible by the time we clean up the reader changes, so + * no need to take the consumer lock. However, we do need the producer + * lock because the list removal can race with list traversal. + */ + spin_lock_irqsave(&changes->producer, irq); + swap(changes->data, data); + spin_unlock_irqrestore(&changes->producer, irq); + + mutex_destroy(&changes->consumer); + vfree(data); +} + +/** + * reader_changes_count_locked() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * The consumer mutex must be held. Uses the CIRC_CNT_TO_END macro to + * determine the count, so there may be more items. However, that's the maximum + * number that can be read in one contiguous read. 
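The head/tail arithmetic in this file comes from the CIRC_* helpers in linux/circ_buf.h, which is why reader_changes_is_valid_size() insists on a power-of-two size: index wrap-around then reduces to a bitwise AND, and one slot is sacrificed to distinguish full from empty. The standalone functions below re-derive the three helpers used here with the same arithmetic as the kernel macros:

#include <stdio.h>

/* Same arithmetic as the CIRC_* macros in linux/circ_buf.h, written
 * as functions; size must be a power of two.
 */
static unsigned int circ_cnt(unsigned int head, unsigned int tail,
                             unsigned int size)
{
        return (head - tail) & (size - 1); /* items available to read */
}

static unsigned int circ_space(unsigned int head, unsigned int tail,
                               unsigned int size)
{
        /* one slot is kept empty to distinguish full from empty */
        return circ_cnt(tail, head + 1, size);
}

static unsigned int circ_cnt_to_end(unsigned int head, unsigned int tail,
                                    unsigned int size)
{
        unsigned int end = size - tail; /* slots before the wrap point */
        unsigned int n = (head + end) & (size - 1);

        return n < end ? n : end; /* readable without wrapping */
}

int main(void)
{
        /* 8-slot buffer; producer at 2 after wrapping, consumer at 6 */
        printf("cnt=%u space=%u to_end=%u\n",
               circ_cnt(2, 6, 8), circ_space(2, 6, 8),
               circ_cnt_to_end(2, 6, 8)); /* cnt=4 space=3 to_end=2 */
        return 0;
}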
+ * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count_locked(struct reader_changes *const changes) +{ + u32 head; + + lockdep_assert_held_once(&changes->consumer); + + head = smp_load_acquire(&changes->head); + + return CIRC_CNT_TO_END(head, changes->tail, changes->size); +} + +/** + * reader_changes_count() - Retrieves the count of state changes from the + * tail to the physical end of the buffer + * @changes: The state changes context + * + * Return: the number of changes in the circular buffer until the end of the + * allocation + */ +static u32 reader_changes_count(struct reader_changes *const changes) +{ + u32 ret; + + mutex_lock(&changes->consumer); + ret = reader_changes_count_locked(changes); + mutex_unlock(&changes->consumer); + return ret; +} + +/** + * reader_changes_push() - Pushes a change into the reader circular buffer. + * @changes: The buffer to insert the change into + * @change: Kernel atom change to insert + * @wait_queue: The queue to be kicked when changes should be read from + * userspace. Kicked when a threshold is reached or there is + * overflow. + */ +static void reader_changes_push( + struct reader_changes *const changes, + const struct kbase_kinstr_jm_atom_state_change *const change, + wait_queue_head_t *const wait_queue) +{ + u32 head, tail, size, space; + unsigned long irq; + struct kbase_kinstr_jm_atom_state_change *data; + + spin_lock_irqsave(&changes->producer, irq); + + /* We may be called for a reader_changes that's awaiting cleanup. */ + data = changes->data; + if (!data) + goto unlock; + + size = changes->size; + head = changes->head; + tail = smp_load_acquire(&changes->tail); + + space = CIRC_SPACE(head, tail, size); + if (space >= 1) { + data[head] = *change; + if (space == 1) { + data[head].flags |= + KBASE_KINSTR_JM_ATOM_STATE_FLAG_OVERFLOW; + pr_warn(PR_ "overflow of circular buffer\n"); + } + smp_store_release(&changes->head, (head + 1) & (size - 1)); + } + + /* Wake for either overflow or over-threshold cases. */ + if (CIRC_CNT(head + 1, tail, size) >= changes->threshold) + wake_up_interruptible(wait_queue); + +unlock: + spin_unlock_irqrestore(&changes->producer, irq); +} + +/** + * struct reader - Allows the kernel state changes to be read by user space. + * @node: The node in the @c readers locked list + * @rcu_head: storage for the RCU callback to free this reader (see kfree_rcu) + * @changes: The circular buffer of user changes + * @wait_queue: A wait queue for poll + * @context: a pointer to the parent context that created this reader. Can be + * used to remove the reader from the list of readers. Reference + * counted. + * + * The reader is a circular buffer in kernel space. State changes are pushed + * into the buffer. The flow from user space is: + * + * * Request file descriptor with KBASE_IOCTL_KINSTR_JM_FD. This will + * allocate the kernel side circular buffer with a size specified in the + * ioctl argument. + * * The user will then poll the file descriptor for data + * * Upon receiving POLLIN, perform a read() on the file descriptor to get + * the data out. 
+ * * The buffer memory will be freed when the file descriptor is closed + */ +struct reader { + struct hlist_bl_node node; + struct rcu_head rcu_head; + struct reader_changes changes; + wait_queue_head_t wait_queue; + struct kbase_kinstr_jm *context; +}; + +static struct kbase_kinstr_jm * +kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx); +static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx); +static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx, + struct reader *const reader); +static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx, + struct reader *const reader); + +/** + * reader_term() - Terminate an instrumentation job manager reader context. + * @reader: Pointer to context to be terminated. + */ +static void reader_term(struct reader *const reader) +{ + if (!reader) + return; + + kbase_kinstr_jm_readers_del(reader->context, reader); + reader_changes_term(&reader->changes); + kbase_kinstr_jm_ref_put(reader->context); + + kfree_rcu(reader, rcu_head); +} + +/** + * reader_init() - Initialise an instrumentation job manager reader context. + * @out_reader: Non-NULL pointer to where the pointer to the created context + * will be stored on success. + * @ctx: the pointer to the parent context. Reference count will be + * increased if initialization is successful + * @num_changes: The number of changes to allocate a buffer for + * + * Return: 0 on success, else error code. + */ +static int reader_init(struct reader **const out_reader, + struct kbase_kinstr_jm *const ctx, + size_t const num_changes) +{ + struct reader *reader = NULL; + const size_t change_size = sizeof(struct kbase_kinstr_jm_atom_state_change); + int status; + + if (!out_reader || !ctx || !num_changes) + return -EINVAL; + + reader = kzalloc(sizeof(*reader), GFP_KERNEL); + if (!reader) + return -ENOMEM; + + INIT_HLIST_BL_NODE(&reader->node); + init_waitqueue_head(&reader->wait_queue); + + reader->context = kbase_kinstr_jm_ref_get(ctx); + + status = reader_changes_init(&reader->changes, num_changes * change_size); + if (status < 0) + goto fail; + + status = kbase_kinstr_jm_readers_add(ctx, reader); + if (status < 0) + goto fail; + + *out_reader = reader; + + return 0; + +fail: + kbase_kinstr_jm_ref_put(reader->context); + kfree(reader); + return status; +} + +/** + * reader_release() - Invoked when the reader file descriptor is released + * @node: The inode that backs the file descriptor. In our case the reader + * file descriptor is backed by an anonymous node, so not much is in + * this. + * @file: the file data. Our reader context is held in the private data + * Return: zero on success + */ +static int reader_release(struct inode *const node, struct file *const file) +{ + struct reader *const reader = file->private_data; + + reader_term(reader); + file->private_data = NULL; + + return 0; +} + +/** + * reader_changes_copy_to_user() - Copy any changes from a changes structure to + * the user-provided buffer. + * @changes: The changes structure from which to copy. + * @buffer: The user buffer to copy the data to. + * @buffer_size: The number of bytes in the buffer. + * Return: The number of bytes copied or negative errno on failure.
+ */
+static ssize_t reader_changes_copy_to_user(struct reader_changes *const changes,
+ char __user *buffer,
+ size_t buffer_size)
+{
+ ssize_t ret = 0;
+ struct kbase_kinstr_jm_atom_state_change const *src_buf = READ_ONCE(
+ changes->data);
+ size_t const entry_size = sizeof(*src_buf);
+ size_t changes_tail, changes_count, read_size;
+
+ /* Needed for the quick buffer capacity calculation below.
+ * Note that we can't use is_power_of_2() since old compilers don't
+ * understand it's a constant expression.
+ */
+#define is_power_of_two(x) ((x) && !((x) & ((x) - 1)))
+ static_assert(is_power_of_two(
+ sizeof(struct kbase_kinstr_jm_atom_state_change)));
+#undef is_power_of_two
+
+ lockdep_assert_held_once(&changes->consumer);
+
+ /* Read continuously until either:
+ * - we've filled the output buffer, or
+ * - there are no changes when we check.
+ *
+ * If more changes arrive while we're copying to the user, we can copy
+ * those as well, space permitting.
+ */
+ do {
+ changes_tail = changes->tail;
+ changes_count = reader_changes_count_locked(changes);
+ read_size = min(changes_count * entry_size,
+ buffer_size & ~(entry_size - 1));
+
+ if (!read_size)
+ break;
+
+ if (copy_to_user(buffer, &(src_buf[changes_tail]), read_size))
+ return -EFAULT;
+
+ buffer += read_size;
+ buffer_size -= read_size;
+ ret += read_size;
+ changes_tail = (changes_tail + read_size / entry_size) &
+ (changes->size - 1);
+ smp_store_release(&changes->tail, changes_tail);
+ } while (read_size);
+
+ return ret;
+}
+
+/**
+ * reader_read() - Handles a read call on the reader file descriptor
+ *
+ * @filp: The file that the read was performed on
+ * @buffer: The destination buffer
+ * @buffer_size: The maximum number of bytes to read
+ * @offset: The offset into the 'file' to read from.
+ *
+ * Note that the destination buffer needs to be fully mapped in userspace or
+ * the read will fault.
+ *
+ * Return:
+ * * The number of bytes read or:
+ * * -EBADF - the file descriptor did not have an attached reader
+ * * -EFAULT - memory access fault
+ * * -ENOBUFS - the buffer is too small to hold even a single state change
+ * * -EIO - the destination buffer could not be verified as writable
+ * * -EAGAIN - if the file is set to nonblocking reads with O_NONBLOCK and there
+ * is no data available
+ * * -EINTR - a blocking wait for new data was interrupted by a signal
+ *
+ * Note: The number of bytes read will always be a multiple of the size of an
+ * entry.
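+ *
+ * As an illustrative sketch of the wrap-around arithmetic (the 16-byte entry
+ * size below is assumed purely for the example): with a 16-entry buffer,
+ * head = 3 and tail = 14, CIRC_CNT_TO_END() yields 2 entries (slots 14 and
+ * 15) that can be copied in one contiguous chunk before the tail wraps back
+ * to slot 0. Similarly, a 100-byte user buffer is first rounded down to
+ * 100 & ~(16 - 1) = 96 bytes, so at most six whole entries are copied and a
+ * partial entry never reaches user space.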
+ */
+static ssize_t reader_read(struct file *const filp,
+ char __user *const buffer,
+ size_t const buffer_size,
+ loff_t *const offset)
+{
+ struct reader *const reader = filp->private_data;
+ struct reader_changes *changes;
+ ssize_t ret;
+
+ if (!reader)
+ return -EBADF;
+
+ if (buffer_size < sizeof(struct kbase_kinstr_jm_atom_state_change))
+ return -ENOBUFS;
+
+#if KERNEL_VERSION(5, 0, 0) <= LINUX_VERSION_CODE
+ if (!access_ok(buffer, buffer_size))
+ return -EIO;
+#else
+ if (!access_ok(VERIFY_WRITE, buffer, buffer_size))
+ return -EIO;
+#endif
+
+ changes = &reader->changes;
+
+ mutex_lock(&changes->consumer);
+ if (!reader_changes_count_locked(changes)) {
+ if (filp->f_flags & O_NONBLOCK) {
+ ret = -EAGAIN;
+ goto exit;
+ }
+
+ if (wait_event_interruptible(
+ reader->wait_queue,
+ !!reader_changes_count_locked(changes))) {
+ ret = -EINTR;
+ goto exit;
+ }
+ }
+
+ ret = reader_changes_copy_to_user(changes, buffer, buffer_size);
+
+exit:
+ mutex_unlock(&changes->consumer);
+ return ret;
+}
+
+/**
+ * reader_poll() - Handles a poll call on the reader file descriptor
+ * @file: The file that the poll was performed on
+ * @wait: The poll table
+ *
+ * The results of the poll will be unreliable if there is no mapped memory as
+ * there is no circular buffer to push atom state changes into.
+ *
+ * Return:
+ * * 0 - no data ready
+ * * POLLIN - state changes have been buffered
+ * * -EBADF - the file descriptor did not have an attached reader
+ * * -EINVAL - the file or poll table pointer was invalid
+ */
+static __poll_t reader_poll(struct file *const file,
+ struct poll_table_struct *const wait)
+{
+ struct reader *reader;
+ struct reader_changes *changes;
+
+ if (unlikely(!file || !wait))
+ return -EINVAL;
+
+ reader = file->private_data;
+ if (unlikely(!reader))
+ return -EBADF;
+
+ changes = &reader->changes;
+
+ if (reader_changes_count(changes) >= changes->threshold)
+ return POLLIN;
+
+ poll_wait(file, &reader->wait_queue, wait);
+
+ return (reader_changes_count(changes) > 0) ? POLLIN : 0;
+}
+
+/* The file operations virtual function table */
+static const struct file_operations file_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = reader_read,
+ .poll = reader_poll,
+ .release = reader_release
+};
+
+/* The maximum number of readers that can be created on a context. */
+static const size_t kbase_kinstr_jm_readers_max = 16;
+
+/**
+ * kbase_kinstr_jm_release() - Invoked when the reference count is dropped
+ * @ref: the context reference count
+ */
+static void kbase_kinstr_jm_release(struct kref *const ref)
+{
+ struct kbase_kinstr_jm *const ctx =
+ container_of(ref, struct kbase_kinstr_jm, refcount);
+
+ kfree(ctx);
+}
+
+/**
+ * kbase_kinstr_jm_ref_get() - Reference counts the instrumentation context
+ * @ctx: the context to reference count
+ * Return: the reference counted context
+ */
+static struct kbase_kinstr_jm *
+kbase_kinstr_jm_ref_get(struct kbase_kinstr_jm *const ctx)
+{
+ if (likely(ctx))
+ kref_get(&ctx->refcount);
+ return ctx;
+}
+
+/**
+ * kbase_kinstr_jm_ref_put() - Drops a reference to the instrumentation context
+ * @ctx: the context to lower the reference count on
+ */
+static void kbase_kinstr_jm_ref_put(struct kbase_kinstr_jm *const ctx)
+{
+ if (likely(ctx))
+ kref_put(&ctx->refcount, kbase_kinstr_jm_release);
+}
+
+/**
+ * kbase_kinstr_jm_readers_add() - Adds a reader to the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to add
+ *
+ * Return:
+ * 0 - success
+ * -ENOMEM - too many readers already added.
+ */
+static int kbase_kinstr_jm_readers_add(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader)
+{
+ struct hlist_bl_head *const readers = &ctx->readers;
+ struct hlist_bl_node *node;
+ struct reader *temp;
+ size_t count = 0;
+
+ hlist_bl_lock(readers);
+
+ hlist_bl_for_each_entry_rcu(temp, node, readers, node)
+ ++count;
+
+ if (count >= kbase_kinstr_jm_readers_max) {
+ hlist_bl_unlock(readers);
+ return -ENOMEM;
+ }
+
+ hlist_bl_add_head_rcu(&reader->node, readers);
+
+ hlist_bl_unlock(readers);
+
+ static_branch_inc(&basep_kinstr_jm_reader_static_key);
+
+ return 0;
+}
+
+/**
+ * kbase_kinstr_jm_readers_del() - Deletes a reader from the list of readers
+ * @ctx: the instrumentation context
+ * @reader: the reader to delete
+ */
+static void kbase_kinstr_jm_readers_del(struct kbase_kinstr_jm *const ctx,
+ struct reader *const reader)
+{
+ struct hlist_bl_head *const readers = &ctx->readers;
+
+ hlist_bl_lock(readers);
+ hlist_bl_del_rcu(&reader->node);
+ hlist_bl_unlock(readers);
+
+ static_branch_dec(&basep_kinstr_jm_reader_static_key);
+}
+
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+ union kbase_kinstr_jm_fd *jm_fd_arg)
+{
+ struct kbase_kinstr_jm_fd_in const *in;
+ struct reader *reader;
+ size_t const change_size = sizeof(struct
+ kbase_kinstr_jm_atom_state_change);
+ int status;
+ int fd;
+ int i;
+
+ if (!ctx || !jm_fd_arg)
+ return -EINVAL;
+
+ in = &jm_fd_arg->in;
+
+ if (!is_power_of_2(in->count))
+ return -EINVAL;
+
+ for (i = 0; i < sizeof(in->padding); ++i)
+ if (in->padding[i])
+ return -EINVAL;
+
+ status = reader_init(&reader, ctx, in->count);
+ if (status < 0)
+ return status;
+
+ jm_fd_arg->out.version = KBASE_KINSTR_JM_VERSION;
+ jm_fd_arg->out.size = change_size;
+ memset(&jm_fd_arg->out.padding, 0, sizeof(jm_fd_arg->out.padding));
+
+ fd = anon_inode_getfd("[mali_kinstr_jm]", &file_operations, reader,
+ O_CLOEXEC);
+ if (fd < 0)
+ reader_term(reader);
+
+ return fd;
+}
+
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **const out_ctx)
+{
+ struct kbase_kinstr_jm *ctx = NULL;
+
+ if (!out_ctx)
+ return -EINVAL;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ INIT_HLIST_BL_HEAD(&ctx->readers);
+ kref_init(&ctx->refcount);
+
+ *out_ctx = ctx;
+
+ return 0;
+}
+
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *const ctx)
+{
+ kbase_kinstr_jm_ref_put(ctx);
+}
+
+void kbasep_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const katom,
+ const enum kbase_kinstr_jm_reader_atom_state state)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_kinstr_jm *const ctx = kctx->kinstr_jm;
+ const u8 id = kbase_jd_atom_id(kctx, katom);
+ struct kbase_kinstr_jm_atom_state_change change = {
+ .timestamp = ktime_get_raw_ns(), .atom = id, .state = state
+ };
+ struct reader *reader;
+ struct hlist_bl_node *node;
+
+ WARN(state >= KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT || state < 0,
+ PR_ "unsupported katom (%u) state (%i)", id, state);
+
+ switch (state) {
+ case KBASE_KINSTR_JM_READER_ATOM_STATE_START:
+ change.data.start.slot = katom->jobslot;
+ break;
+ default:
+ break;
+ }
+
+ rcu_read_lock();
+ hlist_bl_for_each_entry_rcu(reader, node, &ctx->readers, node)
+ reader_changes_push(
+ &reader->changes, &change, &reader->wait_queue);
+ rcu_read_unlock();
+}
+
+KBASE_EXPORT_TEST_API(kbasep_kinstr_jm_atom_state);
+
+void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const katom)
+{
+ struct kbase_context *const kctx = katom->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ const int slot = katom->slot_nr;
+
struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); + + BUILD_BUG_ON(SLOT_RB_SIZE != 2); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) + return; + if (WARN_ON(!submitted)) + return; + + if (submitted == katom) + kbase_kinstr_jm_atom_state_start(katom); +} + +void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const katom) +{ + struct kbase_context *const kctx = katom->kctx; + struct kbase_device *const kbdev = kctx->kbdev; + const int slot = katom->slot_nr; + struct kbase_jd_atom *const submitted = kbase_gpu_inspect(kbdev, slot, 0); + struct kbase_jd_atom *const queued = kbase_gpu_inspect(kbdev, slot, 1); + + BUILD_BUG_ON(SLOT_RB_SIZE != 2); + + lockdep_assert_held(&kbdev->hwaccess_lock); + + if (WARN_ON(slot < 0 || slot >= GPU_MAX_JOB_SLOTS)) + return; + if (WARN_ON(!submitted)) + return; + if (WARN_ON((submitted != katom) && (queued != katom))) + return; + + if (queued == katom) + return; + + if (katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_kinstr_jm_atom_state_stop(katom); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_kinstr_jm_atom_state_start(queued); +} diff --git a/mali_kbase/mali_kbase_kinstr_jm.h b/mali_kbase/mali_kbase_kinstr_jm.h new file mode 100644 index 0000000..555edfe --- /dev/null +++ b/mali_kbase/mali_kbase_kinstr_jm.h @@ -0,0 +1,283 @@ +/* + * + * (C) COPYRIGHT 2019,2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* + * mali_kbase_kinstr_jm.h + * Kernel driver public interface to job manager atom tracing. This API provides + * a method to get the atom state changes into user space. + * + * The flow of operation is: + * + * | kernel | user | + * | ----------------------------------- | ----------------------------------- | + * | Initialize API with | | + * | kbase_kinstr_jm_init() | | + * | | | + * | Kernel code injects states with | | + * | kbase_kinstr_jm_atom_state_*() APIs | | + * | | Call ioctl() to get file descriptor | + * | | via KBASE_IOCTL_KINSTR_JM_FD | + * | Allocates a reader attached to FD | | + * | Allocates circular buffer and | | + * | patches, via ASM goto, the | | + * | kbase_kinstr_jm_atom_state_*() | | + * | | loop: | + * | | Call poll() on FD for POLLIN | + * | When threshold of changes is hit, | | + * | the poll is interrupted with | | + * | POLLIN. If circular buffer is | | + * | full then store the missed count | | + * | and interrupt poll | Call read() to get data from | + * | | circular buffer via the fd | + * | Kernel advances tail of circular | | + * | buffer | | + * | | Close file descriptor | + * | Deallocates circular buffer | | + * | | | + * | Terminate API with | | + * | kbase_kinstr_jm_term() | | + * + * All tracepoints are guarded on a static key. 
The static key is activated when
+ * a user space reader gets created. This means that there is negligible cost
+ * inserting the tracepoints into code when there are no readers.
+ */
+
+#ifndef _KBASE_KINSTR_JM_H_
+#define _KBASE_KINSTR_JM_H_
+
+#include "mali_kbase_kinstr_jm_reader.h"
+
+#ifdef __KERNEL__
+#include <linux/version.h>
+#include <linux/static_key.h>
+#else
+/* empty wrapper macros for userspace */
+#define static_branch_unlikely(key) (1)
+#define KERNEL_VERSION(a, b, c) (0)
+#define LINUX_VERSION_CODE (1)
+#endif /* __KERNEL__ */
+
+/* Forward declarations */
+struct kbase_context;
+struct kbase_kinstr_jm;
+struct kbase_jd_atom;
+union kbase_kinstr_jm_fd;
+
+/**
+ * kbase_kinstr_jm_init() - Initialise an instrumentation job manager context.
+ * @ctx: Non-NULL pointer to where the pointer to the created context will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_kinstr_jm_init(struct kbase_kinstr_jm **ctx);
+
+/**
+ * kbase_kinstr_jm_term() - Terminate an instrumentation job manager context.
+ * @ctx: Pointer to context to be terminated.
+ */
+void kbase_kinstr_jm_term(struct kbase_kinstr_jm *ctx);
+
+/**
+ * kbase_kinstr_jm_get_fd() - Retrieves a file descriptor that can be used to
+ * read the atom state changes from userspace
+ *
+ * @ctx: Pointer to the initialized context
+ * @jm_fd_arg: Pointer to the union containing the in/out params
+ * Return: file descriptor on success, else a negative error code
+ */
+int kbase_kinstr_jm_get_fd(struct kbase_kinstr_jm *const ctx,
+ union kbase_kinstr_jm_fd *jm_fd_arg);
+
+/**
+ * kbasep_kinstr_jm_atom_state() - Signifies that an atom has changed state
+ * @atom: The atom that has changed state
+ * @state: The new state of the atom
+ *
+ * This performs the actual storage of the state ready for user space to
+ * read the data. It is only called when the static key is enabled from
+ * kbase_kinstr_jm_atom_state(). There is almost never a need to invoke this
+ * function directly.
+ */
+void kbasep_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const atom,
+ const enum kbase_kinstr_jm_reader_atom_state state);
+
+/* Allows ASM goto patching to reduce tracing overhead. This is
+ * incremented/decremented when readers are created and terminated. This really
+ * shouldn't be changed externally, but if you do, make sure you use
+ * a static_branch_inc()/static_branch_dec() pair.
+ */
+#if KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE
+extern struct static_key_false basep_kinstr_jm_reader_static_key;
+#else
+/* Pre-4.3 kernels have a different API for static keys, but it works
+ * mostly the same, with less type safety. */
+extern struct static_key basep_kinstr_jm_reader_static_key;
+#define static_branch_unlikely(key) static_key_false(key)
+#endif /* KERNEL_VERSION(4, 3, 0) <= LINUX_VERSION_CODE */
+
+/**
+ * kbase_kinstr_jm_atom_state() - Signifies that an atom has changed state
+ * @atom: The atom that has changed state
+ * @state: The new state of the atom
+ *
+ * This uses a static key to reduce overhead when tracing is disabled
+ */
+static inline void kbase_kinstr_jm_atom_state(
+ struct kbase_jd_atom *const atom,
+ const enum kbase_kinstr_jm_reader_atom_state state)
+{
+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
+ kbasep_kinstr_jm_atom_state(atom, state);
+}
+
+/**
+ * kbase_kinstr_jm_atom_state_queue() - Signifies that an atom has entered a
+ * hardware or software queue.
+ * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_queue( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE); +} + +/** + * kbase_kinstr_jm_atom_state_start() - Signifies that work has started on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_START); +} + +/** + * kbase_kinstr_jm_atom_state_stop() - Signifies that work has stopped on an + * atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_STOP); +} + +/** + * kbase_kinstr_jm_atom_state_complete() - Signifies that all work has completed + * on an atom + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_state_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state( + atom, KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE); +} + +/** + * kbase_kinstr_jm_atom_queue() - A software *or* hardware atom is queued for + * execution + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_queue(struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_queue(atom); +} + +/** + * kbase_kinstr_jm_atom_complete() - A software *or* hardware atom is fully + * completed + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_complete( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_complete(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_start() - A software atom has started work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_start( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_start(atom); +} + +/** + * kbase_kinstr_jm_atom_sw_stop() - A software atom has stopped work + * @atom: The atom that has changed state + */ +static inline void kbase_kinstr_jm_atom_sw_stop( + struct kbase_jd_atom *const atom) +{ + kbase_kinstr_jm_atom_state_stop(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_submit(). Use that instead. + */ +void kbasep_kinstr_jm_atom_hw_submit(struct kbase_jd_atom *const atom); + +/** + * kbase_kinstr_jm_atom_hw_submit() - A hardware atom has been submitted + * @atom: The atom that has been submitted + */ +static inline void kbase_kinstr_jm_atom_hw_submit( + struct kbase_jd_atom *const atom) +{ + if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key)) + kbasep_kinstr_jm_atom_hw_submit(atom); +} + +/** + * kbasep_kinstr_jm_atom_hw_release() - A hardware atom has been released + * @atom: The atom that has been released + * + * This private implementation should not be called directly, it is protected + * by a static key in kbase_kinstr_jm_atom_hw_release(). Use that instead. 
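+ *
+ * For illustration of the release logic (a sketch of the behaviour
+ * implemented in mali_kbase_kinstr_jm.c): consider a job slot ringbuffer
+ * holding atom A at index 0 (submitted) and atom B at index 1 (queued).
+ * When A is released:
+ * - A is in the KBASE_ATOM_GPU_RB_SUBMITTED state, so a STOP state change
+ *   is pushed for A;
+ * - if B has also reached KBASE_ATOM_GPU_RB_SUBMITTED, a START state change
+ *   is pushed for B, which is now the running atom.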
+ */
+void kbasep_kinstr_jm_atom_hw_release(struct kbase_jd_atom *const atom);
+
+/**
+ * kbase_kinstr_jm_atom_hw_release() - A hardware atom has been released
+ * @atom: The atom that has been released
+ */
+static inline void kbase_kinstr_jm_atom_hw_release(
+ struct kbase_jd_atom *const atom)
+{
+ if (static_branch_unlikely(&basep_kinstr_jm_reader_static_key))
+ kbasep_kinstr_jm_atom_hw_release(atom);
+}
+
+#endif /* _KBASE_KINSTR_JM_H_ */
diff --git a/mali_kbase/mali_kbase_kinstr_jm_reader.h b/mali_kbase/mali_kbase_kinstr_jm_reader.h
new file mode 100644
index 0000000..e267e6b
--- /dev/null
+++ b/mali_kbase/mali_kbase_kinstr_jm_reader.h
@@ -0,0 +1,70 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * mali_kbase_kinstr_jm_reader.h
+ * Provides an ioctl API to read kernel atom state changes. The flow of the
+ * API is:
+ * 1. Obtain the file descriptor with ``KBASE_IOCTL_KINSTR_JM_FD``
+ * 2. Determine the buffer structure layout via the above ioctl's returned
+ * size and version fields in ``struct kbase_kinstr_jm_fd_out``
+ * 3. Poll the file descriptor for ``POLLIN``
+ * 4. Get data with read() on the fd
+ * 5. Use the structure version to understand how to read the data from the
+ * buffer
+ * 6. Repeat 3-5
+ * 7. Close the file descriptor
+ */
+
+#ifndef _KBASE_KINSTR_JM_READER_H_
+#define _KBASE_KINSTR_JM_READER_H_
+
+/**
+ * enum kbase_kinstr_jm_reader_atom_state - Determines the work state of an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE: Signifies that an atom has
+ * entered a hardware or software queue
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_START: Signifies that work has started
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_STOP: Signifies that work has stopped
+ * on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE: Signifies that work has
+ * completed on an atom
+ * @KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT: The number of state enumerations
+ *
+ * We can add new states to the end of this if they do not break the existing
+ * state machine. Old user mode code can gracefully ignore states it does not
+ * understand.
+ *
+ * If we need to make a breaking change to the state machine, we can do that by
+ * changing the version reported by KBASE_IOCTL_KINSTR_JM_FD. This will
+ * mean that old user mode code will fail to understand the new state field in
+ * the structure and gracefully not use the state change API.
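+ *
+ * A hypothetical user-space consumer of the flow above (the kbase device
+ * descriptor kbase_fd and all error handling are assumed; this is an
+ * illustrative sketch, not driver code):
+ *
+ *	union kbase_kinstr_jm_fd args = { .in = { .count = 256 } };
+ *	int fd = ioctl(kbase_fd, KBASE_IOCTL_KINSTR_JM_FD, &args);
+ *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
+ *	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
+ *		char buf[4096];
+ *		ssize_t bytes = read(fd, buf, sizeof(buf));
+ *		... walk buf in args.out.size byte strides, using
+ *		args.out.version to decide how to decode each entry ...
+ *	}
+ *	close(fd);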
+ */ +enum kbase_kinstr_jm_reader_atom_state { + KBASE_KINSTR_JM_READER_ATOM_STATE_QUEUE, + KBASE_KINSTR_JM_READER_ATOM_STATE_START, + KBASE_KINSTR_JM_READER_ATOM_STATE_STOP, + KBASE_KINSTR_JM_READER_ATOM_STATE_COMPLETE, + KBASE_KINSTR_JM_READER_ATOM_STATE_COUNT +}; + +#endif /* _KBASE_KINSTR_JM_READER_H_ */ diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 4a1004b..8cf7e5d 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -43,6 +43,7 @@ #include <mali_kbase_mem_pool_group.h> #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_config_defaults.h> +#include <mali_kbase_trace_gpu_mem.h> /* * Alignment of objects allocated by the GPU inside a just-in-time memory @@ -847,13 +848,14 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) return -EINVAL; -#if MALI_JIT_PRESSURE_LIMIT if (phys_pages_limit > jit_va_pages) -#else - if (phys_pages_limit != jit_va_pages) -#endif /* MALI_JIT_PRESSURE_LIMIT */ return -EINVAL; +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (phys_pages_limit != jit_va_pages) + kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_lock(kctx); #ifdef CONFIG_64BIT @@ -870,11 +872,11 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, kctx->trim_level = trim_level; kctx->jit_va = true; kctx->jit_group_id = group_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE kctx->jit_phys_pages_limit = phys_pages_limit; dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", phys_pages_limit); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } kbase_gpu_vm_unlock(kctx); @@ -976,6 +978,12 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); + spin_lock_init(&kbdev->gpu_mem_usage_lock); + kbdev->total_gpu_pages = 0; + kbdev->process_root = RB_ROOT; + kbdev->dma_buf_root = RB_ROOT; + mutex_init(&kbdev->dma_buf_lock); + #ifdef IR_THRESHOLD atomic_set(&memdev->ir_threshold, IR_THRESHOLD); #else @@ -1053,6 +1061,11 @@ void kbase_mem_term(struct kbase_device *kbdev) kbase_mem_pool_group_term(&kbdev->mem_pools); + WARN_ON(kbdev->total_gpu_pages); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); + mutex_destroy(&kbdev->dma_buf_lock); + if (kbdev->mgm_dev) module_put(kbdev->mgm_dev->owner); } @@ -2033,6 +2046,9 @@ no_new_partial: (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return 0; @@ -2209,6 +2225,9 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return new_pages; @@ -2374,6 +2393,8 @@ int kbase_free_phy_pages_helper( kbdev, kctx->id, (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } return 0; @@ -2496,6 +2517,8 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, kbdev, kctx->id, (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } } @@ -2558,6 +2581,8 @@ void kbase_mem_kref_free(struct kref *kref) alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, + alloc); } dma_buf_detach(alloc->imported.umm.dma_buf, 
alloc->imported.umm.dma_attachment); @@ -2643,18 +2668,28 @@ bool kbase_check_alloc_flags(unsigned long flags) /* GPU executable memory cannot: * - Be written by the GPU * - Be grown on GPU page fault - * - Have the top of its initial commit aligned to 'extent' */ + */ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & - (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_TILER_ALIGN_TOP))) + (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + return false; + + /* GPU executable memory also cannot have the top of its initial + * commit aligned to 'extent' + */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) return false; /* To have an allocation lie within a 4GB chunk is required only for - * TLS memory, which will never be used to contain executable code - * and also used for Tiler heap. + * TLS memory, which will never be used to contain executable code. */ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & - (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) + BASE_MEM_PROT_GPU_EX)) + return false; + + /* TLS memory should also not be used for tiler heap */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) return false; /* GPU should have at least read or write access otherwise there is no @@ -2751,9 +2786,13 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, return -EINVAL; } - if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent == 0) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); + if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extent == 0)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF but extent == 0\n"); + return -EINVAL; + } + + if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extent == 0)) { + dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); return -EINVAL; } @@ -2983,7 +3022,7 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) { struct kbase_context *kctx = data->kctx; @@ -3038,7 +3077,7 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, kbase_jit_debugfs_trim_get); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ void kbase_jit_debugfs_init(struct kbase_context *kctx) { @@ -3078,7 +3117,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_phys_fops); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* * Debugfs entry for getting the number of pages used * by JIT allocations for estimating the physical pressure @@ -3093,7 +3132,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_trim_fops); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } #endif /* CONFIG_DEBUG_FS */ @@ -3153,14 +3192,16 @@ int kbase_jit_init(struct kbase_context *kctx) * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets * the alignment requirements. 
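 *
 * As a worked illustration (values assumed for the example): with
 * info->extent = 512 pages, a candidate whose initial commit ends on a
 * 512-page boundary (say a commit of 1024 pages) keeps the top of the commit
 * aligned as required, whereas a commit ending at page 1000 does not.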
*/ -static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, - struct kbase_va_region *walker, const struct base_jit_alloc_info *info) +static bool meet_size_and_tiler_align_top_requirements( + const struct kbase_va_region *walker, + const struct base_jit_alloc_info *info) { bool meet_reqs = true; if (walker->nr_pages != info->va_pages) meet_reqs = false; - else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { + + if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { size_t align = info->extent; size_t align_mask = align - 1; @@ -3171,7 +3212,7 @@ static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kct return meet_reqs; } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Function will guarantee *@freed will not exceed @pages_needed */ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, @@ -3308,8 +3349,10 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, struct kbase_va_region *reg, *tmp; size_t total_freed = 0; - kbase_gpu_vm_lock(kctx); - mutex_lock(&kctx->jit_evict_lock); + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { int err; size_t freed = 0u; @@ -3328,18 +3371,17 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, if (!pages_needed) break; } - mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); trace_mali_jit_trim(total_freed); return total_freed; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ static int kbase_jit_grow(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - struct kbase_va_region *reg) + const struct base_jit_alloc_info *info, + struct kbase_va_region *reg, + struct kbase_sub_alloc **prealloc_sas) { size_t delta; size_t pages_required; @@ -3347,15 +3389,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, struct kbase_mem_pool *pool; int ret = -ENOMEM; struct tagged_addr *gpu_pages; - struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; - int i; if (info->commit_pages > reg->nr_pages) { /* Attempted to grow larger than maximum size */ return -EINVAL; } - kbase_gpu_vm_lock(kctx); + lockdep_assert_held(&kctx->reg_lock); /* Make the physical backing no longer reclaimable */ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) @@ -3372,14 +3412,6 @@ static int kbase_jit_grow(struct kbase_context *kctx, pages_required = delta; #ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), - GFP_KERNEL); - if (!prealloc_sas[i]) - goto update_failed; - } - if (pages_required >= (SZ_2M / SZ_4K)) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ @@ -3405,15 +3437,18 @@ static int kbase_jit_grow(struct kbase_context *kctx, */ while (kbase_mem_pool_size(pool) < pages_required) { int pool_delta = pages_required - kbase_mem_pool_size(pool); + int ret; kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); + kbase_gpu_vm_unlock(kctx); + ret = kbase_mem_pool_grow(pool, pool_delta); + kbase_gpu_vm_lock(kctx); - if (kbase_mem_pool_grow(pool, pool_delta)) - goto update_failed_unlocked; + if (ret) + goto update_failed; - kbase_gpu_vm_lock(kctx); spin_lock(&kctx->mem_partials_lock); kbase_mem_pool_lock(pool); } @@ -3459,11 
+3494,6 @@ done: reg->extent = info->extent; update_failed: - kbase_gpu_vm_unlock(kctx); -update_failed_unlocked: - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) - kfree(prealloc_sas[i]); - return ret; } @@ -3492,9 +3522,9 @@ static void trace_jit_stats(struct kbase_context *kctx, max_allocations, alloc_count, va_pages, ph_pages); } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** - * get_jit_backed_pressure() - calculate the physical backing of all JIT + * get_jit_phys_backing() - calculate the physical backing of all JIT * allocations * * @kctx: Pointer to the kbase context whose active JIT allocations will be @@ -3502,83 +3532,48 @@ static void trace_jit_stats(struct kbase_context *kctx, * * Return: number of pages that are committed by JIT allocations */ -static size_t get_jit_backed_pressure(struct kbase_context *kctx) +static size_t get_jit_phys_backing(struct kbase_context *kctx) { - size_t backed_pressure = 0; - int jit_id; - - lockdep_assert_held(&kctx->jctx.lock); + struct kbase_va_region *walker; + size_t backing = 0; - kbase_gpu_vm_lock(kctx); - for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) { - struct kbase_va_region *reg = kctx->jit_alloc[jit_id]; + lockdep_assert_held(&kctx->jit_evict_lock); - if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC)) { - /* If region has no report, be pessimistic */ - if (reg->used_pages == reg->nr_pages) { - backed_pressure += reg->nr_pages; - } else { - backed_pressure += - kbase_reg_current_backed_size(reg); - } - } + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + backing += kbase_reg_current_backed_size(walker); } - kbase_gpu_vm_unlock(kctx); - return backed_pressure; + return backing; } -/** - * jit_trim_necessary_pages() - calculate and trim the least pages possible to - * satisfy a new JIT allocation - * - * @kctx: Pointer to the kbase context - * @info: Pointer to JIT allocation information for the new allocation - * - * Before allocating a new just-in-time memory region or reusing a previous - * one, ensure that the total JIT physical page usage also will not exceed the - * pressure limit. - * - * If there are no reported-on allocations, then we already guarantee this will - * be the case - because our current pressure then only comes from the va_pages - * of each JIT region, hence JIT physical page usage is guaranteed to be - * bounded by this. - * - * However as soon as JIT allocations become "reported on", the pressure is - * lowered to allow new JIT regions to be allocated. It is after such a point - * that the total JIT physical page usage could (either now or in the future on - * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly - * allocated JIT regions. Hence, trim any "reported on" regions. - * - * Any pages freed will go into the pool and be allocated from there in - * kbase_mem_alloc(). - */ -static void jit_trim_necessary_pages(struct kbase_context *kctx, - const struct base_jit_alloc_info *info) +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, + size_t needed_pages) { - size_t backed_pressure = 0; - size_t needed_pages = 0; + size_t jit_backing = 0; + size_t pages_to_trim = 0; - backed_pressure = get_jit_backed_pressure(kctx); + lockdep_assert_held(&kctx->jctx.lock); + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + jit_backing = get_jit_phys_backing(kctx); /* It is possible that this is the case - if this is the first * allocation after "ignore_pressure_limit" allocation. 
*/ - if (backed_pressure > kctx->jit_phys_pages_limit) { - needed_pages += - (backed_pressure - kctx->jit_phys_pages_limit) - + info->va_pages; + if (jit_backing > kctx->jit_phys_pages_limit) { + pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + + needed_pages; } else { - size_t backed_diff = - kctx->jit_phys_pages_limit - backed_pressure; + size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; - if (info->va_pages > backed_diff) - needed_pages += info->va_pages - backed_diff; + if (needed_pages > backed_diff) + pages_to_trim += needed_pages - backed_diff; } - if (needed_pages) { - size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, - needed_pages); + if (pages_to_trim) { + size_t trimmed_pages = + kbase_mem_jit_trim_pages(kctx, pages_to_trim); /* This should never happen - we already asserted that * we are not violating JIT pressure limit in earlier @@ -3586,10 +3581,10 @@ static void jit_trim_necessary_pages(struct kbase_context *kctx, * must have enough unused pages to satisfy the new * allocation */ - WARN_ON(trimmed_pages < needed_pages); + WARN_ON(trimmed_pages < pages_to_trim); } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * jit_allow_allocate() - check whether basic conditions are satisfied to allow @@ -3608,8 +3603,8 @@ static bool jit_allow_allocate(struct kbase_context *kctx, { lockdep_assert_held(&kctx->jctx.lock); -#if MALI_JIT_PRESSURE_LIMIT - if (likely(!ignore_pressure_limit) && +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit && ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { dev_dbg(kctx->kbdev->dev, @@ -3618,7 +3613,7 @@ static bool jit_allow_allocate(struct kbase_context *kctx, kctx->jit_phys_pages_limit); return false; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ @@ -3644,123 +3639,152 @@ static bool jit_allow_allocate(struct kbase_context *kctx, return true; } +static struct kbase_va_region * +find_reasonable_region(const struct base_jit_alloc_info *info, + struct list_head *pool_head, bool ignore_usage_id) +{ + struct kbase_va_region *closest_reg = NULL; + struct kbase_va_region *walker; + size_t current_diff = SIZE_MAX; + + list_for_each_entry(walker, pool_head, jit_node) { + if ((ignore_usage_id || + walker->jit_usage_id == info->usage_id) && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements(walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been met, + * it's suitable but other allocations might be a + * better fit. 
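+ *
+ * For illustration (sizes assumed): with candidate regions backed
+ * by 100, 260 and 300 pages and info->commit_pages = 256, the
+ * differences are 156, 4 and 44 pages, so the 260-page region is
+ * kept; a difference of 0 is an exact match and stops the scan
+ * early.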
+ */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + closest_reg = walker; + } + + /* The allocation is an exact match */ + if (current_diff == 0) + break; + } + } + + return closest_reg; +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { struct kbase_va_region *reg = NULL; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; lockdep_assert_held(&kctx->jctx.lock); if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; -#if MALI_JIT_PRESSURE_LIMIT - if (!ignore_pressure_limit) - jit_trim_necessary_pages(kctx, info); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) + goto end; + } +#endif + kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); /* * Scan the pool for an existing allocation which meets our * requirements and remove it. */ - if (info->usage_id != 0) { + if (info->usage_id != 0) /* First scan for an allocation with the same usage ID */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_usage_id == info->usage_id && - walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, false); - /* The allocation is an exact match */ - if (current_diff == 0) - break; - } - } - } - - if (!reg) { + if (!reg) /* No allocation with the same usage ID, or usage IDs not in * use. Search for an allocation we can reuse. */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match, so stop - * looking. - */ - if (current_diff == 0) - break; - } - } - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, true); if (reg) { +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t needed_pages = 0; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + int ret; + /* * Remove the found region from the pool and add it to the * active list. 
 */
 list_move(&reg->jit_node, &kctx->jit_active_head);
+
+ WARN_ON(reg->gpu_alloc->evicted);
+
 /*
 * Remove the allocation from the eviction list as it's no
 * longer eligible for eviction. This must be done before
 * dropping the jit_evict_lock
 */
 list_del_init(&reg->gpu_alloc->evict_node);
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit) {
+ if (info->commit_pages > reg->gpu_alloc->nents)
+ needed_pages = info->commit_pages -
+ reg->gpu_alloc->nents;
+
+ /* Update early the recycled JIT region's estimate of
+ * used_pages to ensure it doesn't get trimmed
+ * undesirably. This is needed as the recycled JIT
+ * region has been added to the active list but the
+ * number of used pages for it would be zero, so it
+ * could get trimmed instead of other allocations only
+ * to be regrown later resulting in a breach of the JIT
+ * physical pressure limit.
+ * Also that trimming would disturb the accounting of
+ * physical pages, i.e. the VM stats, as the number of
+ * backing pages would have changed when the call to
+ * kbase_mem_evictable_unmark_reclaim is made.
+ *
+ * The second call to update pressure at the end of
+ * this function would effectively be a nop.
+ */
+ kbase_jit_report_update_pressure(
+ kctx, reg, info->va_pages,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+
+ kbase_jit_request_phys_increase_locked(kctx,
+ needed_pages);
+ }
+#endif
 mutex_unlock(&kctx->jit_evict_lock);
- if (kbase_jit_grow(kctx, info, reg) < 0) {
+ /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock',
+ * so any state protected by that lock might need to be
+ * re-evaluated if more code is added here in future.
+ */
+ ret = kbase_jit_grow(kctx, info, reg, prealloc_sas);
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit)
+ kbase_jit_done_phys_increase(kctx, needed_pages);
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+ kbase_gpu_vm_unlock(kctx);
+
+ if (ret < 0) {
 /*
 * An update to an allocation from the pool failed,
 * chances are slim a new allocation would fair any
@@ -3770,10 +3794,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 dev_dbg(kctx->kbdev->dev,
 "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n",
 info->va_pages, info->commit_pages);
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ /* Undo the early change made to the recycled JIT
+ * region's estimate of used_pages.
+ */
+ if (!ignore_pressure_limit) {
+ kbase_jit_report_update_pressure(
+ kctx, reg, 0,
+ KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 mutex_lock(&kctx->jit_evict_lock);
 list_move(&reg->jit_node, &kctx->jit_pool_head);
 mutex_unlock(&kctx->jit_evict_lock);
- return NULL;
+ reg = NULL;
+ goto end;
 }
 } else {
 /* No suitable JIT allocation was found so create a new one */
@@ -3783,12 +3818,23 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 BASEP_MEM_NO_USER_FREE;
 u64 gpu_addr;
- mutex_unlock(&kctx->jit_evict_lock);
-
 if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
 flags |= BASE_MEM_TILER_ALIGN_TOP;
 flags |= base_mem_group_id_set(kctx->jit_group_id);
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ if (!ignore_pressure_limit) {
+ flags |= BASEP_MEM_PERFORM_JIT_TRIM;
+ /* The corresponding call to 'done_phys_increase' would
+ * be made inside the kbase_mem_alloc().
+ */
+ kbase_jit_request_phys_increase_locked(
+ kctx, info->commit_pages);
+ }
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
 reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages,
 info->extent, &flags, &gpu_addr);
@@ -3799,12 +3845,22 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 dev_dbg(kctx->kbdev->dev,
 "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n",
 info->va_pages, info->commit_pages);
- return NULL;
+ goto end;
 }
- mutex_lock(&kctx->jit_evict_lock);
- list_add(&reg->jit_node, &kctx->jit_active_head);
- mutex_unlock(&kctx->jit_evict_lock);
+ if (!ignore_pressure_limit) {
+ /* Due to the enforcement of the pressure limit,
+ * kbase_mem_alloc was instructed to perform the
+ * trimming which in turn would have ensured that the
+ * new JIT allocation is already in the
+ * jit_active_head list, so nothing to do here.
+ */
+ WARN_ON(list_empty(&reg->jit_node));
+ } else {
+ mutex_lock(&kctx->jit_evict_lock);
+ list_add(&reg->jit_node, &kctx->jit_active_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+ }
 }
 trace_mali_jit_alloc(reg, info->id);
@@ -3816,13 +3872,18 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
 reg->jit_usage_id = info->usage_id;
 reg->jit_bin_id = info->bin_id;
-#if MALI_JIT_PRESSURE_LIMIT
+ reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC;
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
 reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE;
 reg->heap_info_gpu_addr = info->heap_info_gpu_addr;
 kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
 KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+
+end:
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
+ kfree(prealloc_sas[i]);
 return reg;
 }
@@ -3848,11 +3909,11 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 kbase_mem_shrink(kctx, reg, old_pages - delta);
 }
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 reg->heap_info_gpu_addr = 0;
 kbase_jit_report_update_pressure(kctx, reg, 0,
 KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 kctx->jit_current_allocations--;
 kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
@@ -3863,6 +3924,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
 kbase_gpu_vm_lock(kctx);
 reg->flags |= KBASE_REG_DONT_NEED;
+ reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC;
 kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
 kbase_gpu_vm_unlock(kctx);
@@ -3962,6 +4024,9 @@ void kbase_jit_term(struct kbase_context *kctx)
 kbase_mem_free_region(kctx, walker);
 mutex_lock(&kctx->jit_evict_lock);
 }
+#if MALI_JIT_PRESSURE_LIMIT_BASE
+ WARN_ON(kctx->jit_phys_pages_to_be_allocated);
+#endif
 mutex_unlock(&kctx->jit_evict_lock);
 kbase_gpu_vm_unlock(kctx);
@@ -3972,7 +4037,7 @@ void kbase_jit_term(struct kbase_context *kctx)
 cancel_work_sync(&kctx->jit_work);
 }
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 struct kbase_va_region *reg, unsigned int flags)
 {
@@ -4015,9 +4080,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 out:
 return;
 }
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 void
kbase_jit_report_update_pressure(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_used_pages, unsigned int flags) @@ -4053,7 +4118,7 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ bool kbase_has_exec_va_zone(struct kbase_context *kctx) { diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 6e921ec..a057f61 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -141,6 +141,7 @@ struct kbase_mem_phy_alloc { union { struct { + struct kbase_context *kctx; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; unsigned int current_mapping_usage_count; @@ -330,7 +331,8 @@ struct kbase_va_region { /* Bit 22 is reserved. * - * Do not remove, use the next unreserved bit for new flags */ + * Do not remove, use the next unreserved bit for new flags + */ #define KBASE_REG_RESERVED_BIT_22 (1ul << 22) /* The top of the initial commit is aligned to extent pages. @@ -367,6 +369,9 @@ struct kbase_va_region { */ #define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) +/* Allocation is actively used for JIT memory */ +#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + #define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) /* only used with 32-bit clients */ @@ -398,7 +403,7 @@ struct kbase_va_region { struct list_head jit_node; u16 jit_usage_id; u8 jit_bin_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Pointer to an object in GPU memory defining an end of an allocated * region * @@ -423,7 +428,7 @@ struct kbase_va_region { * gpu_alloc->nents) */ size_t used_pages; -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ int va_refcnt; }; @@ -1497,7 +1502,7 @@ bool kbase_jit_evict(struct kbase_context *kctx); */ void kbase_jit_term(struct kbase_context *kctx); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** * kbase_trace_jit_report_gpu_mem_trace_enabled - variant of * kbase_trace_jit_report_gpu_mem() that should only be called once the @@ -1508,7 +1513,7 @@ void kbase_jit_term(struct kbase_context *kctx); */ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned int flags); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * kbase_trace_jit_report_gpu_mem - Trace information about the GPU memory used @@ -1530,7 +1535,7 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, * been included. Also gives no opportunity for the compiler to mess up * inlining it. 
 */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
 do { \
 if (trace_mali_jit_report_gpu_mem_enabled()) \
@@ -1540,9 +1545,9 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 #else
 #define kbase_trace_jit_report_gpu_mem(kctx, reg, flags) \
 CSTD_NOP(kctx, reg, flags)
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
-#if MALI_JIT_PRESSURE_LIMIT
+#if MALI_JIT_PRESSURE_LIMIT_BASE
 /**
 * kbase_jit_report_update_pressure - safely update the JIT physical page
 * pressure and JIT region's estimate of used_pages
@@ -1562,7 +1567,123 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
 void kbase_jit_report_update_pressure(struct kbase_context *kctx,
 struct kbase_va_region *reg, u64 new_used_pages,
 unsigned int flags);
-#endif /* MALI_JIT_PRESSURE_LIMIT */
+
+/**
+ * kbase_jit_trim_necessary_pages() - calculate and trim the least pages
+ * possible to satisfy a new JIT allocation
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of JIT physical pages by which trimming is requested.
+ * The actual number of pages trimmed could differ.
+ *
+ * Before allocating a new just-in-time memory region or reusing a previous
+ * one, ensure that the total JIT physical page usage also will not exceed the
+ * pressure limit.
+ *
+ * If there are no reported-on allocations, then we already guarantee this will
+ * be the case - because our current pressure then only comes from the va_pages
+ * of each JIT region, hence JIT physical page usage is guaranteed to be
+ * bounded by this.
+ *
+ * However as soon as JIT allocations become "reported on", the pressure is
+ * lowered to allow new JIT regions to be allocated. It is after such a point
+ * that the total JIT physical page usage could (either now or in the future on
+ * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly
+ * allocated JIT regions. Hence, trim any "reported on" regions.
+ *
+ * Any pages freed will go into the pool and be allocated from there in
+ * kbase_mem_alloc().
+ */
+void kbase_jit_trim_necessary_pages(struct kbase_context *kctx,
+ size_t needed_pages);
+
+/*
+ * Same as kbase_jit_request_phys_increase(), except that the caller is
+ * supposed to take jit_evict_lock also on @kctx before calling this function.
+ */
+static inline void
+kbase_jit_request_phys_increase_locked(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+ lockdep_assert_held(&kctx->jit_evict_lock);
+
+ kctx->jit_phys_pages_to_be_allocated += needed_pages;
+
+ kbase_jit_trim_necessary_pages(kctx,
+ kctx->jit_phys_pages_to_be_allocated);
+}
+
+/**
+ * kbase_jit_request_phys_increase() - Increment the backing pages count and do
+ * the required trimming before allocating pages for a JIT allocation.
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of pages to be allocated for the JIT allocation.
+ *
+ * This function needs to be called before allocating backing pages for a
+ * just-in-time memory region. The backing pages are currently allocated when:
+ *
+ * - A new JIT region is created.
+ * - An old JIT region is reused from the cached pool.
+ * - GPU page fault occurs for the active JIT region.
+ * - Backing is grown for the JIT region through the commit ioctl.
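+ *
+ * A minimal sketch of the expected pairing (illustrative only; locking and
+ * error handling elided):
+ *
+ *	kbase_jit_request_phys_increase(kctx, needed_pages);
+ *	... allocate and account the backing pages ...
+ *	kbase_jit_done_phys_increase(kctx, needed_pages);
+ *
+ * The trim this requests is sized to the shortfall. For example, with
+ * assumed values of jit_phys_pages_limit = 1000, a current JIT backing of
+ * 900 pages and needed_pages = 300, kbase_jit_trim_necessary_pages() is
+ * asked to free 300 - (1000 - 900) = 200 pages.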
+ *
+ * This function ensures that the total JIT physical page usage does not
+ * exceed the pressure limit even when the backing pages get allocated
+ * simultaneously for multiple JIT allocations from different threads.
+ *
+ * There should be a matching call to kbase_jit_done_phys_increase(), after
+ * the pages have been allocated and accounted against the active JIT
+ * allocation.
+ *
+ * The caller is supposed to take reg_lock on @kctx before calling this
+ * function.
+ */
+static inline void kbase_jit_request_phys_increase(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->jctx.lock);
+ lockdep_assert_held(&kctx->reg_lock);
+
+ mutex_lock(&kctx->jit_evict_lock);
+ kbase_jit_request_phys_increase_locked(kctx, needed_pages);
+ mutex_unlock(&kctx->jit_evict_lock);
+}
+
+/**
+ * kbase_jit_done_phys_increase() - Decrement the backing pages count after the
+ * allocation of pages for a JIT allocation.
+ *
+ * @kctx: Pointer to the kbase context
+ * @needed_pages: Number of pages that were allocated for the JIT allocation.
+ *
+ * This function should be called after backing pages have been allocated and
+ * accounted against the active JIT allocation.
+ * The call should be made once all of the following hold:
+ * - the allocation is on the jit_active_head,
+ * - the additional needed_pages have been allocated, and
+ * - kctx->reg_lock was held throughout the above and has not yet been
+ *   unlocked.
+ * Failing to call this function before kctx->reg_lock is unlocked, or once
+ * any of the above has changed, may result in over-accounting the memory.
+ * This ensures kbase_jit_trim_necessary_pages() gets a consistent count of
+ * the memory.
+ *
+ * A matching call to kbase_jit_request_phys_increase() should have been made
+ * before the allocation of backing pages.
+ *
+ * The caller is supposed to take reg_lock on @kctx before calling this
+ * function.
+ */
+static inline void kbase_jit_done_phys_increase(struct kbase_context *kctx,
+ size_t needed_pages)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+
+ WARN_ON(kctx->jit_phys_pages_to_be_allocated < needed_pages);
+
+ kctx->jit_phys_pages_to_be_allocated -= needed_pages;
+}
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
 /**
 * kbase_has_exec_va_zone - EXEC_VA zone predicate
@@ -1742,7 +1863,6 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
 int kbase_mem_do_sync_imported(struct kbase_context *kctx,
 struct kbase_va_region *reg, enum kbase_sync_type sync_fn);
-
 /**
 * kbase_mem_copy_to_pinned_user_pages - Memcpy from source input page to
 * an unaligned address at a given offset from the start of a target page.
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index 219e0af..d7863e1 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -49,6 +49,8 @@ #include <tl/mali_kbase_tracepoints.h> #include <mali_kbase_ioctl.h> #include <mmu/mali_kbase_mmu.h> +#include <mali_kbase_caps.h> +#include <mali_kbase_trace_gpu_mem.h> #if ((KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE) || \ (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)) @@ -372,10 +374,12 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, } else reg->threshold_pages = 0; - if (*flags & (BASE_MEM_GROW_ON_GPF|BASE_MEM_TILER_ALIGN_TOP)) { + if (*flags & BASE_MEM_GROW_ON_GPF) { /* kbase_check_alloc_sizes() already checks extent is valid for * assigning to reg->extent */ reg->extent = extent; + } else if (*flags & BASE_MEM_TILER_ALIGN_TOP) { + reg->extent = extent; } else { reg->extent = 0; } @@ -436,6 +440,17 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, *gpu_va = reg->start_pfn << PAGE_SHIFT; } +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_jit_done_phys_increase(kctx, commit_pages); + + mutex_lock(&kctx->jit_evict_lock); + WARN_ON(!list_empty(&reg->jit_node)); + list_add(&reg->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_unlock(kctx); return reg; @@ -443,6 +458,13 @@ no_mmap: no_cookie: no_kern_mapping: no_mem: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (*flags & BASEP_MEM_PERFORM_JIT_TRIM) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, commit_pages); + kbase_gpu_vm_unlock(kctx); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kbase_mem_phy_alloc_put(reg->cpu_alloc); kbase_mem_phy_alloc_put(reg->gpu_alloc); invalid_flags: @@ -511,14 +533,23 @@ int kbase_mem_query(struct kbase_context *kctx, *out |= BASE_MEM_COHERENT_SYSTEM; if (KBASE_REG_SHARE_IN & reg->flags) *out |= BASE_MEM_COHERENT_LOCAL; - if (kctx->api_version >= KBASE_API_VERSION(11, 2)) { - /* Prior to 11.2, these were known about by user-side - * but we did not return them. Returning some of these - * caused certain clients that were not expecting them - * to fail, so we omit all of them as a special-case - * for compatibility reasons */ + if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return it. Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ if (KBASE_REG_PF_GROW & reg->flags) *out |= BASE_MEM_GROW_ON_GPF; + } + if (mali_kbase_supports_mem_protected(kctx->api_version)) { + /* Prior to this version, this was known about by + * user-side but we did not return it.
Returning + * it caused certain clients that were not expecting + * it to fail, so we omit it as a special-case for + * compatibility reasons + */ if (KBASE_REG_PROTECTED & reg->flags) *out |= BASE_MEM_PROTECTED; } @@ -705,6 +736,7 @@ void kbase_mem_evictable_mark_reclaim(struct kbase_mem_phy_alloc *alloc) kbdev, kctx->id, (u64)new_page_count); + kbase_trace_gpu_mem_usage_dec(kbdev, kctx, alloc->nents); } /** @@ -731,6 +763,7 @@ void kbase_mem_evictable_unmark_reclaim(struct kbase_mem_phy_alloc *alloc) kbdev, kctx->id, (u64)new_page_count); + kbase_trace_gpu_mem_usage_inc(kbdev, kctx, alloc->nents); } int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) @@ -1056,6 +1089,8 @@ static void kbase_mem_umm_unmap_attachment(struct kbase_context *kctx, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); alloc->imported.umm.sgt = NULL; + kbase_remove_dma_buf_usage(kctx, alloc); + memset(pa, 0xff, sizeof(*pa) * alloc->nents); alloc->nents = 0; } @@ -1123,6 +1158,7 @@ static int kbase_mem_umm_map_attachment(struct kbase_context *kctx, /* Update nents as we now have pages to map */ alloc->nents = count; + kbase_add_dma_buf_usage(kctx, alloc); return 0; @@ -1383,6 +1419,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, reg->gpu_alloc->imported.umm.dma_attachment = dma_attachment; reg->gpu_alloc->imported.umm.current_mapping_usage_count = 0; reg->gpu_alloc->imported.umm.need_sync = need_sync; + reg->gpu_alloc->imported.umm.kctx = kctx; reg->extent = 0; if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) { @@ -2024,7 +2061,7 @@ static int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) { u64 old_pages; - u64 delta; + u64 delta = 0; int res = -EINVAL; struct kbase_va_region *reg; bool read_locked = false; @@ -2054,6 +2091,9 @@ int kbase_mem_commit(struct kbase_context *kctx, u64 gpu_addr, u64 new_pages) if (0 == (reg->flags & KBASE_REG_GROWABLE)) goto out_unlock; + if (reg->flags & KBASE_REG_ACTIVE_JIT_ALLOC) + goto out_unlock; + /* Would overflow the VA region */ if (new_pages > reg->nr_pages) goto out_unlock; @@ -2216,8 +2256,6 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kfree(map); } -KBASE_EXPORT_TEST_API(kbase_cpu_vm_close); - static struct kbase_aliased *get_aliased_alloc(struct vm_area_struct *vma, struct kbase_va_region *reg, pgoff_t *start_off, @@ -2935,9 +2973,9 @@ KBASE_EXPORT_TEST_API(kbase_vunmap); static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) { -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5, 5, 0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 19, 0)) /* To avoid the build breakage due to an unexported kernel symbol - * 'mm_trace_rss_stat' from later kernels, i.e. from V5.5.0 onwards, + * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards, * we inline here the equivalent of 'add_mm_counter()' from linux * kernel V5.4.0~8. 
*/ diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index b9ed8c3..7263b58 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -39,6 +39,8 @@ #include <arbiter/mali_kbase_arbiter_pm.h> #endif /* CONFIG_MALI_ARBITER_SUPPORT */ +#include <mali_kbase_clk_rate_trace_mgr.h> + int kbase_pm_powerup(struct kbase_device *kbdev, unsigned int flags) { return kbase_hwaccess_pm_powerup(kbdev, flags); @@ -101,6 +103,7 @@ int kbase_pm_context_active_handle_suspend(struct kbase_device *kbdev, #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_arbiter_pm_vm_event(kbdev, KBASE_VM_REF_EVENT); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ + kbase_clk_rate_trace_manager_gpu_active(kbdev); } kbase_pm_unlock(kbdev); @@ -128,6 +131,7 @@ void kbase_pm_context_idle(struct kbase_device *kbdev) if (c == 0) { /* Last context has gone idle */ kbase_hwaccess_pm_gpu_idle(kbdev); + kbase_clk_rate_trace_manager_gpu_idle(kbdev); /* Wake up anyone waiting for this to become 0 (e.g. suspend). * The waiters must synchronize with us by locking the pm.lock diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index cbb0c76..7a784ac 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -32,6 +32,7 @@ #include <linux/dma-mapping.h> #include <mali_base_kernel.h> #include <mali_kbase_hwaccess_time.h> +#include <mali_kbase_kinstr_jm.h> #include <mali_kbase_mem_linux.h> #include <tl/mali_kbase_tracepoints.h> #include <mali_linux_trace.h> @@ -899,7 +900,7 @@ int kbasep_jit_alloc_validate(struct kbase_context *kctx, if (info->flags & ~(BASE_JIT_ALLOC_VALID_FLAGS)) return -EINVAL; -#if !MALI_JIT_PRESSURE_LIMIT +#if !MALI_JIT_PRESSURE_LIMIT_BASE /* If just-in-time memory allocation pressure limit feature is disabled, * heap_info_gpu_addr must be zeroed-out */ @@ -1091,14 +1092,19 @@ static int kbase_jit_allocate_process(struct kbase_jd_atom *katom) } } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** - * If this is the only JIT_ALLOC atom in-flight then allow it to exceed - * the defined pressure limit. + * If this is the only JIT_ALLOC atom in-flight or if JIT pressure limit + * is disabled at the context scope, then bypass JIT pressure limit + * logic in kbase_jit_allocate(). 
*/ - if (kctx->jit_current_allocations == 0) + if (!kbase_ctx_flag(kctx, KCTX_JPL_ENABLED) + || (kctx->jit_current_allocations == 0)) { ignore_pressure_limit = true; -#endif /* MALI_JIT_PRESSURE_LIMIT */ + } +#else + ignore_pressure_limit = true; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ for (i = 0, info = katom->softjob_data; i < count; i++, info++) { if (kctx->jit_alloc[info->id]) { @@ -1358,12 +1364,16 @@ void kbase_jit_retry_pending_alloc(struct kbase_context *kctx) list_for_each_safe(i, tmp, &jit_pending_alloc_list) { struct kbase_jd_atom *pending_atom = list_entry(i, struct kbase_jd_atom, queue); + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_start(pending_atom); if (kbase_jit_allocate_process(pending_atom) == 0) { /* Atom has completed */ INIT_WORK(&pending_atom->work, kbasep_jit_finish_worker); queue_work(kctx->jctx.job_done_wq, &pending_atom->work); } + KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kctx->kbdev, pending_atom); + kbase_kinstr_jm_atom_sw_stop(pending_atom); } } @@ -1538,6 +1548,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) struct kbase_device *kbdev = kctx->kbdev; KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_START(kbdev, katom); + kbase_kinstr_jm_atom_sw_start(katom); trace_sysgraph(SGR_SUBMIT, kctx->id, kbase_jd_atom_id(kctx, katom)); @@ -1600,6 +1611,7 @@ int kbase_process_soft_job(struct kbase_jd_atom *katom) /* Atom is complete */ KBASE_TLSTREAM_TL_EVENT_ATOM_SOFTJOB_END(kbdev, katom); + kbase_kinstr_jm_atom_sw_stop(katom); return ret; } diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.c b/mali_kbase/mali_kbase_trace_gpu_mem.c new file mode 100644 index 0000000..0a053da --- /dev/null +++ b/mali_kbase/mali_kbase_trace_gpu_mem.c @@ -0,0 +1,227 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_mem_linux.h> +#include <mali_kbase_defs.h> +#include <mali_kbase_trace_gpu_mem.h> + +/** + * struct kbase_dma_buf - Object instantiated when a dma-buf imported allocation + * is mapped to the GPU for the first time within a + * process. Another instance is created when the same + * allocation is first mapped to the GPU on the device, + * since separate rb_trees are maintained at the + * kbase_device and kbase_process levels. + * + * @dma_buf: Reference to the dma_buf being imported. + * @dma_buf_node: Link node to maintain a rb_tree of kbase_dma_buf. + * @import_count: The number of times the dma_buf was imported. + */ +struct kbase_dma_buf { + struct dma_buf *dma_buf; + struct rb_node dma_buf_node; + u32 import_count; +}; + +/** + * kbase_delete_dma_buf_mapping - Delete a dma buffer mapping. + * + * @kctx: Pointer to kbase context. + * @dma_buf: Pointer to a dma buffer mapping. + * @tree: Pointer to the root of the rb_tree containing the mapped dma_bufs.
+ * + * When we unmap a dma-buf mapping, we need to remove it from the rb_tree. + * An rb_tree is maintained at the kbase_device level and at the + * kbase_process level; by passing in the root of either one, this helper + * removes the node from that tree. + */ +static bool kbase_delete_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *tree) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = tree->rb_node; + bool mapping_removed = false; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + WARN_ON(!buf_node->import_count); + + buf_node->import_count--; + + if (!buf_node->import_count) { + rb_erase(&buf_node->dma_buf_node, tree); + kfree(buf_node); + mapping_removed = true; + } + + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + WARN_ON(!buf_node); + return mapping_removed; +} + +/** + * kbase_capture_dma_buf_mapping - capture a dma buffer mapping. + * + * @kctx: Pointer to kbase context. + * @dma_buf: Pointer to a dma buffer mapping. + * @root: Pointer to root of rb_tree containing the dma_buf's. + * + * We maintain a kbase_device-level and a kbase_process-level rb_tree of + * all unique dma_bufs mapped to GPU memory, so whenever a dma_buf is + * attached it is added to both rb_trees. To keep each mapping unique, we + * first check that it is not a duplicate and only then add it. + */ +static bool kbase_capture_dma_buf_mapping(struct kbase_context *kctx, + struct dma_buf *dma_buf, + struct rb_root *root) +{ + struct kbase_dma_buf *buf_node = NULL; + struct rb_node *node = root->rb_node; + bool unique_buf_imported = true; + + lockdep_assert_held(&kctx->kbdev->dma_buf_lock); + + while (node) { + buf_node = rb_entry(node, struct kbase_dma_buf, dma_buf_node); + + if (dma_buf == buf_node->dma_buf) { + unique_buf_imported = false; + break; + } + + if (dma_buf < buf_node->dma_buf) + node = node->rb_left; + else + node = node->rb_right; + } + + if (unique_buf_imported) { + struct kbase_dma_buf *buf_node = + kzalloc(sizeof(*buf_node), GFP_KERNEL); + + if (buf_node == NULL) { + dev_err(kctx->kbdev->dev, "Error allocating memory for kbase_dma_buf\n"); + /* Don't account for it if we fail to allocate memory */ + unique_buf_imported = false; + } else { + struct rb_node **new = &(root->rb_node), *parent = NULL; + + buf_node->dma_buf = dma_buf; + buf_node->import_count = 1; + while (*new) { + struct kbase_dma_buf *node; + + parent = *new; + node = rb_entry(parent, struct kbase_dma_buf, + dma_buf_node); + if (dma_buf < node->dma_buf) + new = &(*new)->rb_left; + else + new = &(*new)->rb_right; + } + rb_link_node(&buf_node->dma_buf_node, parent, new); + rb_insert_color(&buf_node->dma_buf_node, root); + } + } else if (!WARN_ON(!buf_node)) { + buf_node->import_count++; + } + + return unique_buf_imported; +} + +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool dev_mapping_removed, prcs_mapping_removed; + + mutex_lock(&kbdev->dma_buf_lock); + + dev_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + prcs_mapping_removed = kbase_delete_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(dev_mapping_removed && !prcs_mapping_removed); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if
(dev_mapping_removed) + kbdev->total_gpu_pages -= alloc->nents; + + if (prcs_mapping_removed) + kctx->kprcs->total_gpu_pages -= alloc->nents; + + if (dev_mapping_removed || prcs_mapping_removed) + kbase_trace_gpu_mem_usage(kbdev, kctx); + spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} + +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc) +{ + struct kbase_device *kbdev = kctx->kbdev; + bool unique_dev_dmabuf, unique_prcs_dmabuf; + + mutex_lock(&kbdev->dma_buf_lock); + + /* add dma_buf to device and process. */ + unique_dev_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kbdev->dma_buf_root); + + unique_prcs_dmabuf = kbase_capture_dma_buf_mapping( + kctx, alloc->imported.umm.dma_buf, &kctx->kprcs->dma_buf_root); + + WARN_ON(unique_dev_dmabuf && !unique_prcs_dmabuf); + + spin_lock(&kbdev->gpu_mem_usage_lock); + if (unique_dev_dmabuf) + kbdev->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf) + kctx->kprcs->total_gpu_pages += alloc->nents; + + if (unique_prcs_dmabuf || unique_dev_dmabuf) + kbase_trace_gpu_mem_usage(kbdev, kctx); + spin_unlock(&kbdev->gpu_mem_usage_lock); + + mutex_unlock(&kbdev->dma_buf_lock); +} + +#ifndef CONFIG_TRACE_GPU_MEM +#define CREATE_TRACE_POINTS +#include "mali_gpu_mem_trace.h" +#endif diff --git a/mali_kbase/mali_kbase_trace_gpu_mem.h b/mali_kbase/mali_kbase_trace_gpu_mem.h new file mode 100644 index 0000000..b621525 --- /dev/null +++ b/mali_kbase/mali_kbase_trace_gpu_mem.h @@ -0,0 +1,101 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _KBASE_TRACE_GPU_MEM_H_ +#define _KBASE_TRACE_GPU_MEM_H_ + +#ifdef CONFIG_TRACE_GPU_MEM +#include <trace/events/gpu_mem.h> +#else +#include "mali_gpu_mem_trace.h" +#endif + +#define DEVICE_TGID ((u32) 0U) + +static inline void kbase_trace_gpu_mem_usage(struct kbase_device *kbdev, + struct kbase_context *kctx) +{ + lockdep_assert_held(&kbdev->gpu_mem_usage_lock); + + trace_gpu_mem_total(kbdev->id, DEVICE_TGID, + kbdev->total_gpu_pages << PAGE_SHIFT); + + if (likely(kctx)) + trace_gpu_mem_total(kbdev->id, kctx->kprcs->tgid, + kctx->kprcs->total_gpu_pages << PAGE_SHIFT); +} + +static inline void kbase_trace_gpu_mem_usage_dec(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages -= pages; + + kbdev->total_gpu_pages -= pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +static inline void kbase_trace_gpu_mem_usage_inc(struct kbase_device *kbdev, + struct kbase_context *kctx, size_t pages) +{ + spin_lock(&kbdev->gpu_mem_usage_lock); + + if (likely(kctx)) + kctx->kprcs->total_gpu_pages += pages; + + kbdev->total_gpu_pages += pages; + + kbase_trace_gpu_mem_usage(kbdev, kctx); + + spin_unlock(&kbdev->gpu_mem_usage_lock); +} + +/** + * kbase_remove_dma_buf_usage - Remove a captured dma-buf entry. + * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to unmap + * + * Removes the reference to the dma-buf being unmapped from both the + * kbase_device-level and kbase_process-level dma-buf rb_trees. + */ +void kbase_remove_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +/** + * kbase_add_dma_buf_usage - Capture a dma-buf entry. + * + * @kctx: Pointer to the kbase context + * @alloc: Pointer to the alloc to map in + * + * Adds a reference for the dma-buf being mapped to both the + * kbase_device-level and kbase_process-level dma-buf rb_trees. + */ +void kbase_add_dma_buf_usage(struct kbase_context *kctx, + struct kbase_mem_phy_alloc *alloc); + +#endif /* _KBASE_TRACE_GPU_MEM_H_ */ diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index d96b565..72cec13 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -184,6 +184,7 @@ static int kbasep_vinstr_client_dump( unsigned int read_idx; struct kbase_hwcnt_dump_buffer *dump_buf; struct kbase_hwcnt_reader_metadata *meta; + u8 clk_cnt; WARN_ON(!vcli); lockdep_assert_held(&vcli->vctx->lock); @@ -212,9 +213,14 @@ static int kbasep_vinstr_client_dump( /* Zero all non-enabled counters (current values are undefined) */ kbase_hwcnt_dump_buffer_zero_non_enabled(dump_buf, &vcli->enable_map); + clk_cnt = vcli->vctx->metadata->clk_cnt; + meta->timestamp = ts_end_ns; meta->event_id = event_id; meta->buffer_idx = write_idx; + meta->cycles.top = (clk_cnt > 0) ? dump_buf->clk_cnt_buf[0] : 0; + meta->cycles.shader_cores = + (clk_cnt > 1) ? dump_buf->clk_cnt_buf[1] : 0; /* Notify client. Make sure all changes to memory are visible. */ wmb(); @@ -404,12 +410,15 @@ static int kbasep_vinstr_client_create( if (errcode) goto error; - phys_em.jm_bm = setup->jm_bm; + phys_em.fe_bm = setup->fe_bm; phys_em.shader_bm = setup->shader_bm; phys_em.tiler_bm = setup->tiler_bm; phys_em.mmu_l2_bm = setup->mmu_l2_bm; kbase_hwcnt_gpu_enable_map_from_physical(&vcli->enable_map, &phys_em); + /* Enable all the available clk_enable_map.
*/ + vcli->enable_map.clk_enable_map = (1ull << vctx->metadata->clk_cnt) - 1; + errcode = kbase_hwcnt_dump_buffer_array_alloc( vctx->metadata, setup->buffer_count, &vcli->dump_bufs); if (errcode) @@ -675,23 +684,26 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( unsigned int idx = meta_idx % cli->dump_bufs.buf_cnt; struct kbase_hwcnt_reader_metadata *meta = &cli->dump_bufs_meta[idx]; + const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); + const size_t min_size = min(size, meta_size); /* Metadata sanity check. */ WARN_ON(idx != meta->buffer_idx); - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; - /* Check if there is any buffer available. */ - if (atomic_read(&cli->write_idx) == meta_idx) + if (unlikely(atomic_read(&cli->write_idx) == meta_idx)) return -EAGAIN; /* Check if previously taken buffer was put back. */ - if (atomic_read(&cli->read_idx) != meta_idx) + if (unlikely(atomic_read(&cli->read_idx) != meta_idx)) return -EBUSY; + /* Clear user buffer to zero. */ + if (unlikely(meta_size < size && clear_user(buffer, size))) + return -EFAULT; + /* Copy next available buffer's metadata to user. */ - if (copy_to_user(buffer, meta, size)) + if (unlikely(copy_to_user(buffer, meta, min_size))) return -EFAULT; atomic_inc(&cli->meta_idx); @@ -715,24 +727,62 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( unsigned int read_idx = atomic_read(&cli->read_idx); unsigned int idx = read_idx % cli->dump_bufs.buf_cnt; - struct kbase_hwcnt_reader_metadata meta; - - if (sizeof(struct kbase_hwcnt_reader_metadata) != size) - return -EINVAL; + struct kbase_hwcnt_reader_metadata *meta; + const size_t meta_size = sizeof(struct kbase_hwcnt_reader_metadata); + const size_t max_size = max(size, meta_size); + int ret = 0; + u8 stack_kbuf[64]; + u8 *kbuf = NULL; + size_t i; /* Check if any buffer was taken. */ - if (atomic_read(&cli->meta_idx) == read_idx) + if (unlikely(atomic_read(&cli->meta_idx) == read_idx)) return -EPERM; + if (likely(max_size <= sizeof(stack_kbuf))) { + /* Use stack buffer when the size is small enough. */ + if (unlikely(meta_size > size)) + memset(stack_kbuf, 0, sizeof(stack_kbuf)); + kbuf = stack_kbuf; + } else { + kbuf = kzalloc(max_size, GFP_KERNEL); + if (unlikely(!kbuf)) + return -ENOMEM; + } + + /* + * Copy user buffer to zero cleared kernel buffer which has enough + * space for both user buffer and kernel metadata. + */ + if (unlikely(copy_from_user(kbuf, buffer, size))) { + ret = -EFAULT; + goto out; + } + + /* + * Make sure any "extra" data passed from userspace is zero. + * It's meaningful only in case meta_size < size. + */ + for (i = meta_size; i < size; i++) { + /* Check if user data beyond meta size is zero. */ + if (unlikely(kbuf[i] != 0)) { + ret = -EINVAL; + goto out; + } + } + /* Check if correct buffer is put back. */ - if (copy_from_user(&meta, buffer, size)) - return -EFAULT; - if (idx != meta.buffer_idx) - return -EINVAL; + meta = (struct kbase_hwcnt_reader_metadata *)kbuf; + if (unlikely(idx != meta->buffer_idx)) { + ret = -EINVAL; + goto out; + } atomic_inc(&cli->read_idx); - - return 0; +out: + if (unlikely(kbuf != stack_kbuf)) + kfree(kbuf); + return ret; } /** @@ -836,6 +886,42 @@ static long kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( } /** + * The hwcnt reader's ioctl command - get API version. + * @cli: The non-NULL pointer to the client + * @arg: Command's argument. + * @size: Size of arg. + * + * @return 0 on success, else error code. 
+ */ +static long kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + struct kbase_vinstr_client *cli, unsigned long arg, size_t size) +{ + long ret = -EINVAL; + u8 clk_cnt = cli->vctx->metadata->clk_cnt; + + if (size == sizeof(u32)) { + ret = put_user(HWCNT_READER_API, (u32 __user *)arg); + } else if (size == sizeof(struct kbase_hwcnt_reader_api_version)) { + struct kbase_hwcnt_reader_api_version api_version = { + .version = HWCNT_READER_API, + .features = KBASE_HWCNT_READER_API_VERSION_NO_FEATURE, + }; + + if (clk_cnt > 0) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_TOP; + if (clk_cnt > 1) + api_version.features |= + KBASE_HWCNT_READER_API_VERSION_FEATURE_CYCLES_SHADER_CORES; + + ret = put_user(api_version, + (struct kbase_hwcnt_reader_api_version __user *) + arg); + } + return ret; +} + +/** * kbasep_vinstr_hwcnt_reader_ioctl() - hwcnt reader's ioctl. * @filp: Non-NULL pointer to file structure. * @cmd: User command. @@ -858,42 +944,43 @@ static long kbasep_vinstr_hwcnt_reader_ioctl( if (!cli) return -EINVAL; - switch (cmd) { - case KBASE_HWCNT_READER_GET_API_VERSION: - rcode = put_user(HWCNT_READER_API, (u32 __user *)arg); + switch (_IOC_NR(cmd)) { + case _IOC_NR(KBASE_HWCNT_READER_GET_API_VERSION): + rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_api_version( + cli, arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_GET_HWVER: + case _IOC_NR(KBASE_HWCNT_READER_GET_HWVER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_hwver( cli, (u32 __user *)arg); break; - case KBASE_HWCNT_READER_GET_BUFFER_SIZE: + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER_SIZE): rcode = put_user( (u32)cli->vctx->metadata->dump_buf_bytes, (u32 __user *)arg); break; - case KBASE_HWCNT_READER_DUMP: + case _IOC_NR(KBASE_HWCNT_READER_DUMP): rcode = kbasep_vinstr_hwcnt_reader_ioctl_dump(cli); break; - case KBASE_HWCNT_READER_CLEAR: + case _IOC_NR(KBASE_HWCNT_READER_CLEAR): rcode = kbasep_vinstr_hwcnt_reader_ioctl_clear(cli); break; - case KBASE_HWCNT_READER_GET_BUFFER: + case _IOC_NR(KBASE_HWCNT_READER_GET_BUFFER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_get_buffer( cli, (void __user *)arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_PUT_BUFFER: + case _IOC_NR(KBASE_HWCNT_READER_PUT_BUFFER): rcode = kbasep_vinstr_hwcnt_reader_ioctl_put_buffer( cli, (void __user *)arg, _IOC_SIZE(cmd)); break; - case KBASE_HWCNT_READER_SET_INTERVAL: + case _IOC_NR(KBASE_HWCNT_READER_SET_INTERVAL): rcode = kbasep_vinstr_hwcnt_reader_ioctl_set_interval( cli, (u32)arg); break; - case KBASE_HWCNT_READER_ENABLE_EVENT: + case _IOC_NR(KBASE_HWCNT_READER_ENABLE_EVENT): rcode = kbasep_vinstr_hwcnt_reader_ioctl_enable_event( cli, (enum base_hwcnt_reader_event)arg); break; - case KBASE_HWCNT_READER_DISABLE_EVENT: + case _IOC_NR(KBASE_HWCNT_READER_DISABLE_EVENT): rcode = kbasep_vinstr_hwcnt_reader_ioctl_disable_event( cli, (enum base_hwcnt_reader_event)arg); break; diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h index f618755..36bfd09 100644 --- a/mali_kbase/mali_linux_trace.h +++ b/mali_kbase/mali_linux_trace.h @@ -288,7 +288,7 @@ DEFINE_EVENT_PRINT(mali_jit_softjob_template, mali_jit_free, TP_printk("start=0x%llx va_pages=0x%zx backed_size=0x%zx", __entry->start_addr, __entry->nr_pages, __entry->backed_pages)); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* trace_mali_jit_report * * Tracepoint about the GPU data structure read to form a just-in-time memory @@ -326,13 +326,13 @@ TRACE_EVENT(mali_jit_report, ), __entry->read_val, __entry->used_pages) ); -#endif /* 
MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #if (KERNEL_VERSION(4, 1, 0) <= LINUX_VERSION_CODE) TRACE_DEFINE_ENUM(KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); #endif -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* trace_mali_jit_report_pressure * * Tracepoint about change in physical memory pressure, due to the information @@ -366,7 +366,7 @@ TRACE_EVENT(mali_jit_report_pressure, { KBASE_JIT_REPORT_ON_ALLOC_OR_FREE, "HAPPENED_ON_ALLOC_OR_FREE" })) ); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ #ifndef __TRACE_SYSGRAPH_ENUM #define __TRACE_SYSGRAPH_ENUM diff --git a/mali_kbase/mali_power_gpu_frequency_trace.c b/mali_kbase/mali_power_gpu_frequency_trace.c new file mode 100644 index 0000000..b6fb5a0 --- /dev/null +++ b/mali_kbase/mali_power_gpu_frequency_trace.c @@ -0,0 +1,27 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +/* Create the trace point if not configured in kernel */ +#ifndef CONFIG_TRACE_POWER_GPU_FREQUENCY +#define CREATE_TRACE_POINTS +#include "mali_power_gpu_frequency_trace.h" +#endif diff --git a/mali_kbase/mali_power_gpu_frequency_trace.h b/mali_kbase/mali_power_gpu_frequency_trace.h new file mode 100644 index 0000000..3b90ae4 --- /dev/null +++ b/mali_kbase/mali_power_gpu_frequency_trace.h @@ -0,0 +1,69 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#ifndef _TRACE_POWER_GPU_FREQUENCY_MALI +#define _TRACE_POWER_GPU_FREQUENCY_MALI +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_power_gpu_frequency_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . 
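A note on the kernel tracepoint boilerplate in this new header: TRACE_INCLUDE_FILE and TRACE_INCLUDE_PATH tell trace/define_trace.h (pulled in at the bottom of the file) where to re-read this header, so the single translation unit that defines CREATE_TRACE_POINTS (mali_power_gpu_frequency_trace.c above) expands the event definitions exactly once, while every other includer only gets the declaration. A hedged sketch of a caller emitting the event; the helper name is illustrative only:

#include "mali_power_gpu_frequency_trace.h"

static void sketch_report_gpu_frequency(unsigned int frequency,
					unsigned int gpu_id)
{
	/* trace_gpu_frequency() is generated from
	 * DEFINE_EVENT(gpu, gpu_frequency, ...) below */
	trace_gpu_frequency(frequency, gpu_id);
}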
+ +#if !defined(_TRACE_POWER_GPU_FREQUENCY_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_GPU_FREQUENCY_H + +#include <linux/tracepoint.h> + +DECLARE_EVENT_CLASS(gpu, + + TP_PROTO(unsigned int state, unsigned int gpu_id), + + TP_ARGS(state, gpu_id), + + TP_STRUCT__entry( + __field( u32, state ) + __field( u32, gpu_id ) + ), + + TP_fast_assign( + __entry->state = state; + __entry->gpu_id = gpu_id; + ), + + TP_printk("state=%lu gpu_id=%lu", (unsigned long)__entry->state, + (unsigned long)__entry->gpu_id) +); + +DEFINE_EVENT(gpu, gpu_frequency, + + TP_PROTO(unsigned int frequency, unsigned int gpu_id), + + TP_ARGS(frequency, gpu_id) +); + +#endif /* _TRACE_POWER_GPU_FREQUENCY_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index db27832..734c9de 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -45,6 +45,7 @@ #include <mmu/mali_kbase_mmu_internal.h> #include <mali_kbase_cs_experimental.h> +#include <mali_kbase_trace_gpu_mem.h> #define KBASE_MMU_PAGE_ENTRIES 512 /** @@ -150,6 +151,13 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, * Depending on reg's flags, the base used for calculating multiples is * different */ + + /* multiple is based from the current backed size, even if the + * current backed size/pfn for end of committed memory are not + * themselves aligned to multiple + */ + remainder = minimum_extra % multiple; + if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { /* multiple is based from the top of the initial commit, which * has been allocated in such a way that (start_pfn + @@ -175,12 +183,6 @@ static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev, remainder = pages_after_initial % multiple; } - } else { - /* multiple is based from the current backed size, even if the - * current backed size/pfn for end of committed memory are not - * themselves aligned to multiple - */ - remainder = minimum_extra % multiple; } if (remainder == 0) @@ -544,7 +546,9 @@ void page_fault_worker(struct work_struct *data) struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; int i; size_t current_backed_size; - +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t pages_trimmed = 0; +#endif faulting_as = container_of(data, struct kbase_as, work_pagefault); fault = &faulting_as->pf_data; @@ -568,6 +572,10 @@ void page_fault_worker(struct work_struct *data) KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev); +#if MALI_JIT_PRESSURE_LIMIT_BASE + mutex_lock(&kctx->jctx.lock); +#endif + if (unlikely(fault->protected_mode)) { kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Protected mode fault", fault); @@ -758,6 +766,13 @@ page_fault_retry: pages_to_grow = 0; +#if MALI_JIT_PRESSURE_LIMIT_BASE + if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) { + kbase_jit_request_phys_increase(kctx, new_pages); + pages_trimmed = new_pages; + } +#endif + spin_lock(&kctx->mem_partials_lock); grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow, &grow_2mb_pool, prealloc_sas); @@ -872,6 +887,13 @@ page_fault_retry: } } #endif + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_jit_done_phys_increase(kctx, pages_trimmed); + pages_trimmed = 0; + } +#endif kbase_gpu_vm_unlock(kctx); } else { int ret = -ENOMEM; @@ -918,6 +940,15 @@ page_fault_retry: } fault_done: +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (pages_trimmed) { + kbase_gpu_vm_lock(kctx); + kbase_jit_done_phys_increase(kctx, pages_trimmed); + 
kbase_gpu_vm_unlock(kctx); + } + mutex_unlock(&kctx->jctx.lock); +#endif + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) kfree(prealloc_sas[i]); @@ -964,6 +995,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, atomic_add(1, &kbdev->memdev.used_pages); + kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1); + for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) kbdev->mmu_mode->entry_invalidate(&page[i]); @@ -1290,6 +1323,8 @@ static inline void cleanup_empty_pte(struct kbase_device *kbdev, atomic_sub(1, &mmut->kctx->used_pages); } atomic_sub(1, &kbdev->memdev.used_pages); + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); } u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, @@ -1932,6 +1967,8 @@ static void mmu_teardown_level(struct kbase_device *kbdev, kbase_process_page_usage_dec(mmut->kctx, 1); atomic_sub(1, &mmut->kctx->used_pages); } + + kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1); } int kbase_mmu_init(struct kbase_device *const kbdev, diff --git a/mali_kbase/platform/devicetree/Kbuild b/mali_kbase/platform/devicetree/Kbuild index ce637fb..78343c0 100644 --- a/mali_kbase/platform/devicetree/Kbuild +++ b/mali_kbase/platform/devicetree/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2012-2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2012-2017, 2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,4 +21,5 @@ mali_kbase-y += \ $(MALI_PLATFORM_DIR)/mali_kbase_config_devicetree.o \ - $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o + $(MALI_PLATFORM_DIR)/mali_kbase_runtime_pm.o \ + $(MALI_PLATFORM_DIR)/mali_kbase_clk_rate_trace.o diff --git a/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c new file mode 100644 index 0000000..11a8b77 --- /dev/null +++ b/mali_kbase/platform/devicetree/mali_kbase_clk_rate_trace.c @@ -0,0 +1,68 @@ +/* + * + * (C) COPYRIGHT 2015, 2017-2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <mali_kbase.h> +#include <mali_kbase_defs.h> +#include <linux/clk.h> +#include "mali_kbase_config_platform.h" + +static void *enumerate_gpu_clk(struct kbase_device *kbdev, + unsigned int index) +{ + if (index >= kbdev->nr_clocks) + return NULL; + + return kbdev->clocks[index]; +} + +static unsigned long get_gpu_clk_rate(struct kbase_device *kbdev, + void *gpu_clk_handle) +{ + return clk_get_rate((struct clk *)gpu_clk_handle); +} + +static int gpu_clk_notifier_register(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + compiletime_assert(offsetof(struct clk_notifier_data, clk) == + offsetof(struct kbase_gpu_clk_notifier_data, gpu_clk_handle), + "mismatch in the offset of clk member"); + + compiletime_assert(sizeof(((struct clk_notifier_data *)0)->clk) == + sizeof(((struct kbase_gpu_clk_notifier_data *)0)->gpu_clk_handle), + "mismatch in the size of clk member"); + + return clk_notifier_register((struct clk *)gpu_clk_handle, nb); +} + +static void gpu_clk_notifier_unregister(struct kbase_device *kbdev, + void *gpu_clk_handle, struct notifier_block *nb) +{ + clk_notifier_unregister((struct clk *)gpu_clk_handle, nb); +} + +struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops = { + .get_gpu_clk_rate = get_gpu_clk_rate, + .enumerate_gpu_clk = enumerate_gpu_clk, + .gpu_clk_notifier_register = gpu_clk_notifier_register, + .gpu_clk_notifier_unregister = gpu_clk_notifier_unregister, +}; diff --git a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h index 5990313..2137b42 100644 --- a/mali_kbase/platform/devicetree/mali_kbase_config_platform.h +++ b/mali_kbase/platform/devicetree/mali_kbase_config_platform.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2014-2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2017, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,10 @@ */ #define PLATFORM_FUNCS (NULL) +#define CLK_RATE_TRACE_OPS (&clk_rate_trace_ops) + extern struct kbase_pm_callback_conf pm_callbacks; +extern struct kbase_clk_rate_trace_op_conf clk_rate_trace_ops; /** * Autosuspend delay diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild index df16a77..c26bef7 100644 --- a/mali_kbase/tests/Kbuild +++ b/mali_kbase/tests/Kbuild @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. # # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,3 +21,4 @@ obj-$(CONFIG_MALI_KUTF) += kutf/ obj-$(CONFIG_MALI_IRQ_LATENCY) += mali_kutf_irq_test/ +obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace/kernel/ diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig index fa91aea..83a4d77 100644 --- a/mali_kbase/tests/Kconfig +++ b/mali_kbase/tests/Kconfig @@ -1,5 +1,5 @@ # -# (C) COPYRIGHT 2017 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -21,3 +21,4 @@ source "drivers/gpu/arm/midgard/tests/kutf/Kconfig" source "drivers/gpu/arm/midgard/tests/mali_kutf_irq_test/Kconfig" +source "drivers/gpu/arm/midgard/tests/mali_kutf_clk_rate_trace/kernel/Kconfig" diff --git a/mali_kbase/tests/include/kutf/kutf_helpers.h b/mali_kbase/tests/include/kutf/kutf_helpers.h index 15e168c..858b9c3 100644 --- a/mali_kbase/tests/include/kutf/kutf_helpers.h +++ b/mali_kbase/tests/include/kutf/kutf_helpers.h @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,6 +34,14 @@ #include <kutf/kutf_suite.h> /** + * kutf_helper_pending_input() - Check any pending lines sent by user space + * @context: KUTF context + * + * Return: true if there are pending lines, otherwise false + */ +bool kutf_helper_pending_input(struct kutf_context *context); + +/** * kutf_helper_input_dequeue() - Dequeue a line sent by user space * @context: KUTF context * @str_size: Pointer to an integer to receive the size of the string diff --git a/mali_kbase/tests/kutf/kutf_helpers.c b/mali_kbase/tests/kutf/kutf_helpers.c index cab5add..4463b04 100644 --- a/mali_kbase/tests/kutf/kutf_helpers.c +++ b/mali_kbase/tests/kutf/kutf_helpers.c @@ -1,6 +1,6 @@ /* * - * (C) COPYRIGHT 2017 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2017, 2020 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -29,10 +29,11 @@ #include <linux/preempt.h> #include <linux/wait.h> #include <linux/uaccess.h> +#include <linux/export.h> static DEFINE_SPINLOCK(kutf_input_lock); -static bool pending_input(struct kutf_context *context) +bool kutf_helper_pending_input(struct kutf_context *context) { bool input_pending; @@ -44,6 +45,7 @@ static bool pending_input(struct kutf_context *context) return input_pending; } +EXPORT_SYMBOL(kutf_helper_pending_input); char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) { @@ -59,7 +61,7 @@ char *kutf_helper_input_dequeue(struct kutf_context *context, size_t *str_size) spin_unlock(&kutf_input_lock); err = wait_event_interruptible(context->userdata.input_waitq, - pending_input(context)); + kutf_helper_pending_input(context)); if (err) return ERR_PTR(-EINTR); diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild new file mode 100644 index 0000000..f5565d3 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kbuild @@ -0,0 +1,26 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ccflags-y += -I$(src)/../include -I$(src)/../../../ -I$(src)/../../ -I$(src)/../../backend/gpu -I$(srctree)/drivers/staging/android + +obj-$(CONFIG_MALI_CLK_RATE_TRACE_PORTAL) += mali_kutf_clk_rate_trace_test_portal.o + +mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig new file mode 100644 index 0000000..04b44cf --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Kconfig @@ -0,0 +1,30 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# SPDX-License-Identifier: GPL-2.0 +# +# + +config MALI_CLK_RATE_TRACE_PORTAL + tristate "Mali GPU Clock Trace Test portal" + depends on MALI_MIDGARD && MALI_DEBUG && MALI_KUTF + default m + help + This option will build a test module mali_kutf_clk_rate_trace_test_portal + that can test the clock integration on the platform and exercise some + basic trace tests in the system. Choosing M here will generate a single + module called mali_kutf_clk_rate_trace_test_portal. diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile new file mode 100644 index 0000000..71c78b8 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/Makefile @@ -0,0 +1,57 @@ +# +# (C) COPYRIGHT 2020 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU licence. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details.
+# +# SPDX-License-Identifier: GPL-2.0 +# +# + +ifneq ($(KERNELRELEASE),) + +ccflags-y := \ + -DMALI_UNIT_TEST=$(MALI_UNIT_TEST) \ + -DMALI_CUSTOMER_RELEASE=$(MALI_CUSTOMER_RELEASE) \ + -I$(src)/../../include \ + -I$(src)/../../../../../../../include \ + -I$(src)/../../../../ \ + -I$(src)/../../../ \ + -I$(src)/../../../backend/gpu \ + -I$(src)/../../../debug \ + -I$(src)/../../../debug/backend \ + -I$(src)/ \ + -I$(srctree)/drivers/staging/android \ + -I$(srctree)/include/linux + +obj-m := mali_kutf_clk_rate_trace_test_portal.o +mali_kutf_clk_rate_trace_test_portal-y := mali_kutf_clk_rate_trace_test.o + +else +# linux build system bootstrap for out-of-tree module + +# default to building for the host +ARCH ?= $(shell uname -m) + +ifeq ($(KDIR),) +$(error Must specify KDIR to point to the kernel to target) +endif + +all: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) KBUILD_EXTRA_SYMBOLS="$(CURDIR)/../../kutf/Module.symvers $(CURDIR)/../../../Module.symvers" modules + +clean: + $(MAKE) ARCH=$(ARCH) -C $(KDIR) M=$(CURDIR) clean + +endif diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp new file mode 100644 index 0000000..0cc2904 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/build.bp @@ -0,0 +1,34 @@ +/* + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * A copy of the licence is included with the program, and can also be obtained + * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +bob_kernel_module { + name: "mali_kutf_clk_rate_trace_test_portal", + defaults: [ + "mali_kbase_shared_config_defaults", + "kernel_test_includes", + ], + srcs: [ + "../mali_kutf_clk_rate_trace_test.h", + "Makefile", + "mali_kutf_clk_rate_trace_test.c", + ], + extra_symbols: [ + "mali_kbase", + "kutf", + ], + enabled: false, + base_build_kutf: { + enabled: true, + kbuild_options: ["CONFIG_MALI_CLK_RATE_TRACE_PORTAL=m"], + }, +} diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c new file mode 100644 index 0000000..d466661 --- /dev/null +++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c @@ -0,0 +1,886 @@ +/* + * + * (C) COPYRIGHT 2020 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU licence. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * SPDX-License-Identifier: GPL-2.0 + * + */ + +#include <linux/fdtable.h> +#include <linux/module.h> + +#include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/ktime.h> +#include <linux/version.h> +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) +#include <linux/sched/task.h> +#else +#include <linux/sched.h> +#endif +#include "mali_kbase.h" +#include "mali_kbase_irq_internal.h" +#include "mali_kbase_pm_internal.h" +#include "mali_kbase_clk_rate_trace_mgr.h" + +#include <kutf/kutf_suite.h> +#include <kutf/kutf_utils.h> +#include <kutf/kutf_helpers.h> +#include <kutf/kutf_helpers_user.h> + +#include "../mali_kutf_clk_rate_trace_test.h" + +#define MINOR_FOR_FIRST_KBASE_DEV (-1) + +/* KUTF test application pointer for this test */ +struct kutf_application *kutf_app; + +enum portal_server_state { + PORTAL_STATE_NO_CLK, + PORTAL_STATE_LIVE, + PORTAL_STATE_CLOSING, +}; + +/** + * struct clk_trace_snapshot - Trace info data on a clock. + * @previous_rate: Snapshot start point clock rate. + * @current_rate: End point clock rate. It becomes the start rate of the + * next trace snapshot. + * @rate_up_cnt: Count in the snapshot duration when the clock trace + * write is a rate of higher value than the last. + * @rate_down_cnt: Count in the snapshot duration when the clock trace write + * is a rate of lower value than the last. + */ +struct clk_trace_snapshot { + unsigned long previous_rate; + unsigned long current_rate; + u32 rate_up_cnt; + u32 rate_down_cnt; +}; + +/** + * struct kutf_clk_rate_trace_fixture_data - Fixture data for the test. + * @kbdev: kbase device for the GPU. + * @listener: Clock rate change listener structure. + * @invoke_notify: When true, invoke notify command is being executed. + * @snapshot: Clock trace update snapshot data array. A snapshot + * for each clock contains info accumulated between two + * GET_TRACE_SNAPSHOT requests. + * @nclks: Number of clocks visible to the trace portal. + * @pm_ctx_cnt: Net count of PM (Power Management) context INC/DEC + * PM_CTX_CNT requests made to the portal. On change from + * 0 to 1 (INC), or, 1 to 0 (DEC), a PM context action is + * triggered. + * @total_update_cnt: Total number of received trace write callbacks. + * @server_state: Portal server operational state. + * @result_msg: Message for the test result. + * @test_status: Portal test result status. + */ +struct kutf_clk_rate_trace_fixture_data { + struct kbase_device *kbdev; + struct kbase_clk_rate_listener listener; + bool invoke_notify; + struct clk_trace_snapshot snapshot[BASE_MAX_NR_CLOCKS_REGULATORS]; + unsigned int nclks; + unsigned int pm_ctx_cnt; + unsigned int total_update_cnt; + enum portal_server_state server_state; + char const *result_msg; + enum kutf_result_status test_status; +}; + +struct clk_trace_portal_input { + struct kutf_helper_named_val cmd_input; + enum kbasep_clk_rate_trace_req portal_cmd; + int named_val_err; +}; + +struct kbasep_cmd_name_pair { + enum kbasep_clk_rate_trace_req cmd; + const char *name; +}; + +struct kbasep_cmd_name_pair kbasep_portal_cmd_name_map[] = { + {PORTAL_CMD_GET_CLK_RATE_MGR, GET_CLK_RATE_MGR}, + {PORTAL_CMD_GET_CLK_RATE_TRACE, GET_CLK_RATE_TRACE}, + {PORTAL_CMD_GET_TRACE_SNAPSHOT, GET_TRACE_SNAPSHOT}, + {PORTAL_CMD_INC_PM_CTX_CNT, INC_PM_CTX_CNT}, + {PORTAL_CMD_DEC_PM_CTX_CNT, DEC_PM_CTX_CNT}, + {PORTAL_CMD_CLOSE_PORTAL, CLOSE_PORTAL}, + {PORTAL_CMD_INVOKE_NOTIFY_42KHZ, INVOKE_NOTIFY_42KHZ}, + }; + +/* Global pointer for the kutf_portal_trace_write() to use.
When + * this pointer is engaged, new requests to create a fixture will fail, + * hence limiting use of the portal to a singleton at any time. + */ +struct kutf_clk_rate_trace_fixture_data *g_ptr_portal_data; + +#define PORTAL_MSG_LEN (KUTF_MAX_LINE_LENGTH - MAX_REPLY_NAME_LEN) +static char portal_msg_buf[PORTAL_MSG_LEN]; + +static void kutf_portal_trace_write( + struct kbase_clk_rate_listener *listener, + u32 index, u32 new_rate) +{ + struct clk_trace_snapshot *snapshot; + struct kutf_clk_rate_trace_fixture_data *data = container_of( + listener, struct kutf_clk_rate_trace_fixture_data, listener); + + lockdep_assert_held(&data->kbdev->pm.clk_rtm.lock); + + if (WARN_ON(g_ptr_portal_data == NULL)) + return; + if (WARN_ON(index >= g_ptr_portal_data->nclks)) + return; + + /* This callback is triggered by invoke notify command, skipping */ + if (data->invoke_notify) + return; + + snapshot = &g_ptr_portal_data->snapshot[index]; + if (new_rate > snapshot->current_rate) + snapshot->rate_up_cnt++; + else + snapshot->rate_down_cnt++; + snapshot->current_rate = new_rate; + g_ptr_portal_data->total_update_cnt++; +} + +static void kutf_set_pm_ctx_active(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt != 1)) + return; + + kbase_pm_context_active(data->kbdev); + kbase_pm_wait_for_desired_state(data->kbdev); + kbase_pm_request_gpu_cycle_counter(data->kbdev); +} + +static void kutf_set_pm_ctx_idle(struct kutf_context *context) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + + if (WARN_ON(data->pm_ctx_cnt > 0)) + return; + + kbase_pm_context_idle(data->kbdev); + kbase_pm_release_gpu_cycle_counter(data->kbdev); +} + +static char const *kutf_clk_trace_do_change_pm_ctx(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + const unsigned int cnt = data->pm_ctx_cnt; + const enum kbasep_clk_rate_trace_req req = cmd->portal_cmd; + char const *errmsg = NULL; + + WARN_ON(req != PORTAL_CMD_INC_PM_CTX_CNT && + req != PORTAL_CMD_DEC_PM_CTX_CNT); + + if (req == PORTAL_CMD_INC_PM_CTX_CNT && cnt < UINT_MAX) { + data->pm_ctx_cnt++; + if (data->pm_ctx_cnt == 1) + kutf_set_pm_ctx_active(context); + } + + if (req == PORTAL_CMD_DEC_PM_CTX_CNT && cnt > 0) { + data->pm_ctx_cnt--; + if (data->pm_ctx_cnt == 0) + kutf_set_pm_ctx_idle(context); + } + + /* Skip the length check, no chance of overflow for two ints */ + snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt); + + if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) { + pr_warn("Error in sending ack for adjusting pm_ctx_cnt\n"); + errmsg = kutf_dsprintf(&context->fixture_pool, + "Error in sending ack for adjusting pm_ctx_cnt"); + } + + return errmsg; +} + +static char const *kutf_clk_trace_do_get_rate(struct kutf_context *context, + struct clk_trace_portal_input *cmd) +{ + struct kutf_clk_rate_trace_fixture_data *data = context->fixture; + struct kbase_device *kbdev = data->kbdev; + int seq = cmd->cmd_input.u.val_u64 & 0xFF; + unsigned long rate; + bool idle; + int ret; + int i; + char const *errmsg = NULL; + + WARN_ON((cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_MGR) && + (cmd->portal_cmd != PORTAL_CMD_GET_CLK_RATE_TRACE)); + + ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN, + "{SEQ:%d, RATE:[", seq); + + for (i = 0; i < data->nclks; i++) { + spin_lock(&kbdev->pm.clk_rtm.lock); + if
+			rate = kbdev->pm.clk_rtm.clks[i]->clock_val;
+		else
+			rate = data->snapshot[i].current_rate;
+		idle = kbdev->pm.clk_rtm.gpu_idle;
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		if ((i + 1) == data->nclks)
+			ret += snprintf(portal_msg_buf + ret,
+				PORTAL_MSG_LEN - ret, "0x%lx], GPU_IDLE:%d}",
+				rate, idle);
+		else
+			ret += snprintf(portal_msg_buf + ret,
+				PORTAL_MSG_LEN - ret, "0x%lx, ", rate);
+
+		if (ret >= PORTAL_MSG_LEN) {
+			pr_warn("Message buf overflow with rate array data\n");
+			return kutf_dsprintf(&context->fixture_pool,
+				"Message buf overflow with rate array data");
+		}
+	}
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending back rate array\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending rate array");
+	}
+
+	return errmsg;
+}
+
+/**
+ * kutf_clk_trace_do_get_snapshot() - Send back the current snapshot
+ * @context: KUTF context
+ * @cmd: The decoded portal input request
+ *
+ * The accumulated clock rate trace information is kept inside as a snapshot
+ * record. A user request to get the snapshot marks the closure of the
+ * current snapshot record, and the start of the next one. The response
+ * message contains the current snapshot record, with each clock's
+ * data placed sequentially inside array markers [ ].
+ */
+static char const *kutf_clk_trace_do_get_snapshot(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct clk_trace_snapshot snapshot;
+	int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+	int ret;
+	int i;
+	char const *fmt;
+	char const *errmsg = NULL;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_GET_TRACE_SNAPSHOT);
+
+	ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+		"{SEQ:%d, SNAPSHOT_ARRAY:[", seq);
+
+	for (i = 0; i < data->nclks; i++) {
+		spin_lock(&data->kbdev->pm.clk_rtm.lock);
+		/* Copy out the snapshot of the clock */
+		snapshot = data->snapshot[i];
+		/* Set the next snapshot start condition */
+		data->snapshot[i].previous_rate = snapshot.current_rate;
+		data->snapshot[i].rate_up_cnt = 0;
+		data->snapshot[i].rate_down_cnt = 0;
+		spin_unlock(&data->kbdev->pm.clk_rtm.lock);
+
+		/* Check whether i corresponds to the last clock */
+		if ((i + 1) == data->nclks)
+			fmt = "(0x%lx, 0x%lx, %u, %u)]}";
+		else
+			fmt = "(0x%lx, 0x%lx, %u, %u), ";
+		ret += snprintf(portal_msg_buf + ret, PORTAL_MSG_LEN - ret,
+			fmt, snapshot.previous_rate, snapshot.current_rate,
+			snapshot.rate_up_cnt, snapshot.rate_down_cnt);
+		if (ret >= PORTAL_MSG_LEN) {
+			pr_warn("Message buf overflow with snapshot data\n");
+			return kutf_dsprintf(&context->fixture_pool,
+				"Message buf overflow with snapshot data");
+		}
+	}
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending back snapshot array\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending snapshot array");
+	}
+
+	return errmsg;
+}
+
+/**
+ * kutf_clk_trace_do_invoke_notify_42k() - Invoke the stored notification callbacks
+ * @context: KUTF context
+ * @cmd: The decoded portal input request
+ *
+ * Invokes the frequency change notification callbacks with a fake
+ * GPU frequency of 42 kHz for the top clock domain.
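+ * The notification is raised with data->invoke_notify set, so the
+ * portal's own listener (kutf_portal_trace_write) recognizes and skips
+ * it, leaving the snapshot counters undisturbed.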
+ */
+static char const *kutf_clk_trace_do_invoke_notify_42k(
+				struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+	const unsigned long new_rate_hz = 42000;
+	int ret;
+	char const *errmsg = NULL;
+	struct kbase_clk_rate_trace_manager *clk_rtm = &data->kbdev->pm.clk_rtm;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVOKE_NOTIFY_42KHZ);
+
+	spin_lock(&clk_rtm->lock);
+
+	data->invoke_notify = true;
+	kbase_clk_rate_trace_manager_notify_all(
+		clk_rtm, 0, new_rate_hz);
+	data->invoke_notify = false;
+
+	spin_unlock(&clk_rtm->lock);
+
+	ret = snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+		"{SEQ:%d, HZ:%lu}", seq, new_rate_hz);
+
+	if (ret >= PORTAL_MSG_LEN) {
+		pr_warn("Message buf overflow with invoked data\n");
+		return kutf_dsprintf(&context->fixture_pool,
+			"Message buf overflow with invoked data");
+	}
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending ack for " INVOKE_NOTIFY_42KHZ " request\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending ack for " INVOKE_NOTIFY_42KHZ " request");
+	}
+
+	return errmsg;
+}
+
+static char const *kutf_clk_trace_do_close_portal(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	int seq = cmd->cmd_input.u.val_u64 & 0xFF;
+	char const *errmsg = NULL;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_CLOSE_PORTAL);
+
+	data->server_state = PORTAL_STATE_CLOSING;
+
+	/* Skip the length check, no chance of overflow for two ints */
+	snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+		"{SEQ:%d, PM_CTX_CNT:%u}", seq, data->pm_ctx_cnt);
+
+	if (kutf_helper_send_named_str(context, "ACK", portal_msg_buf)) {
+		pr_warn("Error in sending ack for " CLOSE_PORTAL " request\n");
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Error in sending ack for " CLOSE_PORTAL " request");
+	}
+
+	return errmsg;
+}
+
+static bool kutf_clk_trace_dequeue_portal_cmd(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	int i;
+	int err = kutf_helper_receive_named_val(context, &cmd->cmd_input);
+
+	cmd->named_val_err = err;
+	if (err == KUTF_HELPER_ERR_NONE &&
+	    cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) {
+		/* All portal request commands are of the format (named u64):
+		 *   CMD_NAME=1234
+		 * where 1234 is a (variable) sequence number tag.
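+		 * For example: GET_TRACE_SNAPSHOT=9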
+		 */
+		for (i = 0; i < PORTAL_TOTAL_CMDS; i++) {
+			if (strcmp(cmd->cmd_input.val_name,
+				kbasep_portal_cmd_name_map[i].name))
+				continue;
+
+			cmd->portal_cmd = kbasep_portal_cmd_name_map[i].cmd;
+			return true;
+		}
+	}
+
+	cmd->portal_cmd = PORTAL_CMD_INVALID;
+	return false;
+}
+
+static void kutf_clk_trace_flag_result(struct kutf_context *context,
+				enum kutf_result_status result, char const *msg)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+
+	if (result > data->test_status) {
+		data->test_status = result;
+		if (msg)
+			data->result_msg = msg;
+		if (data->server_state == PORTAL_STATE_LIVE &&
+		    result > KUTF_RESULT_WARN) {
+			data->server_state = PORTAL_STATE_CLOSING;
+		}
+	}
+}
+
+static bool kutf_clk_trace_process_portal_cmd(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	char const *errmsg = NULL;
+
+	BUILD_BUG_ON(ARRAY_SIZE(kbasep_portal_cmd_name_map) !=
+		PORTAL_TOTAL_CMDS);
+	WARN_ON(cmd->portal_cmd == PORTAL_CMD_INVALID);
+
+	switch (cmd->portal_cmd) {
+	case PORTAL_CMD_GET_CLK_RATE_MGR:
+		/* Fall through */
+	case PORTAL_CMD_GET_CLK_RATE_TRACE:
+		errmsg = kutf_clk_trace_do_get_rate(context, cmd);
+		break;
+	case PORTAL_CMD_GET_TRACE_SNAPSHOT:
+		errmsg = kutf_clk_trace_do_get_snapshot(context, cmd);
+		break;
+	case PORTAL_CMD_INC_PM_CTX_CNT:
+		/* Fall through */
+	case PORTAL_CMD_DEC_PM_CTX_CNT:
+		errmsg = kutf_clk_trace_do_change_pm_ctx(context, cmd);
+		break;
+	case PORTAL_CMD_CLOSE_PORTAL:
+		errmsg = kutf_clk_trace_do_close_portal(context, cmd);
+		break;
+	case PORTAL_CMD_INVOKE_NOTIFY_42KHZ:
+		errmsg = kutf_clk_trace_do_invoke_notify_42k(context, cmd);
+		break;
+	default:
+		pr_warn("Don't know how to handle portal_cmd: %d, abort session.\n",
+			cmd->portal_cmd);
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Don't know how to handle portal_cmd: %d",
+			cmd->portal_cmd);
+		break;
+	}
+
+	if (errmsg)
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg);
+
+	return (errmsg == NULL);
+}
+
+/**
+ * kutf_clk_trace_do_nack_response() - Respond with a NACK to erroneous input
+ * @context: KUTF context
+ * @cmd: The erroneous input request
+ *
+ * This function deals with an erroneous input request and responds with
+ * a proper 'NACK' message.
+ */
+static int kutf_clk_trace_do_nack_response(struct kutf_context *context,
+				struct clk_trace_portal_input *cmd)
+{
+	int seq;
+	int err;
+	char const *errmsg = NULL;
+
+	WARN_ON(cmd->portal_cmd != PORTAL_CMD_INVALID);
+
+	if (cmd->named_val_err == KUTF_HELPER_ERR_NONE &&
+	    cmd->cmd_input.type == KUTF_HELPER_VALTYPE_U64) {
+		/* Keep the seq number as % 256 */
+		seq = cmd->cmd_input.u.val_u64 & 255;
+		snprintf(portal_msg_buf, PORTAL_MSG_LEN,
+			"{SEQ:%d, MSG: Unknown command '%s'.}", seq,
+			cmd->cmd_input.val_name);
+		err = kutf_helper_send_named_str(context, "NACK",
+			portal_msg_buf);
+	} else
+		err = kutf_helper_send_named_str(context, "NACK",
+			"Wrong portal cmd format (Ref example: CMD_NAME=0x16)");
+
+	if (err) {
+		errmsg = kutf_dsprintf(&context->fixture_pool,
+			"Failed to send portal NACK response");
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, errmsg);
+	}
+
+	return err;
+}
+
+/**
+ * kutf_clk_trace_barebone_check() - Sanity test on the clock tracing
+ * @context: KUTF context
+ *
+ * This function carries out some basic tests on the tracing operation:
+ * 1) If the GPU is idle at test start, the trace rate should be 0
+ *    (low power state).
+ * 2) Make sure the GPU is powered up; the trace rate should then match
+ *    the clock manager's internally recorded rate.
+ * 3) If a GPU active transition occurs following 2), there must be a
+ *    rate change event from tracing.
+ */
+void kutf_clk_trace_barebone_check(struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+	bool fail = false;
+	bool idle[2] = { false };
+	char const *msg = NULL;
+	int i;
+
+	/* Check consistency if the GPU happens to be idle */
+	spin_lock(&kbdev->pm.clk_rtm.lock);
+	idle[0] = kbdev->pm.clk_rtm.gpu_idle;
+	if (kbdev->pm.clk_rtm.gpu_idle) {
+		for (i = 0; i < data->nclks; i++) {
+			if (data->snapshot[i].current_rate) {
+				/* Idle should mean a rate of 0 */
+				fail = true;
+				break;
+			}
+		}
+	}
+	spin_unlock(&kbdev->pm.clk_rtm.lock);
+	if (fail) {
+		msg = kutf_dsprintf(&context->fixture_pool,
+			"GPU Idle not yielding 0-rate");
+		pr_err("Trace did not see idle rate\n");
+	} else {
+		/* Make local PM active if not done so yet */
+		if (data->pm_ctx_cnt == 0) {
+			/* Ensure the GPU is powered */
+			data->pm_ctx_cnt++;
+			kutf_set_pm_ctx_active(context);
+		}
+		/* Check that the rate is consistent */
+		spin_lock(&kbdev->pm.clk_rtm.lock);
+		idle[1] = kbdev->pm.clk_rtm.gpu_idle;
+		for (i = 0; i < data->nclks; i++) {
+			/* Rate match between the manager and the trace */
+			if (kbdev->pm.clk_rtm.clks[i]->clock_val !=
+			    data->snapshot[i].current_rate) {
+				fail = true;
+				break;
+			}
+		}
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+		if (idle[1]) {
+			msg = kutf_dsprintf(&context->fixture_pool,
+				"GPU still idle after set_pm_ctx_active");
+			pr_err("GPU still idle after set_pm_ctx_active\n");
+		}
+
+		if (!msg && fail) {
+			msg = kutf_dsprintf(&context->fixture_pool,
+				"Trace rate not matching Clk manager's read");
+			pr_err("Trace rate not matching Clk manager's read\n");
+		}
+	}
+
+	if (!msg && idle[0] && !idle[1] && !data->total_update_cnt) {
+		msg = kutf_dsprintf(&context->fixture_pool,
+			"Trace update did not occur");
+		pr_err("Trace update did not occur\n");
+	}
+	if (msg)
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_FAIL, msg);
+	else if (!data->total_update_cnt) {
+		msg = kutf_dsprintf(&context->fixture_pool,
+			"No trace update seen during the test!");
+		kutf_clk_trace_flag_result(context, KUTF_RESULT_WARN, msg);
+	}
+}
+
+static bool kutf_clk_trace_end_of_stream(struct clk_trace_portal_input *cmd)
+{
+	return (cmd->named_val_err == -EBUSY);
+}
+
+void kutf_clk_trace_no_clks_dummy(struct kutf_context *context)
+{
+	struct clk_trace_portal_input cmd;
+	unsigned long timeout = jiffies + HZ * 2;
+	bool has_cmd;
+
+	while (time_before(jiffies, timeout)) {
+		if (kutf_helper_pending_input(context)) {
+			has_cmd = kutf_clk_trace_dequeue_portal_cmd(context,
+				&cmd);
+			if (!has_cmd && kutf_clk_trace_end_of_stream(&cmd))
+				break;
+
+			kutf_helper_send_named_str(context, "NACK",
+				"Fatal! No clocks visible, aborting");
+		}
+		msleep(20);
+	}
+
+	kutf_clk_trace_flag_result(context, KUTF_RESULT_FATAL,
+		"No clocks visible to the portal");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_portal() - Service portal input
+ * @context: KUTF context
+ *
+ * The test portal operates on input requests. If the input request is one
+ * of the recognized portal commands, it is handled accordingly. Otherwise
+ * a negative response 'NACK' is returned. The portal service terminates
+ * when a 'CLOSE_PORTAL' request is received, or due to an internal error.
+ * In both cases the server_state transitions to CLOSING.
+ *
+ * If the portal is closed on request, a sanity test on the clock rate
+ * trace operation is undertaken via the function
+ * kutf_clk_trace_barebone_check().
+ */
+static void mali_kutf_clk_rate_trace_test_portal(struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct clk_trace_portal_input new_cmd;
+
+	pr_debug("Test portal service start\n");
+
+	while (data->server_state == PORTAL_STATE_LIVE) {
+		if (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd))
+			kutf_clk_trace_process_portal_cmd(context, &new_cmd);
+		else if (kutf_clk_trace_end_of_stream(&new_cmd))
+			/* Dequeue on portal input, end of stream */
+			data->server_state = PORTAL_STATE_CLOSING;
+		else
+			kutf_clk_trace_do_nack_response(context, &new_cmd);
+	}
+
+	/* Closing: exhaust all the pending inputs with NACKs */
+	if (data->server_state == PORTAL_STATE_CLOSING) {
+		while (kutf_helper_pending_input(context) &&
+		       (kutf_clk_trace_dequeue_portal_cmd(context, &new_cmd) ||
+			!kutf_clk_trace_end_of_stream(&new_cmd))) {
+			kutf_helper_send_named_str(context, "NACK",
+				"Portal closing down");
+		}
+	}
+
+	/* If there was no portal error, do a barebone test here irrespective
+	 * of whatever the portal live session has been testing, which
+	 * is entirely driven by the user side via portal requests.
+	 */
+	if (data->test_status <= KUTF_RESULT_WARN) {
+		if (data->server_state != PORTAL_STATE_NO_CLK)
+			kutf_clk_trace_barebone_check(context);
+		else {
+			/* No clocks case, NACK 2-sec for the fatal situation */
+			kutf_clk_trace_no_clks_dummy(context);
+		}
+	}
+
+	/* If we have changed the pm_ctx count, drop it back */
+	if (data->pm_ctx_cnt) {
+		/* Although the count is driven by portal requests, it only
+		 * has a material impact on the 0 -> 1 transition, so the
+		 * reverse is a simple one-off reset.
+		 */
+		data->pm_ctx_cnt = 0;
+		kutf_set_pm_ctx_idle(context);
+	}
+
+	/* Finally log the test result line */
+	if (data->test_status < KUTF_RESULT_WARN)
+		kutf_test_pass(context, data->result_msg);
+	else if (data->test_status == KUTF_RESULT_WARN)
+		kutf_test_warn(context, data->result_msg);
+	else if (data->test_status == KUTF_RESULT_FATAL)
+		kutf_test_fatal(context, data->result_msg);
+	else
+		kutf_test_fail(context, data->result_msg);
+
+	pr_debug("Test end\n");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_create_fixture() - Creates the fixture data
+ *                 required for mali_kutf_clk_rate_trace_test_portal.
+ * @context: KUTF context.
+ *
+ * Return: Fixture data created on success or NULL on failure
+ */
+static void *mali_kutf_clk_rate_trace_create_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data;
+	struct kbase_device *kbdev;
+	unsigned long rate;
+	int i;
+
+	/* Acquire the kbase device */
+	pr_debug("Finding device\n");
+	kbdev = kbase_find_device(MINOR_FOR_FIRST_KBASE_DEV);
+	if (kbdev == NULL) {
+		kutf_test_fail(context, "Failed to find kbase device");
+		return NULL;
+	}
+
+	pr_debug("Creating fixture\n");
+	data = kutf_mempool_alloc(&context->fixture_pool,
+		sizeof(struct kutf_clk_rate_trace_fixture_data));
+	if (!data)
+		return NULL;
+
+	*data = (const struct kutf_clk_rate_trace_fixture_data) { 0 };
+	pr_debug("Hooking up the test portal to kbdev clk rate trace\n");
+	spin_lock(&kbdev->pm.clk_rtm.lock);
+
+	if (g_ptr_portal_data != NULL) {
+		pr_warn("Test portal is already in use, run aborted\n");
+		kutf_test_fail(context, "Portal allows single session only");
+		spin_unlock(&kbdev->pm.clk_rtm.lock);
+		return NULL;
+	}
+
+	for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
+		if (kbdev->pm.clk_rtm.clks[i]) {
+			data->nclks++;
+			if (kbdev->pm.clk_rtm.gpu_idle)
+				rate = 0;
+			else
+				rate = kbdev->pm.clk_rtm.clks[i]->clock_val;
+			data->snapshot[i].previous_rate = rate;
+			data->snapshot[i].current_rate = rate;
+		}
+	}
+
+	spin_unlock(&kbdev->pm.clk_rtm.lock);
+
+	if (data->nclks) {
+		/* Subscribe this test server portal */
+		data->listener.notify = kutf_portal_trace_write;
+		data->invoke_notify = false;
+
+		kbase_clk_rate_trace_manager_subscribe(
+			&kbdev->pm.clk_rtm, &data->listener);
+		/* Update the kutf_server_portal fixture_data pointer */
+		g_ptr_portal_data = data;
+	}
+
+	data->kbdev = kbdev;
+	data->result_msg = NULL;
+	data->test_status = KUTF_RESULT_PASS;
+
+	if (data->nclks == 0) {
+		data->server_state = PORTAL_STATE_NO_CLK;
+		pr_debug("Kbdev has no clocks for rate trace\n");
+	} else {
+		data->server_state = PORTAL_STATE_LIVE;
+	}
+
+	pr_debug("Created fixture\n");
+
+	return data;
+}
+
+/**
+ * mali_kutf_clk_rate_trace_remove_fixture() - Destroy fixture data previously
+ * created by mali_kutf_clk_rate_trace_create_fixture.
+ *
+ * @context: KUTF context.
+ */
+static void mali_kutf_clk_rate_trace_remove_fixture(
+		struct kutf_context *context)
+{
+	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
+	struct kbase_device *kbdev = data->kbdev;
+
+	if (data->nclks) {
+		/* Clean up the portal trace write arrangement */
+		g_ptr_portal_data = NULL;
+
+		kbase_clk_rate_trace_manager_unsubscribe(
+			&kbdev->pm.clk_rtm, &data->listener);
+	}
+	pr_debug("Destroying fixture\n");
+	kbase_release_device(kbdev);
+	pr_debug("Destroyed fixture\n");
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_module_init() - Entry point for test module.
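+ *
+ * Return: 0 on success, or -ENOMEM if the KUTF application or suite
+ * cannot be created.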
+ */
+int mali_kutf_clk_rate_trace_test_module_init(void)
+{
+	struct kutf_suite *suite;
+	unsigned int filters;
+	union kutf_callback_data suite_data = { 0 };
+
+	pr_debug("Creating app\n");
+
+	g_ptr_portal_data = NULL;
+	kutf_app = kutf_create_application(CLK_RATE_TRACE_APP_NAME);
+
+	if (!kutf_app) {
+		pr_warn("Creation of app " CLK_RATE_TRACE_APP_NAME
+			" failed!\n");
+		return -ENOMEM;
+	}
+
+	pr_debug("Create suite %s\n", CLK_RATE_TRACE_SUITE_NAME);
+	suite = kutf_create_suite_with_filters_and_data(
+		kutf_app, CLK_RATE_TRACE_SUITE_NAME, 1,
+		mali_kutf_clk_rate_trace_create_fixture,
+		mali_kutf_clk_rate_trace_remove_fixture,
+		KUTF_F_TEST_GENERIC,
+		suite_data);
+
+	if (!suite) {
+		pr_warn("Creation of suite %s failed!\n",
+			CLK_RATE_TRACE_SUITE_NAME);
+		kutf_destroy_application(kutf_app);
+		return -ENOMEM;
+	}
+
+	filters = suite->suite_default_flags;
+	kutf_add_test_with_filters(
+		suite, 0x0, CLK_RATE_TRACE_PORTAL,
+		mali_kutf_clk_rate_trace_test_portal,
+		filters);
+
+	pr_debug("Init complete\n");
+	return 0;
+}
+
+/**
+ * mali_kutf_clk_rate_trace_test_module_exit() - Module exit point for this
+ * test.
+ */
+void mali_kutf_clk_rate_trace_test_module_exit(void)
+{
+	pr_debug("Exit start\n");
+	kutf_destroy_application(kutf_app);
+	pr_debug("Exit complete\n");
+}
+
+
+module_init(mali_kutf_clk_rate_trace_test_module_init);
+module_exit(mali_kutf_clk_rate_trace_test_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
new file mode 100644
index 0000000..f46afd5
--- /dev/null
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/mali_kutf_clk_rate_trace_test.h
@@ -0,0 +1,148 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+#ifndef _KUTF_CLK_RATE_TRACE_TEST_H_
+#define _KUTF_CLK_RATE_TRACE_TEST_H_
+
+#define CLK_RATE_TRACE_APP_NAME "clk_rate_trace"
+#define CLK_RATE_TRACE_SUITE_NAME "rate_trace"
+#define CLK_RATE_TRACE_PORTAL "portal"
+
+/**
+ * enum kbasep_clk_rate_trace_req - request command to the clock rate trace
+ *                                  service portal.
+ *
+ * @PORTAL_CMD_GET_CLK_RATE_MGR:   Request the clock trace manager internal
+ *                                 data record. On a positive acknowledgement
+ *                                 the prevailing clock rates and the GPU idle
+ *                                 condition flag are returned.
+ * @PORTAL_CMD_GET_CLK_RATE_TRACE: Request the clock trace portal to return its
+ *                                 data record. On a positive acknowledgement
+ *                                 the last trace recorded clock rates and the
+ *                                 GPU idle condition flag are returned.
+ * @PORTAL_CMD_GET_TRACE_SNAPSHOT: Request the clock trace portal to return its
+ *                                 current snapshot data record. On a positive
+ *                                 acknowledgement the snapshot array matching
+ *                                 the number of clocks is returned.
+ *                                 It also starts a fresh snapshot inside
+ *                                 the clock trace portal.
+ * @PORTAL_CMD_INC_PM_CTX_CNT:     Request the clock trace portal to increase
+ *                                 its internal PM_CTX_COUNT. If this increase
+ *                                 yields a count change of 0 -> 1, the portal
+ *                                 will initiate a PM_CTX_ACTIVE call to the
+ *                                 Kbase power management. Further increase
+ *                                 requests will only affect the portal's
+ *                                 internal count value.
+ * @PORTAL_CMD_DEC_PM_CTX_CNT:     Request the clock trace portal to decrease
+ *                                 its internal PM_CTX_COUNT. If this decrease
+ *                                 yields a count change of 1 -> 0, the portal
+ *                                 will initiate a PM_CTX_IDLE call to the
+ *                                 Kbase power management.
+ * @PORTAL_CMD_CLOSE_PORTAL:       Inform the clock trace portal service that
+ *                                 the client has completed its session. The
+ *                                 portal will start the close down action. If
+ *                                 no error has occurred during the dynamic
+ *                                 interactive session, an inherent basic test
+ *                                 carrying out some sanity checks on the
+ *                                 clock trace is undertaken.
+ * @PORTAL_CMD_INVOKE_NOTIFY_42KHZ: Invoke all clock rate trace manager
+ *                                 callbacks for the top clock domain with a
+ *                                 new GPU frequency set to 42 kHz.
+ * @PORTAL_CMD_INVALID:            Valid commands termination marker. Must be
+ *                                 the highest enumeration value, as it
+ *                                 represents the valid command array size.
+ * @PORTAL_TOTAL_CMDS:             Alias of PORTAL_CMD_INVALID.
+ */
+/* PORTAL_CMD_INVALID must be the last one, serving the size */
+enum kbasep_clk_rate_trace_req {
+	PORTAL_CMD_GET_CLK_RATE_MGR,
+	PORTAL_CMD_GET_CLK_RATE_TRACE,
+	PORTAL_CMD_GET_TRACE_SNAPSHOT,
+	PORTAL_CMD_INC_PM_CTX_CNT,
+	PORTAL_CMD_DEC_PM_CTX_CNT,
+	PORTAL_CMD_CLOSE_PORTAL,
+	PORTAL_CMD_INVOKE_NOTIFY_42KHZ,
+	PORTAL_CMD_INVALID,
+	PORTAL_TOTAL_CMDS = PORTAL_CMD_INVALID,
+};
+
+/**
+ * Portal service request command names. The portal request consists of a kutf
+ * named u64-value. For the PORTAL_CMDs enumerated above, the names defined
+ * here are used as the command name, followed by a sequence number value.
+ * Example (manual script here for illustration):
+ *   exec 5<>run                   # open the portal kutf run as fd-5
+ *   echo GET_CLK_RATE_MGR=1 >&5   # send the cmd and sequence number 1
+ *   head -n 1 <&5                 # read back the 1-line server response
+ *   ACK="{SEQ:1, RATE:[0x1ad27480], GPU_IDLE:1}"   # response string
+ *   echo GET_TRACE_SNAPSHOT=1 >&5 # send the cmd and sequence number 1
+ *   head -n 1 <&5                 # read back the 1-line server response
+ *   ACK="{SEQ:1, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}"
+ *   echo CLOSE_PORTAL=1 >&5       # close the portal
+ *   cat <&5                       # read back all the response lines
+ *   ACK="{SEQ:1, PM_CTX_CNT:0}"   # response to the close command
+ *   KUTF_RESULT_PASS:(explicit pass)   # internal sanity test passed
+ *   exec 5>&-                     # close the service portal fd
+ *
+ * Expected request command return format:
+ * GET_CLK_RATE_MGR: ACK="{SEQ:12, RATE:[1080, 1280], GPU_IDLE:1}"
+ *   Note, the above contains 2 clocks with rates in [], GPU idle.
+ * GET_CLK_RATE_TRACE: ACK="{SEQ:6, RATE:[0x1ad27480], GPU_IDLE:0}"
+ *   Note, 1 clock with its rate in [], GPU not idle.
+ * GET_TRACE_SNAPSHOT: ACK="{SEQ:8, SNAPSHOT_ARRAY:[(0x0, 0x1ad27480, 1, 0)]}"
+ *   Note, 1 clock, (start_rate: 0, last_rate: 0x1ad27480,
+ *   trace_rate_up_count: 1, trace_rate_down_count: 0).
+ *   For this specific sample case, there is a single rate_trace event
+ *   that yielded a rate increase change. No rate drop event was recorded
+ *   in the reporting snapshot duration.
+ * INC_PM_CTX_CNT: ACK="{SEQ:1, PM_CTX_CNT:1}"
+ *   Note, after the increment, PM_CTX_CNT is 1 (i.e. 0 -> 1).
+ * DEC_PM_CTX_CNT: ACK="{SEQ:3, PM_CTX_CNT:0}"
+ *   Note, after the decrement, PM_CTX_CNT is 0 (i.e. 1 -> 0).
+ * CLOSE_PORTAL: ACK="{SEQ:1, PM_CTX_CNT:1}"
+ *   Note, at the close, PM_CTX_CNT is 1. The PM_CTX_CNT will internally be
+ *   dropped down to 0 as part of the portal close clean up.
+ */
+#define GET_CLK_RATE_MGR "GET_CLK_RATE_MGR"
+#define GET_CLK_RATE_TRACE "GET_CLK_RATE_TRACE"
+#define GET_TRACE_SNAPSHOT "GET_TRACE_SNAPSHOT"
+#define INC_PM_CTX_CNT "INC_PM_CTX_CNT"
+#define DEC_PM_CTX_CNT "DEC_PM_CTX_CNT"
+#define CLOSE_PORTAL "CLOSE_PORTAL"
+#define INVOKE_NOTIFY_42KHZ "INVOKE_NOTIFY_42KHZ"
+
+/**
+ * Portal service response tag names. The response consists of a kutf
+ * named string-value. In the case of a 'NACK' (negative acknowledgement),
+ * it can be one of two formats:
+ * 1. NACK="{SEQ:2, MSG:xyzed}"   # NACK on the command with sequence tag-2.
+ *    Note, the portal has received a valid name and a valid sequence number
+ *    but can't carry out the request, reason in the MSG field.
+ * 2. NACK="Failing-message"
+ *    Note, unable to parse a valid name or a valid sequence number,
+ *    or some internal error condition. Reason in the quoted string.
+ */
+#define ACK "ACK"
+#define NACK "NACK"
+#define MAX_REPLY_NAME_LEN 32
+
+#endif /* _KUTF_CLK_RATE_TRACE_TEST_H_ */
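For reference, the sketch below shows what a minimal user-space client for this portal could look like. It is illustrative only and not part of the patch: the debugfs mount point and the exact KUTF run-file path (built here from the app/suite/test names above) are assumptions that may need adjusting to the target's actual KUTF directory layout.

    /* Hypothetical user-space client for the clk_rate_trace portal.
     * PORTAL_RUN is an assumed path; KUTF exposes a "run" file per test
     * under debugfs, but the exact layout may differ on the target.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    #define PORTAL_RUN \
        "/sys/kernel/debug/kutf_tests/clk_rate_trace/rate_trace/portal/run"

    int main(void)
    {
        char reply[256];
        ssize_t n;
        int fd = open(PORTAL_RUN, O_RDWR);

        if (fd < 0) {
            perror("open portal run file");
            return 1;
        }

        /* Send a named u64 command: CMD_NAME=<sequence tag> */
        dprintf(fd, "GET_CLK_RATE_MGR=1\n");
        /* Expect one reply line: ACK="{SEQ:1, RATE:[...], GPU_IDLE:x}" */
        n = read(fd, reply, sizeof(reply) - 1);
        if (n > 0) {
            reply[n] = '\0';
            printf("%s", reply);
        }

        /* Close the portal; this triggers the barebone sanity check */
        dprintf(fd, "CLOSE_PORTAL=2\n");
        while ((n = read(fd, reply, sizeof(reply) - 1)) > 0) {
            reply[n] = '\0';
            printf("%s", reply);    /* last lines carry the KUTF result */
        }

        close(fd);
        return 0;
    }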