path: root/mali_kbase/hwcnt
author    Jörg Wagner <jorwag@google.com>  2022-12-15 14:01:25 +0000
committer Jörg Wagner <jorwag@google.com>  2022-12-15 16:27:59 +0000
commit    9ff5b6f2510d94765def3cf7c1fda01e387cabab (patch)
tree      d455bcd53cca74df918b3dd0092e806fb29e1461 /mali_kbase/hwcnt
parent    c30533582604fe0365bc3ce4e9e8e19dec3109da (diff)
download  gpu-9ff5b6f2510d94765def3cf7c1fda01e387cabab.tar.gz
Mali Valhall Android DDK r40p0-01eac0 KMD
Provenance: 056ded72d351d1bf6319f7b2b925496dd6ad304f (ipdelivery/EAC/v_r40p0)
VX504X08X-BU-00000-r40p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r40p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r40p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r40p0-01eac0 - Valhall Android Renderscript AOSP parts
Change-Id: I6db6b45c73c5447dd246533246e65b5ef2c8872f
Diffstat (limited to 'mali_kbase/hwcnt')
-rw-r--r--  mali_kbase/hwcnt/Kbuild                                            37
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h               225
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c          1892
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h           153
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h        302
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c     784
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h      49
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c            863
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h             58
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c   829
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h    65
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt.c                               775
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h                   139
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_context.h                       148
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c                           738
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h                           407
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c                    298
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h                    330
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.c                         511
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.h                        1231
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c                   744
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h                   151
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h                    89
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c             157
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h              48
25 files changed, 11023 insertions, 0 deletions
diff --git a/mali_kbase/hwcnt/Kbuild b/mali_kbase/hwcnt/Kbuild
new file mode 100644
index 0000000..8c8775f
--- /dev/null
+++ b/mali_kbase/hwcnt/Kbuild
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+mali_kbase-y += \
+ hwcnt/mali_kbase_hwcnt.o \
+ hwcnt/mali_kbase_hwcnt_gpu.o \
+ hwcnt/mali_kbase_hwcnt_gpu_narrow.o \
+ hwcnt/mali_kbase_hwcnt_types.o \
+ hwcnt/mali_kbase_hwcnt_virtualizer.o \
+ hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o
+
+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
+ mali_kbase-y += \
+ hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \
+ hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o
+else
+ mali_kbase-y += \
+ hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \
+ hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o
+endif
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
new file mode 100644
index 0000000..6cfa6f5
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter backends.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_H_
+#define _KBASE_HWCNT_BACKEND_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/*
+ * struct kbase_hwcnt_backend_info - Opaque pointer to information used to
+ * create an instance of a hardware counter
+ * backend.
+ */
+struct kbase_hwcnt_backend_info;
+
+/*
+ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter
+ * backend, used to perform dumps.
+ */
+struct kbase_hwcnt_backend;
+
+/*
+ * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter
+ * metadata that describes the layout
+ * of the counter data structures.
+ * @info: Non-NULL pointer to backend info.
+ *
+ * Multiple calls to this function with the same info are guaranteed to return
+ * the same metadata object each time.
+ *
+ * Return: Non-NULL pointer to immutable hardware counter metadata.
+ */
+typedef const struct kbase_hwcnt_metadata *
+kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info);
+
+/**
+ * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
+ * @info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * All uses of the created hardware counter backend must be externally
+ * synchronised.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend);
+
+/**
+ * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
+ * @backend: Pointer to backend to be terminated.
+ */
+typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend
+ * timestamp.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * Return: Backend timestamp in nanoseconds.
+ */
+typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
+ * backend.
+ * @backend: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * The enable_map must have been created using the interface's metadata.
+ * If the backend has already been enabled, an error is returned.
+ *
+ * May be called in an atomic context.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
+ * with the backend.
+ * @backend: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be
+ * called in an atomic context with the spinlock documented by the specific
+ * backend interface held.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int
+kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
+ * the backend.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is already disabled, does nothing.
+ * Any undumped counter values since the last dump_get() will be lost.
+ */
+typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
+ * counters.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
+ * dump.
+ * @backend: Non-NULL pointer to backend.
+ * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was
+ * requested will be written out to on success.
+ *
+ * If the backend is not enabled or another dump is already in progress,
+ * returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
+ * counter dump has completed.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate the counters
+ * dumped after the last dump request
+ * into the dump buffer.
+ * @backend: Non-NULL pointer to backend.
+ * @dump_buffer: Non-NULL pointer to destination dump buffer.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate: True if counters should be accumulated into dump_buffer, rather
+ * than copied.
+ *
+ * The resultant contents of the dump buffer are only well defined if a prior
+ * call to dump_wait returned successfully, and a new dump has not yet been
+ * requested by a call to dump_request.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ bool accumulate);
+
+/**
+ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
+ * interface.
+ * @info: Immutable info used to initialise an instance of the
+ * backend.
+ * @metadata: Function ptr to get the immutable hardware counter
+ * metadata.
+ * @init: Function ptr to initialise an instance of the backend.
+ * @term: Function ptr to terminate an instance of the backend.
+ * @timestamp_ns: Function ptr to get the current backend timestamp.
+ * @dump_enable: Function ptr to enable dumping.
+ * @dump_enable_nolock: Function ptr to enable dumping while the
+ * backend-specific spinlock is already held.
+ * @dump_disable: Function ptr to disable dumping.
+ * @dump_clear: Function ptr to clear counters.
+ * @dump_request: Function ptr to request a dump.
+ * @dump_wait: Function ptr to wait until a dump has completed.
+ * @dump_get: Function ptr to copy or accumulate dump into a dump
+ * buffer.
+ */
+struct kbase_hwcnt_backend_interface {
+ const struct kbase_hwcnt_backend_info *info;
+ kbase_hwcnt_backend_metadata_fn *metadata;
+ kbase_hwcnt_backend_init_fn *init;
+ kbase_hwcnt_backend_term_fn *term;
+ kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns;
+ kbase_hwcnt_backend_dump_enable_fn *dump_enable;
+ kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock;
+ kbase_hwcnt_backend_dump_disable_fn *dump_disable;
+ kbase_hwcnt_backend_dump_clear_fn *dump_clear;
+ kbase_hwcnt_backend_dump_request_fn *dump_request;
+ kbase_hwcnt_backend_dump_wait_fn *dump_wait;
+ kbase_hwcnt_backend_dump_get_fn *dump_get;
+};
+
+#endif /* _KBASE_HWCNT_BACKEND_H_ */
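To make the call order through this virtual interface concrete, the sketch below walks one backend instance through a single dump. It is illustrative only and not part of this patch: it assumes an already-populated struct kbase_hwcnt_backend_interface (called iface here) together with a pre-created enable map and dump buffer, and it omits the locking and error recovery that real callers such as the virtualizer perform.

static int example_single_dump(const struct kbase_hwcnt_backend_interface *iface,
			       const struct kbase_hwcnt_enable_map *enable_map,
			       struct kbase_hwcnt_dump_buffer *dump_buf)
{
	struct kbase_hwcnt_backend *backend;
	u64 dump_time_ns;
	int err;

	/* Create a backend instance from the immutable backend info. */
	err = iface->init(iface->info, &backend);
	if (err)
		return err;

	/* Start counter collection with the requested counters enabled. */
	err = iface->dump_enable(backend, enable_map);
	if (err)
		goto out_term;

	/* Request a dump, wait for it to complete, then copy (not accumulate)
	 * the result into the destination buffer.
	 */
	err = iface->dump_request(backend, &dump_time_ns);
	if (!err)
		err = iface->dump_wait(backend);
	if (!err)
		err = iface->dump_get(backend, dump_buf, enable_map, false);

	iface->dump_disable(backend);
out_term:
	iface->term(backend);
	return err;
}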
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
new file mode 100644
index 0000000..424a360
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -0,0 +1,1892 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/log2.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#ifndef BASE_MAX_NR_CLOCKS_REGULATORS
+#define BASE_MAX_NR_CLOCKS_REGULATORS 2
+#endif
+
+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/* Backend watch dog timer interval in milliseconds: 18 seconds. */
+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)18000)
+#else
+/* Backend watch dog timer interval in milliseconds: 1 second. */
+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000)
+#endif /* IS_FPGA && !NO_MALI */
+
+/**
+ * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is
+ * an error.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and
+ * we are waiting for an ACK; the ACK could come from either PRFCNT_ACK or
+ * PROTMODE_ENTER_ACK, or the wait may end early if an error occurs.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been
+ * requested and we are waiting for an ACK; the ACK can only come from
+ * PRFCNT_ACK or an error. PROTMODE_ENTER_ACK does not apply here since a
+ * watchdog request can't be triggered in protected mode.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert
+ * immediately after receiving the ACK, so we know which index corresponds to
+ * the buffer we requested.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and
+ * now we have kicked off the worker.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now
+ * we have kicked off the worker to accumulate up to that insert and then copy
+ * the delta to the user buffer to prepare for dump_get().
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully.
+ *
+ * Valid state transitions:
+ * IDLE -> REQUESTED (on user dump request)
+ * IDLE -> WATCHDOG_REQUESTED (on watchdog request)
+ * IDLE -> QUERYING_INSERT (on user dump request in protected mode)
+ * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware)
+ * WATCHDOG_REQUESTED -> REQUESTED (on user dump request)
+ * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request)
+ * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission)
+ * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating)
+ * ACCUMULATING -> COMPLETED (on accumulation completion)
+ * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode)
+ * COMPLETED -> REQUESTED (on user dump request)
+ * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request)
+ * COMPLETED -> IDLE (on disable)
+ * ANY -> IDLE (on error)
+ */
+enum kbase_hwcnt_backend_csf_dump_state {
+ KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED,
+};
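As a cross-check of the "Valid state transitions" list above, the same state machine can be written down as a small lookup table. The helper below is illustrative only and is not part of this patch; it assumes BIT() from <linux/bits.h> and relies on the enum ordering declared above.

/* Illustrative only: encode the documented dump-state transitions as bitmasks.
 * A transition back to IDLE is always allowed (the "ANY -> IDLE (on error)"
 * rule, which also covers "COMPLETED -> IDLE (on disable)").
 */
static const u8 example_valid_next_dump_state[] = {
	[KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT),
};

static bool example_dump_state_transition_is_valid(enum kbase_hwcnt_backend_csf_dump_state from,
						   enum kbase_hwcnt_backend_csf_dump_state to)
{
	/* ANY -> IDLE is always allowed (error and disable paths). */
	if (to == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE)
		return true;
	return example_valid_next_dump_state[from] & BIT(to);
}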
+
+/**
+ * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend
+ * is disabled.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in
+ * progress, waiting for firmware acknowledgment.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged,
+ * enable is done.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in
+ * progress, waiting for firmware acknowledgment.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been
+ * acknowledged, waiting for dump workers to be finished.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An
+ * unrecoverable error happened, waiting for dump workers to be finished.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error
+ * happened, and dump workers have finished, waiting for reset.
+ *
+ * Valid state transitions:
+ * DISABLED -> TRANSITIONING_TO_ENABLED (on enable)
+ * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack)
+ * ENABLED -> TRANSITIONING_TO_DISABLED (on disable)
+ * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack)
+ * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed)
+ * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error)
+ * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable
+ * error)
+ * UNRECOVERABLE_ERROR -> DISABLED (on before reset)
+ */
+enum kbase_hwcnt_backend_csf_enable_state {
+ KBASE_HWCNT_BACKEND_CSF_DISABLED,
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED,
+ KBASE_HWCNT_BACKEND_CSF_ENABLED,
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED,
+ KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER,
+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER,
+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR,
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_info - Information used to create an instance
+ * of a CSF hardware counter backend.
+ * @backend: Pointer to access CSF backend.
+ * @fw_in_protected_mode: True if FW is running in protected mode, else
+ * false.
+ * @unrecoverable_error_happened: True if an unrecoverable error happened, else
+ * false.
+ * @csf_if: CSF interface object pointer.
+ * @ring_buf_cnt: Dump buffer count in the ring buffer.
+ * @counter_set: The performance counter set to use.
+ * @metadata: Hardware counter metadata.
+ * @prfcnt_info: Performance counter information.
+ * @watchdog_if: Watchdog interface object pointer.
+ */
+struct kbase_hwcnt_backend_csf_info {
+ struct kbase_hwcnt_backend_csf *backend;
+ bool fw_in_protected_mode;
+ bool unrecoverable_error_happened;
+ struct kbase_hwcnt_backend_csf_if *csf_if;
+ u32 ring_buf_cnt;
+ enum kbase_hwcnt_set counter_set;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;
+ struct kbase_hwcnt_watchdog_interface *watchdog_if;
+};
+
+/**
+ * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
+ * information.
+ * @hw_block_cnt: Total number of hardware counter blocks. The hardware counter
+ * blocks are sub-categorized into 4 classes: front-end, tiler, memory system,
+ * and shader. hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
+ * @fe_cnt: Front end block count.
+ * @tiler_cnt: Tiler block count.
+ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
+ * @shader_cnt: Shader Core block count.
+ * @fw_block_cnt: Total number of firmware counter blocks.
+ * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt).
+ * @shader_avail_mask: Bitmap of all shader cores in the system.
+ * @enable_mask_offset: Offset in array elements of enable mask in each block
+ * starting from the beginning of block.
+ * @headers_per_block: For any block, the number of counters designated as block's header.
+ * @counters_per_block: For any block, the number of counters designated as block's payload.
+ * @values_per_block: For any block, the number of counters in total (header + payload).
+ */
+struct kbase_hwcnt_csf_physical_layout {
+ u8 hw_block_cnt;
+ u8 fe_cnt;
+ u8 tiler_cnt;
+ u8 mmu_l2_cnt;
+ u8 shader_cnt;
+ u8 fw_block_cnt;
+ u8 block_cnt;
+ u64 shader_avail_mask;
+ size_t enable_mask_offset;
+ size_t headers_per_block;
+ size_t counters_per_block;
+ size_t values_per_block;
+};
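For concreteness, a worked example of how these fields relate. The configuration below is hypothetical and chosen only to illustrate the arithmetic; in particular the 256-byte block size and the 4 header counters per block are assumptions, not values taken from this patch (the 4-byte counter size follows from the u32 sample buffers used here).

/* Hypothetical configuration, for illustration only:
 *   core_mask = 0x3F            -> shader_cnt = fls64(0x3F) = 6
 *   l2_count  = 2               -> mmu_l2_cnt = 2
 *   one firmware block          -> fw_block_cnt = 1
 *   256-byte blocks of u32 counters, 4 headers per block (assumed)
 *
 *   values_per_block   = 256 / KBASE_HWCNT_VALUE_HW_BYTES = 256 / 4 = 64
 *   counters_per_block = values_per_block - headers_per_block = 64 - 4 = 60
 *   hw_block_cnt       = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt
 *                      = 1 + 1 + 2 + 6 = 10
 *   block_cnt          = fw_block_cnt + hw_block_cnt = 1 + 10 = 11
 */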
+
+/**
+ * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend.
+ * @info: CSF Info used to create the backend.
+ * @dump_state: The dumping state of the backend.
+ * @enable_state: The CSF backend internal enabled state.
+ * @insert_index_to_accumulate: The insert index in the ring buffer up to which
+ * samples need to be accumulated.
+ * @enable_state_waitq: Wait queue used to signal that an enable
+ * state change has completed.
+ * @to_user_buf: HWC sample buffer for client user, size
+ * metadata.dump_buf_bytes.
+ * @accum_buf: HWC sample buffer used as an internal
+ * accumulator, size metadata.dump_buf_bytes.
+ * @old_sample_buf: HWC sample buffer to save the previous values
+ * for delta calculation, size
+ * prfcnt_info.dump_bytes.
+ * @watchdog_last_seen_insert_idx: The insert index which watchdog has last
+ * seen, to check any new firmware automatic
+ * samples generated during the watchdog
+ * period.
+ * @ring_buf: Opaque pointer for ring buffer object.
+ * @ring_buf_cpu_base: CPU base address of the allocated ring buffer.
+ * @clk_enable_map: The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed: Cycle count elapsed for a given sample period.
+ * @prev_cycle_count: Previous cycle count to calculate the cycle
+ * count for sample period.
+ * @phys_layout: Physical memory layout information of HWC
+ * sample buffer.
+ * @dump_completed: Completion signaled by the dump worker when
+ * it has finished accumulating up to the
+ * insert_index_to_accumulate.
+ * Should be initialized to the "complete" state.
+ * @user_requested: Flag to indicate a dump_request called from
+ * user.
+ * @hwc_dump_workq: Single threaded work queue for HWC workers
+ * execution.
+ * @hwc_dump_work: Worker to accumulate samples.
+ * @hwc_threshold_work: Worker for consuming available samples when
+ * threshold interrupt raised.
+ */
+struct kbase_hwcnt_backend_csf {
+ struct kbase_hwcnt_backend_csf_info *info;
+ enum kbase_hwcnt_backend_csf_dump_state dump_state;
+ enum kbase_hwcnt_backend_csf_enable_state enable_state;
+ u32 insert_index_to_accumulate;
+ wait_queue_head_t enable_state_waitq;
+ u64 *to_user_buf;
+ u64 *accum_buf;
+ u32 *old_sample_buf;
+ u32 watchdog_last_seen_insert_idx;
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
+ void *ring_buf_cpu_base;
+ u64 clk_enable_map;
+ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+ struct kbase_hwcnt_csf_physical_layout phys_layout;
+ struct completion dump_completed;
+ bool user_requested;
+ struct workqueue_struct *hwc_dump_workq;
+ struct work_struct hwc_dump_work;
+ struct work_struct hwc_threshold_work;
+};
+
+static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
+{
+ WARN_ON(!csf_info);
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+ return (csf_info->backend != NULL);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count
+ * tracking.
+ *
+ * @backend_csf: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ */
+static void
+kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backend_csf,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ u64 clk_enable_map = enable_map->clk_enable_map;
+ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
+ size_t clk;
+
+ /* Read cycle count from CSF interface for both clock domains. */
+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
+ clk_enable_map);
+
+ kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk))
+ backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
+ }
+
+ /* Keep clk_enable_map for dump_request. */
+ backend_csf->clk_enable_map = clk_enable_map;
+}
+
+static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
+ size_t clk;
+
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
+ backend_csf->clk_enable_map);
+
+ kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) {
+ backend_csf->cycle_count_elapsed[clk] =
+ cycle_counts[clk] - backend_csf->prev_cycle_count[clk];
+ backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
+ }
+ }
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+
+ if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if)
+ return 0;
+
+ return backend_csf->info->csf_if->timestamp_ns(backend_csf->info->csf_if->ctx);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
+ * guarantee headers are enabled if any counter is required.
+ * @phys_enable_map: HWC physical enable map to be processed.
+ */
+static void
+kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map)
+{
+ WARN_ON(!phys_enable_map);
+
+ /* Enable header if any counter is required from user, the header is
+ * controlled by bit 0 of the enable mask.
+ */
+ if (phys_enable_map->fe_bm)
+ phys_enable_map->fe_bm |= 1;
+
+ if (phys_enable_map->tiler_bm)
+ phys_enable_map->tiler_bm |= 1;
+
+ if (phys_enable_map->mmu_l2_bm)
+ phys_enable_map->mmu_l2_bm |= 1;
+
+ if (phys_enable_map->shader_bm)
+ phys_enable_map->shader_bm |= 1;
+}
+
+static void kbasep_hwcnt_backend_csf_init_layout(
+ const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info,
+ struct kbase_hwcnt_csf_physical_layout *phys_layout)
+{
+ size_t shader_core_cnt;
+ size_t values_per_block;
+ size_t fw_blocks_count;
+ size_t hw_blocks_count;
+
+ WARN_ON(!prfcnt_info);
+ WARN_ON(!phys_layout);
+
+ shader_core_cnt = fls64(prfcnt_info->core_mask);
+ values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+ fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
+ hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
+
+ /* The number of hardware counter blocks reported by the GPU matches the
+ * legacy guess-work we have done in the past.
+ */
+ WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT +
+ KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ prfcnt_info->l2_count + shader_core_cnt);
+
+ *phys_layout = (struct kbase_hwcnt_csf_physical_layout){
+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
+ .mmu_l2_cnt = prfcnt_info->l2_count,
+ .shader_cnt = shader_core_cnt,
+ .fw_block_cnt = fw_blocks_count,
+ .hw_block_cnt = hw_blocks_count,
+ .block_cnt = fw_blocks_count + hw_blocks_count,
+ .shader_avail_mask = prfcnt_info->core_mask,
+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .values_per_block = values_per_block,
+ .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
+ };
+}
+
+static void
+kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+
+ memset(backend_csf->to_user_buf, 0, user_buf_bytes);
+ memset(backend_csf->accum_buf, 0, user_buf_bytes);
+ memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes);
+}
+
+static void
+kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf,
+ u32 *sample)
+{
+ u32 block_idx;
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout;
+ u32 *block_buf;
+
+ phys_layout = &backend_csf->phys_layout;
+
+ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
+ block_buf = sample + block_idx * phys_layout->values_per_block;
+ block_buf[phys_layout->enable_mask_offset] = 0;
+ }
+}
+
+static void
+kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ u32 idx;
+ u32 *sample;
+ char *cpu_dump_base;
+ size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+
+ cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base;
+
+ for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) {
+ sample = (u32 *)&cpu_dump_base[idx * dump_bytes];
+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample);
+ }
+}
+
+static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+
+ /* Copy the data into the sample and wait for the user to get it. */
+ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes);
+
+ /* After copying the data into the user sample, clear the accumulator
+ * values to prepare for the next accumulation, such as the next request
+ * or threshold interrupt.
+ */
+ memset(backend_csf->accum_buf, 0, user_buf_bytes);
+}
+
+static void kbasep_hwcnt_backend_csf_accumulate_sample(
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
+ u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples)
+{
+ size_t block_idx;
+ const u32 *old_block = old_sample_buf;
+ const u32 *new_block = new_sample_buf;
+ u64 *acc_block = accum_buf;
+ const size_t values_per_block = phys_layout->values_per_block;
+
+ /* Performance counter blocks for firmware are stored before blocks for hardware.
+ * We skip over the firmware's performance counter blocks (counters dumping is not
+ * supported for firmware blocks, only hardware ones).
+ */
+ old_block += values_per_block * phys_layout->fw_block_cnt;
+ new_block += values_per_block * phys_layout->fw_block_cnt;
+
+ for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt;
+ block_idx++) {
+ const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
+ const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
+
+ if (new_enable_mask == 0) {
+ /* Hardware block was unavailable or we didn't turn on
+ * any counters. Do nothing.
+ */
+ } else {
+ /* Hardware block was available and it had some counters
+ * enabled. We need to update the accumulation buffer.
+ */
+ size_t ctr_idx;
+
+ /* Unconditionally copy the headers. */
+ for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) {
+ acc_block[ctr_idx] = new_block[ctr_idx];
+ }
+
+ /* Accumulate counter samples
+ *
+ * When accumulating samples we need to take into
+ * account whether the counter sampling method involves
+ * clearing counters back to zero after each sample is
+ * taken.
+ *
+ * The intention for CSF was that all HW should use
+ * counters which wrap to zero when their maximum value
+ * is reached. This, combined with non-clearing
+ * sampling, enables multiple concurrent users to
+ * request samples without interfering with each other.
+ *
+ * However some early HW may not support wrapping
+ * counters, for these GPUs counters must be cleared on
+ * sample to avoid loss of data due to counters
+ * saturating at their maximum value.
+ */
+ if (!clearing_samples) {
+ if (old_enable_mask == 0) {
+ /* Hardware block was previously
+ * unavailable. Accumulate the new
+ * counters only, as we know previous
+ * values are zeroes.
+ */
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
+ acc_block[ctr_idx] += new_block[ctr_idx];
+ }
+ } else {
+ /* Hardware block was previously
+ * available. Accumulate the delta
+ * between old and new counter values.
+ */
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
+ acc_block[ctr_idx] +=
+ new_block[ctr_idx] - old_block[ctr_idx];
+ }
+ }
+ } else {
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
+ acc_block[ctr_idx] += new_block[ctr_idx];
+ }
+ }
+ }
+ old_block += values_per_block;
+ new_block += values_per_block;
+ acc_block += values_per_block;
+ }
+
+ WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) -
+ (values_per_block * phys_layout->fw_block_cnt));
+ (void)dump_bytes;
+}
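One property the non-clearing path above depends on is that the u32 subtraction stays correct across a counter wrap. A standalone sketch of that behaviour (illustrative only, not part of this patch):

/* Illustrative only: with wrapping (non-clearing) u32 counters, the delta
 * new_val - old_val is computed modulo 2^32, so it is still the true number
 * of events even when the counter wrapped between the two samples.
 */
static u64 example_counter_delta(u32 old_val, u32 new_val)
{
	/* e.g. old_val = 0xFFFFFF00, new_val = 0x00000010:
	 * new_val - old_val wraps to 0x110 rather than producing a huge
	 * or negative value, which is what the accumulation loop above
	 * adds into the u64 accumulator.
	 */
	return new_val - old_val;
}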
+
+static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backend_csf *backend_csf,
+ u32 extract_index_to_start,
+ u32 insert_index_to_stop)
+{
+ u32 raw_idx;
+ unsigned long flags;
+ u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
+ const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
+ const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+ bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
+ u32 *old_sample_buf = backend_csf->old_sample_buf;
+ u32 *new_sample_buf = old_sample_buf;
+
+ if (extract_index_to_start == insert_index_to_stop)
+ /* No samples to accumulate. Early out. */
+ return;
+
+ /* Sync all the buffers to the CPU side before reading the data. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, extract_index_to_start,
+ insert_index_to_stop, true);
+
+ /* Consider the u32 wrap case; '!=' is used here instead of the '<' operator. */
+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) {
+ /* The logical "&" acts as a modulo operation since buf_count
+ * must be a power of two.
+ */
+ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
+
+ new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
+
+ kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout,
+ buf_dump_bytes, backend_csf->accum_buf,
+ old_sample_buf, new_sample_buf,
+ clearing_samples);
+
+ old_sample_buf = new_sample_buf;
+ }
+
+ /* Save the newest buffer as the old buffer for next time. */
+ memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes);
+
+ /* Reset the prfcnt_en header on each sample before releasing them. */
+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) {
+ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
+ u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
+
+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample);
+ }
+
+ /* Sync zeroed buffers to avoid coherency issues on future use. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, extract_index_to_start,
+ insert_index_to_stop, false);
+
+ /* After consuming all samples between extract_idx and insert_idx,
+ * set the raw extract index to insert_idx so that the sample buffers
+ * can be released back to the ring buffer pool.
+ */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ backend_csf->info->csf_if->set_extract_index(backend_csf->info->csf_if->ctx,
+ insert_index_to_stop);
+ /* Update the watchdog last seen index to check any new FW auto samples
+ * in next watchdog callback.
+ */
+ backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop;
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+}
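Because ring_buf_cnt must be a power of two, the masking used above keeps working even when the raw u32 extract/insert indexes wrap around. A standalone sketch (illustrative only, not part of this patch):

/* Illustrative only: map a raw, monotonically increasing (and eventually
 * wrapping) u32 index to a slot in the ring buffer. Requires ring_buf_cnt to
 * be a power of two so the mask acts as a modulo.
 */
static u32 example_ring_buf_slot(u32 raw_idx, u32 ring_buf_cnt)
{
	/* e.g. with ring_buf_cnt = 4: raw 0xFFFFFFFE -> slot 2,
	 * 0xFFFFFFFF -> slot 3, 0x00000000 -> slot 0, so the
	 * "raw_idx != insert_index_to_stop" loop walks slots 2, 3, 0
	 * across the u32 wrap without special-casing it.
	 */
	return raw_idx & (ring_buf_cnt - 1);
}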
+
+static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ struct kbase_hwcnt_backend_csf *backend_csf,
+ enum kbase_hwcnt_backend_csf_enable_state new_state)
+{
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ if (backend_csf->enable_state != new_state) {
+ backend_csf->enable_state = new_state;
+
+ wake_up(&backend_csf->enable_state_waitq);
+ }
+}
+
+static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info = info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+ unsigned long flags;
+
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+
+ if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) {
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+ return;
+ }
+
+ backend_csf = csf_info->backend;
+
+ /* Only do watchdog request when all conditions are met: */
+ if (/* 1. Backend is enabled. */
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
+ /* 2. FW is not in protected mode. */
+ (!csf_info->fw_in_protected_mode) &&
+ /* 3. dump state indicates no other dumping is in progress. */
+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
+ u32 extract_index;
+ u32 insert_index;
+
+ /* Read the raw extract and insert indexes from the CSF interface. */
+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index);
+
+ /* Do watchdog request if no new FW auto samples. */
+ if (insert_index == backend_csf->watchdog_last_seen_insert_idx) {
+ /* Trigger the watchdog request. */
+ csf_info->csf_if->dump_request(csf_info->csf_if->ctx);
+
+ /* A watchdog dump is required, change the state to
+ * start the request process.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED;
+ }
+ }
+
+ /* Must schedule another callback when in the transitional state because
+ * this function can be called for the first time before the performance
+ * counter enabled interrupt.
+ */
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) ||
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) {
+ /* Reschedule the timer for next watchdog callback. */
+ csf_info->watchdog_if->modify(csf_info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+ }
+
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker.
+ * @work: Work structure.
+ *
+ * Accumulates all available samples in the ring buffer once a dump request
+ * has been made.
+ *
+ */
+static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+ u32 insert_index_to_acc;
+ u32 extract_index;
+ u32 insert_index;
+
+ WARN_ON(!work);
+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ /* Assert the backend is not destroyed. */
+ WARN_ON(backend_csf != backend_csf->info->backend);
+
+ /* The backend was disabled or had an error while the worker was being
+ * launched.
+ */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED);
+
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING;
+ insert_index_to_acc = backend_csf->insert_index_to_accumulate;
+
+ /* Read the raw extract and insert indexes from the CSF interface. */
+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index,
+ &insert_index);
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* Accumulate up to the insert we grabbed at the prfcnt request
+ * interrupt.
+ */
+ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
+ insert_index_to_acc);
+
+ /* Copy to the user buffer so if a threshold interrupt fires
+ * between now and get(), the accumulations are untouched.
+ */
+ kbasep_hwcnt_backend_csf_update_user_sample(backend_csf);
+
+ /* Dump done, set state back to COMPLETED for next request. */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ /* Assert the backend is not destroyed. */
+ WARN_ON(backend_csf != backend_csf->info->backend);
+
+ /* The backend was disabled or had an error while we were accumulating.
+ */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING);
+
+ /* Our work here is done - set the wait object and unblock waiters. */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ complete_all(&backend_csf->dump_completed);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker.
+ *
+ * @work: Work structure.
+ *
+ * Called when an HWC threshold interrupt is raised, to consume all available
+ * samples in the ring buffer.
+ */
+static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+ u32 extract_index;
+ u32 insert_index;
+
+ WARN_ON(!work);
+
+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* Assert the backend is not destroyed. */
+ WARN_ON(backend_csf != backend_csf->info->backend);
+
+ /* Read the raw extract and insert indexes from the CSF interface. */
+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index,
+ &insert_index);
+
+ /* The backend was disabled or had an error while the worker was being
+ * launched.
+ */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ /* Early out if we are not in the IDLE state or COMPLETED state, as this
+ * means a concurrent dump is in progress and we don't want to
+ * interfere.
+ */
+ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* Accumulate everything we possibly can. We grabbed the insert index
+ * immediately after we acquired the lock but before we checked whether
+ * a concurrent dump was triggered. This ensures that if a concurrent
+ * dump was triggered between releasing the lock and now, we know for a
+ * fact that our insert will not exceed the concurrent dump's
+ * insert_to_accumulate, so we don't risk accumulating too much data.
+ */
+ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index);
+
+ /* No need to wake up anything since it is not a user dump request. */
+}
+
+static void
+kbase_hwcnt_backend_csf_submit_dump_worker(struct kbase_hwcnt_backend_csf_info *csf_info)
+{
+ u32 extract_index;
+
+ WARN_ON(!csf_info);
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info));
+ WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED);
+ WARN_ON(csf_info->backend->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT);
+
+ /* Save insert index now so that the dump worker only accumulates the
+ * HWC data associated with this request. Extract index is not stored
+ * as that needs to be checked when accumulating to prevent re-reading
+ * buffers that have already been read and returned to the GPU.
+ */
+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index,
+ &csf_info->backend->insert_index_to_accumulate);
+ csf_info->backend->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;
+
+ /* Submit the accumulator task into the work queue. */
+ queue_work(csf_info->backend->hwc_dump_workq, &csf_info->backend->hwc_dump_work);
+}
+
+static void
+kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *backend_csf,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_backend_csf_if_enable *enable)
+{
+ enum kbase_hwcnt_physical_set phys_counter_set;
+ struct kbase_hwcnt_physical_enable_map phys_enable_map;
+
+ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
+
+ /* Process the enable_map to guarantee the block header is enabled, which
+ * is needed for delta calculation.
+ */
+ kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map);
+
+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_csf->info->counter_set);
+
+ /* Use processed enable_map to enable HWC in HW level. */
+ enable->fe_bm = phys_enable_map.fe_bm;
+ enable->shader_bm = phys_enable_map.shader_bm;
+ enable->tiler_bm = phys_enable_map.tiler_bm;
+ enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+ enable->counter_set = phys_counter_set;
+ enable->clk_enable_map = enable_map->clk_enable_map;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int
+kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ struct kbase_hwcnt_backend_csf_if_enable enable;
+ int err;
+
+ if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata))
+ return -EINVAL;
+
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable);
+
+ /* enable_state should be DISABLED before we transition it to enabled. */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
+ return -EIO;
+
+ err = backend_csf->info->watchdog_if->enable(backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS,
+ kbasep_hwcnt_backend_watchdog_timer_cb,
+ backend_csf->info);
+ if (err)
+ return err;
+
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED);
+
+ backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, &enable);
+
+ kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map);
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, enable_map);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return errcode;
+}
+
+static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
+ struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
+{
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) ||
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, *lock_flags);
+
+ wait_event(backend_csf->enable_state_waitq,
+ (backend_csf->enable_state !=
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
+ (backend_csf->enable_state !=
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED));
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, lock_flags);
+ }
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ bool do_disable = false;
+
+ WARN_ON(!backend_csf);
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* Make sure we wait until any previous enable or disable have completed
+ * before doing anything.
+ */
+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags);
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED ||
+ backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+ /* If we are already disabled or in an unrecoverable error
+ * state, there is nothing for us to do.
+ */
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+ /* Only disable if we were previously enabled - in all other
+ * cases the call to disable will have already been made.
+ */
+ do_disable = true;
+ }
+
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* Deregister the timer and block until any timer callback has completed.
+ * We've transitioned out of the ENABLED state so we can guarantee it
+ * won't reschedule itself.
+ */
+ backend_csf->info->watchdog_if->disable(backend_csf->info->watchdog_if->timer);
+
+ /* Block until any async work has completed. We have transitioned out of
+ * the ENABLED state so we can guarantee no new work will concurrently
+ * be submitted.
+ */
+ flush_workqueue(backend_csf->hwc_dump_workq);
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ if (do_disable)
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
+
+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags);
+
+ switch (backend_csf->enable_state) {
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
+ break;
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ backend_csf->user_requested = false;
+ backend_csf->watchdog_last_seen_insert_idx = 0;
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* After disable, zero the header of all buffers in the ring buffer back
+ * to 0 to prepare for the next enable.
+ */
+ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
+
+ /* Sync zeroed buffers to avoid coherency issues on future use. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, 0,
+ backend_csf->info->ring_buf_cnt, false);
+
+ /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare
+ * for next enable.
+ */
+ kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ bool do_request = false;
+ bool watchdog_dumping = false;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* If we're transitioning to enabled there's nothing to accumulate, and
+ * the user dump buffer is already zeroed. We can just short circuit to
+ * the DUMP_COMPLETED state.
+ */
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
+ kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+ backend_csf->user_requested = true;
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return 0;
+ }
+
+ /* Otherwise, make sure we're already enabled. */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return -EIO;
+ }
+
+ /* Make sure that this is either the first request since enable, or that the
+ * previous user dump has completed, or that a watchdog dump is in progress,
+ * so we never cut in midway through a user dump.
+ * If a user request comes while a watchdog dump is in progress, the user
+ * request takes ownership of the sample the watchdog requested by changing
+ * the dump_state, so the interrupt for the watchdog request is processed
+ * instead of ignored.
+ */
+ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) &&
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ /* HWC is disabled or another dump is ongoing, or we are on
+ * fault.
+ */
+ return -EIO;
+ }
+
+ /* Reset the completion so dump_wait() has something to wait on. */
+ reinit_completion(&backend_csf->dump_completed);
+
+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)
+ watchdog_dumping = true;
+
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
+ !backend_csf->info->fw_in_protected_mode) {
+ /* Only do the request if we are fully enabled and not in
+ * protected mode.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED;
+ do_request = true;
+ } else {
+ /* Skip the request and the wait for the ACK, and go straight to
+ * checking the insert and kicking off the worker to do the dump.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
+ }
+
+ /* CSF firmware might enter protected mode now, but still call request.
+ * That is fine, as we changed state while holding the lock, so the
+ * protected mode enter function will query the insert and launch the
+ * dumping worker.
+ * At some point we will get the dump request ACK saying a dump is done,
+ * but we can ignore it if we are not in the REQUESTED state and process
+ * the sample in the next dumping-worker round.
+ */
+
+ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
+ kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+ backend_csf->user_requested = true;
+
+ if (do_request) {
+ /* If a watchdog dumping is in progress, don't need to do
+ * another request, just update the dump_state and take the
+ * ownership of the sample which watchdog requested.
+ */
+ if (!watchdog_dumping)
+ backend_csf->info->csf_if->dump_request(backend_csf->info->csf_if->ctx);
+ } else
+ kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info);
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+	/* Modify the watchdog timer to push back the next regular check,
+	 * since a dump has just been requested.
+	 */
+ backend_csf->info->watchdog_if->modify(backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ int errcode;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ wait_for_completion(&backend_csf->dump_completed);
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+	/* When user_requested is set, make sure the last dump actually
+	 * succeeded.
+	 */
+ if (backend_csf->user_requested &&
+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ||
+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)))
+ errcode = 0;
+ else
+ errcode = -EIO;
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ int errcode;
+ u64 ts;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ /* Request a dump so we can clear all current counters. */
+ errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts);
+ if (!errcode)
+ /* Wait for the manual dump or auto dump to be done and
+ * accumulator to be updated.
+ */
+ errcode = kbasep_hwcnt_backend_csf_dump_wait(backend);
+
+ return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ int ret;
+ size_t clk;
+
+ if (!backend_csf || !dst || !dst_enable_map ||
+ (backend_csf->info->metadata != dst->metadata) ||
+ (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ /* Extract elapsed cycle count for each clock domain if enabled. */
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ continue;
+
+ /* Reset the counter to zero if accumulation is off. */
+ if (!accumulate)
+ dst->clk_cnt_buf[clk] = 0;
+ dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk];
+ }
+
+	/* We just return the user buffer without checking the current state,
+	 * as it is undefined to call this function without a prior successful
+	 * call to dump_wait().
+	 */
+ ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate);
+
+ return ret;
+}
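
For context, a minimal caller-side sketch (not part of this patch) of how the operations above are intended to be sequenced through the backend interface vtable; the iface, backend, dst and enable_map objects are assumed to have been set up by the caller, and error handling is reduced to early returns.

static int example_take_one_sample(struct kbase_hwcnt_backend_interface *iface,
				   struct kbase_hwcnt_backend *backend,
				   struct kbase_hwcnt_dump_buffer *dst,
				   const struct kbase_hwcnt_enable_map *enable_map)
{
	u64 dump_time_ns;
	int errcode;

	/* Ask the backend to start a dump; fails with -EIO if counters are
	 * not enabled.
	 */
	errcode = iface->dump_request(backend, &dump_time_ns);
	if (errcode)
		return errcode;

	/* Block until the requested (or watchdog-owned) dump has completed. */
	errcode = iface->dump_wait(backend);
	if (errcode)
		return errcode;

	/* Copy the sample into the caller's buffer without accumulation. */
	return iface->dump_get(backend, dst, enable_map, false);
}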
+
+/**
+ * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend.
+ * @backend_csf: Pointer to CSF backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ if (!backend_csf)
+ return;
+
+ destroy_workqueue(backend_csf->hwc_dump_workq);
+
+ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf);
+
+ kfree(backend_csf->accum_buf);
+ backend_csf->accum_buf = NULL;
+
+ kfree(backend_csf->old_sample_buf);
+ backend_csf->old_sample_buf = NULL;
+
+ kfree(backend_csf->to_user_buf);
+ backend_csf->to_user_buf = NULL;
+
+ kfree(backend_csf);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance.
+ *
+ * @csf_info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
+ struct kbase_hwcnt_backend_csf **out_backend)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = NULL;
+ int errcode = -ENOMEM;
+
+ WARN_ON(!csf_info);
+ WARN_ON(!out_backend);
+
+ backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL);
+ if (!backend_csf)
+ goto alloc_error;
+
+ backend_csf->info = csf_info;
+ kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout);
+
+ backend_csf->accum_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!backend_csf->accum_buf)
+ goto err_alloc_acc_buf;
+
+ backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+ if (!backend_csf->old_sample_buf)
+ goto err_alloc_pre_sample_buf;
+
+ backend_csf->to_user_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!backend_csf->to_user_buf)
+ goto err_alloc_user_sample_buf;
+
+ errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
+ &backend_csf->ring_buf_cpu_base,
+ &backend_csf->ring_buf);
+ if (errcode)
+ goto err_ring_buf_alloc;
+ errcode = -ENOMEM;
+
+	/* Zero all performance enable headers to prepare for the first enable. */
+ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
+
+ /* Sync zeroed buffers to avoid coherency issues on use. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, 0,
+ backend_csf->info->ring_buf_cnt, false);
+
+ init_completion(&backend_csf->dump_completed);
+
+ init_waitqueue_head(&backend_csf->enable_state_waitq);
+
+	/* Allocate a single-threaded workqueue for the dump worker and the
+	 * threshold worker.
+	 */
+ backend_csf->hwc_dump_workq =
+ alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!backend_csf->hwc_dump_workq)
+ goto err_alloc_workqueue;
+
+ INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker);
+ INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker);
+
+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED;
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+ backend_csf->user_requested = false;
+ backend_csf->watchdog_last_seen_insert_idx = 0;
+
+ *out_backend = backend_csf;
+ return 0;
+
+err_alloc_workqueue:
+ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf);
+err_ring_buf_alloc:
+ kfree(backend_csf->to_user_buf);
+ backend_csf->to_user_buf = NULL;
+err_alloc_user_sample_buf:
+ kfree(backend_csf->old_sample_buf);
+ backend_csf->old_sample_buf = NULL;
+err_alloc_pre_sample_buf:
+ kfree(backend_csf->accum_buf);
+ backend_csf->accum_buf = NULL;
+err_alloc_acc_buf:
+ kfree(backend_csf);
+alloc_error:
+ return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = NULL;
+ struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info;
+ int errcode;
+ bool success = false;
+
+ if (!info || !out_backend)
+ return -EINVAL;
+
+ /* Create the backend. */
+ errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf);
+ if (errcode)
+ return errcode;
+
+	/* If no backend was created before, attach this one to csf_info.
+	 * Use the spinlock to avoid concurrent initialization.
+	 */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ if (csf_info->backend == NULL) {
+ csf_info->backend = backend_csf;
+ *out_backend = (struct kbase_hwcnt_backend *)backend_csf;
+ success = true;
+ if (csf_info->unrecoverable_error_happened)
+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR;
+ }
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+	/* Destroy the newly created backend if one had already been created.
+	 * In the normal case this won't happen, provided the client calls
+	 * init() properly.
+	 */
+ if (!success) {
+ kbasep_hwcnt_backend_csf_destroy(backend_csf);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_csf_dump_disable(backend);
+
+ /* Set the backend in csf_info to NULL so we won't handle any external
+ * notification anymore since we are terminating.
+ */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ backend_csf->info->backend = NULL;
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ kbasep_hwcnt_backend_csf_destroy(backend_csf);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_csf_info_destroy(const struct kbase_hwcnt_backend_csf_info *info)
+{
+ if (!info)
+ return;
+
+	/* The backend should be destroyed before the info object is destroyed. */
+ WARN_ON(info->backend != NULL);
+
+	/* The metadata should be destroyed before the info object is destroyed. */
+ WARN_ON(info->metadata != NULL);
+
+ kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info.
+ *
+ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure
+ * used to create backend interface.
+ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer.
+ *                MUST be a power of 2.
+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create
+ * backend interface.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int
+kbasep_hwcnt_backend_csf_info_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ const struct kbase_hwcnt_backend_csf_info **out_info)
+{
+ struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+ if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) ||
+ WARN_ON(!is_power_of_2(ring_buf_cnt)))
+ return -EINVAL;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ *info = (struct kbase_hwcnt_backend_csf_info)
+ {
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ .counter_set = KBASE_HWCNT_SET_SECONDARY,
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ .counter_set = KBASE_HWCNT_SET_TERTIARY,
+#else
+ /* Default to primary */
+ .counter_set = KBASE_HWCNT_SET_PRIMARY,
+#endif
+ .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt,
+ .fw_in_protected_mode = false, .unrecoverable_error_happened = false,
+ .watchdog_if = watchdog_if,
+ };
+ *out_info = info;
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ if (!info)
+ return NULL;
+
+ WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata);
+
+ return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata;
+}
+
+static void
+kbasep_hwcnt_backend_csf_handle_unrecoverable_error(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ bool do_disable = false;
+
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ /* We are already in or transitioning to the unrecoverable error state.
+ * Early out.
+ */
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) ||
+ (backend_csf->enable_state ==
+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER))
+ return;
+
+ /* If we are disabled, we know we have no pending workers, so skip the
+ * waiting state.
+ */
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+ return;
+ }
+
+	/* Trigger a disable only if we are not already transitioning to
+	 * disabled; we don't want to disable twice if an unrecoverable error
+	 * happens while we are disabling.
+	 */
+ do_disable =
+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER);
+
+ /* Transition the dump to the IDLE state and unblock any waiters. The
+ * IDLE state signifies an error.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+
+	/* Only trigger the disable if we were not already transitioning to
+	 * disabled (see do_disable above); we don't want to disable twice if
+	 * an unrecoverable error happens while we are disabling.
+	 */
+ if (do_disable)
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
+}
+
+static void
+kbasep_hwcnt_backend_csf_handle_recoverable_error(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ switch (backend_csf->enable_state) {
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED:
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
+ /* Already disabled or disabling, or in an unrecoverable error.
+ * Nothing to be done to handle the error.
+ */
+ return;
+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
+ /* A seemingly recoverable error that occurs while we are
+ * transitioning to enabled is probably unrecoverable.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf);
+ return;
+ case KBASE_HWCNT_BACKEND_CSF_ENABLED:
+ /* Start transitioning to the disabled state. We can't wait for
+ * it as this recoverable error might be triggered from an
+ * interrupt. The wait will be done in the eventual call to
+ * disable().
+ */
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+ /* Transition the dump to the IDLE state and unblock any
+ * waiters. The IDLE state signifies an error.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
+ return;
+ }
+}
+
+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info =
+ (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+ csf_info->fw_in_protected_mode = true;
+
+ /* Call on_prfcnt_sample() to trigger collection of the protected mode
+ * entry auto-sample if there is currently a pending dump request.
+ */
+ kbase_hwcnt_backend_csf_on_prfcnt_sample(iface);
+}
+
+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+ csf_info->fw_in_protected_mode = false;
+}
+
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+ csf_info->unrecoverable_error_happened = true;
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+ return;
+ }
+
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+ csf_info->unrecoverable_error_happened = false;
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+ return;
+ }
+ backend_csf = csf_info->backend;
+
+ if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) &&
+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) {
+ /* Before a reset occurs, we must either have been disabled
+ * (else we lose data) or we should have encountered an
+ * unrecoverable error. Either way, we will have disabled the
+ * interface and waited for any workers that might have still
+ * been in flight.
+ * If not in these states, fire off one more disable to make
+ * sure everything is turned off before the power is pulled.
+ * We can't wait for this disable to complete, but it doesn't
+ * really matter, the power is being pulled.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+ }
+
+ /* A reset is the only way to exit the unrecoverable error state */
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
+ }
+
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ /* Skip the dump_work if it's a watchdog request. */
+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) {
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ return;
+ }
+
+	/* If the current state is not REQUESTED, this HWC sample is skipped
+	 * here and will be processed on the next dump_request.
+	 */
+ if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED)
+ return;
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
+
+ kbase_hwcnt_backend_csf_submit_dump_worker(csf_info);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED)
+ /* Submit the threshold work into the work queue to consume the
+ * available samples.
+ */
+ queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+
+ /* Called when an overflow occurs. We treat this as a recoverable error,
+ * so we start transitioning to the disabled state.
+ * We could try and handle it while enabled, but in a real system we
+ * never expect an overflow to occur so there is no point implementing
+ * complex recovery code when we can just turn ourselves off instead for
+ * a while.
+ */
+ kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED);
+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ /* Unexpected, but we are already in the right state so just
+ * ignore it.
+ */
+ } else {
+ /* Unexpected state change, assume everything is broken until
+ * we reset.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+ }
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER);
+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
+ /* Unexpected, but we are already in the right state so just
+ * ignore it.
+ */
+ } else {
+ /* Unexpected state change, assume everything is broken until
+ * we reset.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+ }
+}
+
+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_gpu_info gpu_info;
+
+ if (!iface)
+ return -EINVAL;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ WARN_ON(!csf_info->csf_if->get_prfcnt_info);
+
+ csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, &csf_info->prfcnt_info);
+
+	/* The clock domain count should not exceed the maximum number of
+	 * clock regulators.
+	 */
+ if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
+ return -EIO;
+
+ gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
+ gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+ gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
+ gpu_info.prfcnt_values_per_block =
+ csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+ return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set,
+ &csf_info->metadata);
+}
+
+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ if (!iface)
+ return;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ if (csf_info->metadata) {
+ kbase_hwcnt_csf_metadata_destroy(csf_info->metadata);
+ csf_info->metadata = NULL;
+ }
+}
+
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ int errcode;
+ const struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+ if (!iface || !csf_if || !watchdog_if)
+ return -EINVAL;
+
+	/* The buffer count must be a power of 2. */
+ if (!is_power_of_2(ring_buf_cnt))
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, watchdog_if, &info);
+ if (errcode)
+ return errcode;
+
+ iface->info = (struct kbase_hwcnt_backend_info *)info;
+ iface->metadata = kbasep_hwcnt_backend_csf_metadata;
+ iface->init = kbasep_hwcnt_backend_csf_init;
+ iface->term = kbasep_hwcnt_backend_csf_term;
+ iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns;
+ iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable;
+ iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock;
+ iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable;
+ iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear;
+ iface->dump_request = kbasep_hwcnt_backend_csf_dump_request;
+ iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait;
+ iface->dump_get = kbasep_hwcnt_backend_csf_dump_get;
+
+ return 0;
+}
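
As a usage note, a minimal sketch (assuming the csf_if and watchdog_if objects are created elsewhere by the integration) of the intended setup order for the interface created above: the metadata must be initialized before any backend instance is created via iface->init(), and teardown mirrors this with metadata_term() before destroy().

static int example_setup_csf_backend(struct kbase_hwcnt_backend_csf_if *csf_if,
				     struct kbase_hwcnt_watchdog_interface *watchdog_if,
				     struct kbase_hwcnt_backend_interface *iface)
{
	/* 128 is only an example; the ring buffer count MUST be a power of 2. */
	int errcode = kbase_hwcnt_backend_csf_create(csf_if, 128, watchdog_if, iface);

	if (errcode)
		return errcode;

	/* The metadata must exist before iface->init() can be called. */
	errcode = kbase_hwcnt_backend_csf_metadata_init(iface);
	if (errcode)
		kbase_hwcnt_backend_csf_destroy(iface);

	return errcode;
}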
+
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface)
+ return;
+
+ kbasep_hwcnt_backend_csf_info_destroy(
+ (const struct kbase_hwcnt_backend_csf_info *)iface->info);
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
new file mode 100644
index 0000000..9c5a5c9
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_H_
+#define _KBASE_HWCNT_BACKEND_CSF_H_
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+
+/**
+ * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
+ * interface.
+ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure
+ * used to create backend interface.
+ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer
+ *                allocation; MUST be a power of 2.
+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used
+ * to create backend interface.
+ * @iface: Non-NULL pointer to backend interface structure that is filled
+ * in on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
+ * hardware counter backend.
+ * @iface: Non-NULL pointer to backend interface structure.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF
+ * hardware counter backend.
+ * @iface: Non-NULL pointer to backend interface structure.
+ */
+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend
+ * interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive
+ * notification that protected mode
+ * has been entered.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive
+ * notification that protected mode has
+ * been exited.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function
+ * called when unrecoverable
+ * errors are detected.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ *
+ * This should be called on encountering errors that can only be recovered from
+ * with a reset, or that may put the HWC logic in a state that could result in
+ * a hang, for example on a bus error or when the FW becomes unresponsive.
+ */
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be
+ * called immediately before a
+ * reset. Takes us out of the
+ * unrecoverable error state, if we
+ * were in it.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample
+ * complete interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter
+ *                                                 buffer threshold reached
+ *                                                 interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer
+ * overflow interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled
+ * interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter
+ * disabled interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
new file mode 100644
index 0000000..382a3ad
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for CSF hardware counter backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_
+#define _KBASE_HWCNT_BACKEND_CSF_IF_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_backend_csf_if_ctx;
+
+struct kbase_hwcnt_backend_csf_if_ring_buf;
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection
+ * structure.
+ * @fe_bm: Front End counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm: Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ * @counter_set: The performance counter set to enable.
+ * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle
+ * counter for a given clock domain.
+ */
+struct kbase_hwcnt_backend_csf_if_enable {
+ u32 fe_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 mmu_l2_bm;
+ u8 counter_set;
+ u64 clk_enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter
+ * information.
+ * @prfcnt_hw_size: Total length in bytes of all the hardware counters data. The hardware
+ * counters are sub-divided into 4 classes: front-end, shader, tiler, and
+ * memory system (l2 cache + MMU).
+ * @prfcnt_fw_size: Total length in bytes of all the firmware counters data.
+ * @dump_bytes: Bytes of GPU memory required to perform a performance
+ * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size.
+ * @prfcnt_block_size: Bytes of each performance counter block.
+ * @l2_count: The MMU L2 cache count.
+ * @core_mask: Shader core mask.
+ * @clk_cnt: Clock domain count in the system.
+ * @clearing_samples: Indicates whether counters are cleared after each sample
+ * is taken.
+ */
+struct kbase_hwcnt_backend_csf_if_prfcnt_info {
+ size_t prfcnt_hw_size;
+ size_t prfcnt_fw_size;
+ size_t dump_bytes;
+ size_t prfcnt_block_size;
+ size_t l2_count;
+ u64 core_mask;
+ u8 clk_cnt;
+ bool clearing_samples;
+};
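
As a sanity-check sketch only, restating the size relationships documented in the struct above (KBASE_HWCNT_VALUE_HW_BYTES is the per-counter size used elsewhere in this patch), the fields are expected to satisfy:

static bool example_prfcnt_info_is_consistent(
	const struct kbase_hwcnt_backend_csf_if_prfcnt_info *info)
{
	/* A dump is the concatenation of the HW and FW counter data. */
	if (info->dump_bytes != info->prfcnt_hw_size + info->prfcnt_fw_size)
		return false;

	/* Each block holds a whole number of counters, and a dump holds a
	 * whole number of blocks.
	 */
	return ((info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) == 0) &&
	       ((info->dump_bytes % info->prfcnt_block_size) == 0);
}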
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the
+ * backend spinlock is
+ * held.
+ * @ctx: Non-NULL pointer to a CSF context.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
+ *
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @flags: Pointer to the memory location that would store the previous
+ * interrupt state.
+ */
+typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
+ *
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @flags: Previously stored interrupt state when Scheduler interrupt
+ * spinlock was acquired.
+ */
+typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
+ * counter information.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @prfcnt_info: Non-NULL pointer to struct where performance counter
+ * information should be stored.
+ */
+typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer
+ * for CSF interface.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @buf_count:     The buffer count of the ring buffer to be allocated;
+ *                 MUST be a power of 2.
+ * @cpu_dump_base: Non-NULL pointer to where the ring buffer CPU base address
+ *                 is stored on success.
+ * @ring_buf:      Non-NULL pointer to where the ring buffer is stored on
+ *                 success.
+ *
+ * A ring buffer is needed by the CSF interface to store both manual and
+ * automatic HWC samples; the buffer count in the ring buffer MUST be a power
+ * of 2 to meet the hardware requirement.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int
+kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 buf_count, void **cpu_dump_base,
+ struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers
+ * memory.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @ring_buf: Non-NULL pointer to the ring buffer.
+ * @buf_index_first: The first buffer index in the ring buffer to be synced,
+ * inclusive.
+ * @buf_index_last: The last buffer index in the ring buffer to be synced,
+ * exclusive.
+ * @for_cpu:         The direction of the sync: set to true when the CPU cache
+ *                   needs invalidating before the CPU reads the buffer, and
+ *                   set to false after CPU writes, to flush them out before
+ *                   the memory is overwritten by the GPU.
+ *
+ * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
+ * are correctly observed.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ u32 buf_index_first, u32 buf_index_last, bool for_cpu);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for
+ * the CSF interface.
+ *
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @ring_buf: Non-NULL pointer to the ring buffer to be freed.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current
+ * timestamp of the CSF
+ * interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Return: CSF interface timestamp in nanoseconds.
+ */
+typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware
+ * counter in CSF interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @ring_buf: Non-NULL pointer to the ring buffer used to set up the HWC.
+ * @enable: Non-NULL pointer to the enable map of HWC.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ struct kbase_hwcnt_backend_csf_if_enable *enable);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
+ * in CSF interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
+ *
+ * @ctx: Non-NULL pointer to the interface context.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
+ * insert indexes of the
+ * ring buffer.
+ *
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @extract_index: Non-NULL pointer where the current extract index is saved.
+ * @insert_index:  Non-NULL pointer where the current insert index is saved.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 *extract_index, u32 *insert_index);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
+ * index of the ring
+ * buffer.
+ *
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @extract_index: New extract index to be set.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 extract_index);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current
+ * GPU cycle count.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @cycle_counts: Non-NULL pointer to an array where the cycle counts are
+ *                saved; the array size should be at least as big as the
+ *                number of clock domains returned by the get_prfcnt_info
+ *                interface.
+ * @clk_enable_map: An array of bitfields, each bit specifies an enabled clock
+ * domain.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u64 *cycle_counts, u64 clk_enable_map);
+
+/**
+ * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual
+ * interface.
+ * @ctx: CSF interface context.
+ * @assert_lock_held: Function ptr to assert backend spinlock is held.
+ * @lock: Function ptr to acquire backend spinlock.
+ * @unlock: Function ptr to release backend spinlock.
+ * @get_prfcnt_info: Function ptr to get performance counter related
+ * information.
+ * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC.
+ * @ring_buf_sync: Function ptr to sync ring buffer to CPU.
+ * @ring_buf_free: Function ptr to free ring buffer for CSF HWC.
+ * @timestamp_ns: Function ptr to get the current CSF interface
+ * timestamp.
+ * @dump_enable: Function ptr to enable dumping.
+ * @dump_disable: Function ptr to disable dumping.
+ * @dump_request: Function ptr to request a dump.
+ * @get_indexes: Function ptr to get extract and insert indexes of the
+ * ring buffer.
+ * @set_extract_index: Function ptr to set extract index of ring buffer.
+ * @get_gpu_cycle_count: Function ptr to get the GPU cycle count.
+ */
+struct kbase_hwcnt_backend_csf_if {
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx;
+ kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held;
+ kbase_hwcnt_backend_csf_if_lock_fn *lock;
+ kbase_hwcnt_backend_csf_if_unlock_fn *unlock;
+ kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info;
+ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc;
+ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync;
+ kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free;
+ kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns;
+ kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable;
+ kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable;
+ kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request;
+ kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes;
+ kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index;
+ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count;
+};
+
+#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */
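
For illustration, a minimal wiring sketch of how an integration might populate this vtable; the my_* callbacks are hypothetical stand-ins whose definitions are omitted here, with the in-tree firmware-backed implementation living in mali_kbase_hwcnt_backend_csf_if_fw.c below.

static struct kbase_hwcnt_backend_csf_if example_csf_if = {
	.ctx = NULL, /* set to the integration's context at init time */
	.assert_lock_held = my_assert_lock_held,
	.lock = my_lock,
	.unlock = my_unlock,
	.get_prfcnt_info = my_get_prfcnt_info,
	.ring_buf_alloc = my_ring_buf_alloc,
	.ring_buf_sync = my_ring_buf_sync,
	.ring_buf_free = my_ring_buf_free,
	.timestamp_ns = my_timestamp_ns,
	.dump_enable = my_dump_enable,
	.dump_disable = my_dump_disable,
	.dump_request = my_dump_request,
	.get_indexes = my_get_indexes,
	.set_extract_index = my_set_extract_index,
	.get_gpu_cycle_count = my_get_gpu_cycle_count,
};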
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
new file mode 100644
index 0000000..a3a0e02
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -0,0 +1,784 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * CSF GPU HWC backend firmware interface APIs.
+ */
+
+#include <mali_kbase.h>
+#include <gpu/mali_kbase_gpu_regmap.h>
+#include <device/mali_kbase_device.h>
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+#include <csf/mali_kbase_csf_registers.h>
+
+#include "csf/mali_kbase_csf_firmware.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include <linux/log2.h>
+#include "mali_kbase_ccswe.h"
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
+
+/* The ring buffer virtual address starts at 4GB */
+#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface
+ * used to save the manual and
+ * auto HWC samples from
+ * firmware.
+ * @gpu_dump_base: Starting GPU base address of the ring buffer.
+ * @cpu_dump_base: Starting CPU address for the mapping.
+ * @buf_count: Buffer count in the ring buffer, MUST be power of 2.
+ * @as_nr: Address space number for the memory mapping.
+ * @phys: Physical memory allocation used by the mapping.
+ * @num_pages: Size of the mapping, in memory pages.
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
+ u64 gpu_dump_base;
+ void *cpu_dump_base;
+ size_t buf_count;
+ u32 as_nr;
+ struct tagged_addr *phys;
+ size_t num_pages;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
+ * interface, used to communicate
+ * with firmware.
+ * @kbdev: KBase device.
+ * @buf_bytes: The size in bytes for each buffer in the ring buffer.
+ * @clk_cnt: The number of clock domains in the system.
+ * The maximum is 64.
+ * @clk_enable_map:     Bitmask of enabled clocks.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ctx {
+ struct kbase_device *kbdev;
+ size_t buf_bytes;
+ u8 clk_cnt;
+ u64 clk_enable_map;
+ struct kbase_clk_rate_listener rate_listener;
+ struct kbase_ccswe ccswe_shader_cores;
+};
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock(kbdev, flags);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - Frequency change callback.
+ *
+ * @rate_listener:    Callback state.
+ * @clk_index:        Clock index.
+ * @clk_rate_hz:      Clock frequency (Hz).
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index, u32 clk_rate_hz)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
+ rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
+ u64 timestamp_ns;
+
+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+ return;
+
+ timestamp_ns = ktime_get_raw_ns();
+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
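
Conceptually (a sketch only, not the kbase_ccswe implementation), the software estimator accumulates elapsed_time * current_frequency, closing a segment every time the rate listener above reports a frequency change; overflow handling over very long segments is ignored here for clarity.

struct example_ccswe {
	u64 cycles;  /* cycles accumulated up to last_ns */
	u64 last_ns; /* timestamp of the last update */
	u32 freq_hz; /* frequency in effect since last_ns */
};

static void example_ccswe_freq_change(struct example_ccswe *e, u64 now_ns, u32 new_freq_hz)
{
	/* Close the segment at the old frequency before switching over. */
	e->cycles += div_u64((now_ns - e->last_ns) * e->freq_hz, NSEC_PER_SEC);
	e->last_ns = now_ns;
	e->freq_hz = new_freq_hz;
}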
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
+ *
+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
+ * @clk_enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx,
+ u64 clk_enable_map)
+{
+ struct kbase_device *kbdev = fw_ctx->kbdev;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ /* software estimation for non-top clock domains */
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+ u32 cur_freq;
+ unsigned long flags;
+ u64 timestamp_ns;
+
+ timestamp_ns = ktime_get_raw_ns();
+
+ spin_lock_irqsave(&rtm->lock, flags);
+
+ cur_freq = (u32)clk_data->clock_val;
+ kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq);
+
+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener);
+
+ spin_unlock_irqrestore(&rtm->lock, flags);
+ }
+
+ fw_ctx->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
+ *
+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+{
+ struct kbase_device *kbdev = fw_ctx->kbdev;
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ u64 clk_enable_map = fw_ctx->clk_enable_map;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
+{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
+ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
+ .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
+ .prfcnt_hw_size =
+ KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .prfcnt_fw_size =
+ KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE,
+ .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .clk_cnt = 1,
+ .clearing_samples = true,
+ };
+
+ fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
+#else
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+ u32 prfcnt_size;
+ u32 prfcnt_hw_size;
+ u32 prfcnt_fw_size;
+ u32 prfcnt_block_size =
+ KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;
+
+ WARN_ON(!ctx);
+ WARN_ON(!prfcnt_info);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+ prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
+ prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size);
+ prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size);
+ fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
+
+ /* Read the block size if the GPU has the register PRFCNT_FEATURES
+ * which was introduced in architecture version 11.x.7.
+ */
+ if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
+ GPU_ID2_PRODUCT_TTUX) {
+ prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
+ << 8;
+ }
+
+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
+ .prfcnt_hw_size = prfcnt_hw_size,
+ .prfcnt_fw_size = prfcnt_fw_size,
+ .dump_bytes = fw_ctx->buf_bytes,
+ .prfcnt_block_size = prfcnt_block_size,
+ .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices,
+ .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask,
+ .clk_cnt = fw_ctx->clk_cnt,
+ .clearing_samples = true,
+ };
+
+ /* Block size must be multiple of counter size. */
+ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
+ /* Total size must be multiple of block size. */
+ WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0);
+#endif
+}
+
+static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
+ struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
+{
+ struct kbase_device *kbdev;
+ struct tagged_addr *phys;
+ struct page **page_list;
+ void *cpu_addr;
+ int ret;
+ int i;
+ size_t num_pages;
+ u64 flags;
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;
+
+ pgprot_t cpu_map_prot = PAGE_KERNEL;
+ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
+
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ WARN_ON(!ctx);
+ WARN_ON(!cpu_dump_base);
+ WARN_ON(!out_ring_buf);
+
+ kbdev = fw_ctx->kbdev;
+
+	/* The buffer count must be a power of 2. */
+ if (!is_power_of_2(buf_count))
+ return -EINVAL;
+
+	/* Alignment failure: the GPU VA base must be 2KB aligned. */
+ if (gpu_va_base & (2048 - 1))
+ return -EINVAL;
+
+ fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
+ if (!fw_ring_buf)
+ return -ENOMEM;
+
+ num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
+ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
+ if (!phys)
+ goto phys_alloc_error;
+
+ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
+ if (!page_list)
+ goto page_list_alloc_error;
+
+ /* Get physical page for the buffer */
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+ phys, false);
+ if (ret != num_pages)
+ goto phys_mem_pool_alloc_error;
+
+ /* Get the CPU virtual address */
+ for (i = 0; i < num_pages; i++)
+ page_list[i] = as_page(phys[i]);
+
+ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
+ if (!cpu_addr)
+ goto vmap_error;
+
+ flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+
+ /* Update MMU table */
+ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
+ num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
+ mmu_sync_info);
+ if (ret)
+ goto mmu_insert_failed;
+
+ kfree(page_list);
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr;
+#else
+ fw_ring_buf->gpu_dump_base = gpu_va_base;
+#endif /* CONFIG_MALI_NO_MALI */
+ fw_ring_buf->cpu_dump_base = cpu_addr;
+ fw_ring_buf->phys = phys;
+ fw_ring_buf->num_pages = num_pages;
+ fw_ring_buf->buf_count = buf_count;
+ fw_ring_buf->as_nr = MCU_AS_NR;
+
+ *cpu_dump_base = fw_ring_buf->cpu_dump_base;
+ *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
+
+ return 0;
+
+mmu_insert_failed:
+ vunmap(cpu_addr);
+vmap_error:
+ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys,
+ false, false);
+phys_mem_pool_alloc_error:
+ kfree(page_list);
+page_list_alloc_error:
+ kfree(phys);
+phys_alloc_error:
+ kfree(fw_ring_buf);
+ return -ENOMEM;
+}
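
As a worked example of the sizing above (assuming 4 KiB pages and illustrative values), a per-sample size of 8192 bytes with a 128-entry ring buffer needs PFN_UP(8192 * 128) = 256 backing pages:

static size_t example_ring_buf_pages(size_t buf_bytes, u32 buf_count)
{
	/* e.g. buf_bytes = 8192, buf_count = 128 -> 1 MiB -> 256 pages. */
	return PFN_UP(buf_bytes * buf_count);
}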
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ u32 buf_index_first, u32 buf_index_last, bool for_cpu)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ size_t i;
+ size_t pg_first;
+ size_t pg_last;
+ u64 start_address;
+ u64 stop_address;
+ u32 ring_buf_index_first;
+ u32 ring_buf_index_last;
+
+ WARN_ON(!ctx);
+ WARN_ON(!ring_buf);
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+	/* When using the dummy backend, syncing the ring buffer is unnecessary
+	 * as it is only accessed by the CPU. Syncing may also cause data loss
+	 * due to cache invalidation, so return early.
+	 */
+ return;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	/* The index arguments for this function form a half-open range:
+	 * [buf_index_first, buf_index_last).
+	 * However, when masking back to the available buffers we make the range
+	 * inclusive at both ends, so that full flushes are not 0 -> 0.
+	 */
+ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
+ ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
+
+ /* The start address is the offset of the first buffer. */
+ start_address = fw_ctx->buf_bytes * ring_buf_index_first;
+ pg_first = start_address >> PAGE_SHIFT;
+
+ /* The stop address is the last byte in the final buffer. */
+ stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
+ pg_last = stop_address >> PAGE_SHIFT;
+
+ /* Check whether the buffer range wraps. */
+ if (start_address > stop_address) {
+ /* sync the first part to the end of ring buffer. */
+ for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
+ struct page *pg = as_page(fw_ring_buf->phys[i]);
+
+ if (for_cpu) {
+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg),
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ } else {
+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg),
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ }
+ }
+
+ /* second part starts from page 0. */
+ pg_first = 0;
+ }
+
+ for (i = pg_first; i <= pg_last; i++) {
+ struct page *pg = as_page(fw_ring_buf->phys[i]);
+
+ if (for_cpu) {
+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ } else {
+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+}
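
The index masking and wraparound handling above can be illustrated with a small standalone sketch; the buffer count, per-sample size and indices below are hypothetical, not driver values.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint32_t buf_count = 8;        /* power of 2, as required above */
	uint64_t buf_bytes = 6144;     /* hypothetical per-sample size */
	uint32_t first = 6, last = 10; /* half-open range [6, 10) */

	/* Mask back into the ring and make the range inclusive at both ends. */
	uint32_t idx_first = first & (buf_count - 1);     /* 6 */
	uint32_t idx_last = (last - 1) & (buf_count - 1); /* 1: the range wraps */

	uint64_t start = buf_bytes * idx_first;
	uint64_t stop = buf_bytes * (idx_last + 1) - 1;

	printf("first page %llu, last page %llu%s\n",
	       (unsigned long long)(start >> PAGE_SHIFT),
	       (unsigned long long)(stop >> PAGE_SHIFT),
	       start > stop ? " (wrapped: sync tail pages first, then from page 0)" : "");
	return 0;
}
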
+
+static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ CSTD_UNUSED(ctx);
+ return ktime_get_raw_ns();
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ if (!fw_ring_buf)
+ return;
+
+ if (fw_ring_buf->phys) {
+ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
+
+ WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
+ gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
+ fw_ring_buf->num_pages, MCU_AS_NR));
+
+ vunmap(fw_ring_buf->cpu_dump_base);
+
+ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+ fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
+
+ kfree(fw_ring_buf->phys);
+
+ kfree(fw_ring_buf);
+ }
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ struct kbase_hwcnt_backend_csf_if_enable *enable)
+{
+ u32 prfcnt_config;
+ struct kbase_device *kbdev;
+ struct kbase_csf_global_iface *global_iface;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+
+ WARN_ON(!ctx);
+ WARN_ON(!ring_buf);
+ WARN_ON(!enable);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ kbdev = fw_ctx->kbdev;
+ global_iface = &kbdev->csf.global_iface;
+
+ /* Configure */
+ prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count);
+ prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);
+
+ /* Configure the ring buffer base address */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
+ fw_ring_buf->gpu_dump_base & U32_MAX);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
+ fw_ring_buf->gpu_dump_base >> 32);
+
+ /* Set extract position to 0 */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
+
+ /* Configure the enable bitmap */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);
+
+ /* Configure the HWC set and buffer size */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);
+
+ kbdev->csf.hwcnt.enable_pending = true;
+
+ /* Unmask the interrupts */
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
+
+ /* Enable the HWC */
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+ (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
+ GLB_REQ_PRFCNT_ENABLE_MASK);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+ prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG);
+
+ kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map);
+}
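
A minimal sketch of how the 64-bit ring buffer base address splits into the LO/HI register writes above; the address used here is made up for illustration.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical ring buffer GPU VA; not a real driver value. */
	uint64_t gpu_dump_base = 0x0000007FC0000000ull;

	/* Split into the two 32-bit values programmed as BASE_LO/BASE_HI. */
	uint32_t lo = (uint32_t)(gpu_dump_base & 0xFFFFFFFFu);
	uint32_t hi = (uint32_t)(gpu_dump_base >> 32);

	printf("BASE_LO=0x%08x BASE_HI=0x%08x\n", lo, hi);
	return 0;
}
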
+
+static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ struct kbase_device *kbdev;
+ struct kbase_csf_global_iface *global_iface;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ kbdev = fw_ctx->kbdev;
+ global_iface = &kbdev->csf.global_iface;
+
+ /* Disable the HWC */
+ kbdev->csf.hwcnt.enable_pending = true;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+	/* Mask the interrupts */
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+
+ /* In case we have a previous request in flight when the disable
+ * happens.
+ */
+ kbdev->csf.hwcnt.request_pending = false;
+
+ kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ u32 glb_req;
+ struct kbase_device *kbdev;
+ struct kbase_csf_global_iface *global_iface;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ kbdev = fw_ctx->kbdev;
+ global_iface = &kbdev->csf.global_iface;
+
+ /* Trigger dumping */
+ kbdev->csf.hwcnt.request_pending = true;
+ glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+ glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
+ GLB_REQ_PRFCNT_SAMPLE_MASK);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
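
The dump request above works as a toggle: flipping the PRFCNT_SAMPLE bit in GLB_REQ and ringing the doorbell asks the firmware for a sample, and the request is treated as outstanding until the matching ACK bit equals the REQ bit again. A standalone sketch of that convention follows; the bit position is invented for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PRFCNT_SAMPLE_MASK (1u << 8) /* hypothetical bit position */

/* Toggle-style request: the host flips the bit in REQ and the firmware
 * acknowledges by making the corresponding ACK bit equal to REQ again.
 */
static bool sample_pending(uint32_t glb_req, uint32_t glb_ack)
{
	return ((glb_req ^ glb_ack) & PRFCNT_SAMPLE_MASK) != 0;
}

int main(void)
{
	uint32_t req = 0, ack = 0;

	req ^= PRFCNT_SAMPLE_MASK;                        /* host requests a sample */
	printf("pending=%d\n", sample_pending(req, ack)); /* 1 */
	ack = req;                                        /* firmware acknowledges  */
	printf("pending=%d\n", sample_pending(req, ack)); /* 0 */
	return 0;
}
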
+
+static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 *extract_index, u32 *insert_index)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ WARN_ON(!extract_index);
+ WARN_ON(!insert_index);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ *extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
+ GLB_PRFCNT_EXTRACT);
+ *insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
+ GLB_PRFCNT_INSERT);
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 extract_idx)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ /* Set the raw extract index to release the buffer back to the ring
+ * buffer.
+ */
+ kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
+ extract_idx);
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u64 *cycle_counts, u64 clk_enable_map)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ u8 clk;
+ u64 timestamp_ns = ktime_get_raw_ns();
+
+ WARN_ON(!ctx);
+ WARN_ON(!cycle_counts);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
+ if (!(clk_enable_map & (1ull << clk)))
+ continue;
+
+ if (clk == KBASE_CLOCK_DOMAIN_TOP) {
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
+ NULL, NULL);
+ } else {
+ /* Estimate cycle count for non-top clock domain. */
+ cycle_counts[clk] =
+ kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
+ }
+ }
+}
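
The per-domain loop above only touches clock domains whose bit is set in clk_enable_map. A standalone sketch of that selection, with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t clk_cnt = 2;           /* e.g. top domain plus shader cores */
	uint64_t clk_enable_map = 0x3; /* hypothetical: both domains enabled */
	uint8_t clk;

	for (clk = 0; clk < clk_cnt; clk++) {
		if (!(clk_enable_map & (1ull << clk)))
			continue;
		printf("would read or estimate cycle count for domain %u\n", clk);
	}
	return 0;
}
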
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
+ *
+ * @fw_ctx: Pointer to context to destroy.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+{
+ if (!fw_ctx)
+ return;
+
+ kfree(fw_ctx);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context.
+ *
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @out_ctx: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int
+kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
+{
+ u8 clk;
+ int errcode = -ENOMEM;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
+
+ WARN_ON(!kbdev);
+ WARN_ON(!out_ctx);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ goto error;
+
+ ctx->kbdev = kbdev;
+
+ /* Determine the number of available clock domains. */
+ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
+ if (kbdev->pm.clk_rtm.clks[clk] == NULL)
+ break;
+ }
+ ctx->clk_cnt = clk;
+
+ ctx->clk_enable_map = 0;
+ kbase_ccswe_init(&ctx->ccswe_shader_cores);
+ ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
+
+ *out_ctx = ctx;
+
+ return 0;
+error:
+ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
+ return errcode;
+}
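
Determining clk_cnt above just counts leading non-NULL entries in the clock descriptor array. A tiny userspace equivalent, with dummy pointers standing in for the descriptors:

#include <stdio.h>

#define MAX_CLOCKS 4 /* stand-in for BASE_MAX_NR_CLOCKS_REGULATORS */

int main(void)
{
	/* Hypothetical clock descriptor array: two populated slots. */
	void *clks[MAX_CLOCKS] = { (void *)1, (void *)1, NULL, NULL };
	unsigned int clk;

	for (clk = 0; clk < MAX_CLOCKS; clk++) {
		if (clks[clk] == NULL)
			break;
	}
	printf("clk_cnt = %u\n", clk); /* 2 */
	return 0;
}
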
+
+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw)
+{
+ if (!if_fw)
+ return;
+
+ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
+ memset(if_fw, 0, sizeof(*if_fw));
+}
+
+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if *if_fw)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
+
+ if (!kbdev || !if_fw)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
+ if (errcode)
+ return errcode;
+
+ if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
+ if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
+ if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
+ if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
+ if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
+ if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
+ if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
+ if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
+ if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
+ if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
+ if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
+ if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
+ if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
+ if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
+ if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
+
+ return 0;
+}
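
kbase_hwcnt_backend_csf_if_fw_create() binds the concrete firmware implementation into a table of function pointers that the generic hwcnt code then calls through. A minimal standalone sketch of that pattern; the types and names below are invented for the example and are not part of the kbase API.

#include <stdio.h>

struct demo_if {
	void *ctx;
	int (*dump_request)(void *ctx);
};

static int demo_dump_request(void *ctx)
{
	(void)ctx;
	printf("dump requested\n");
	return 0;
}

static void demo_if_create(struct demo_if *iface)
{
	iface->ctx = NULL;
	iface->dump_request = demo_dump_request;
}

int main(void)
{
	struct demo_if iface;

	demo_if_create(&iface);
	return iface.dump_request(iface.ctx);
}
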
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h
new file mode 100644
index 0000000..71d1506
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
+#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
+
+/**
+ * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface
+ *                                          of the hardware counter backend.
+ * @kbdev: Non-NULL pointer to Kbase device.
+ * @if_fw: Non-NULL pointer to backend interface structure that is filled in on
+ *         creation success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if *if_fw);
+
+/**
+ * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of
+ *                                            the hardware counter backend.
+ * @if_fw: Pointer to a CSF interface to destroy.
+ */
+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw);
+
+#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
new file mode 100644
index 0000000..6ddd7ba
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -0,0 +1,863 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "backend/gpu/mali_kbase_pm_ca.h"
+#include "mali_kbase_hwaccess_instr.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "mali_kbase_ccswe.h"
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+/**
+ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance
+ * of a JM hardware counter backend.
+ * @kbdev: KBase device.
+ * @counter_set: The performance counter set to use.
+ * @metadata: Hardware counter metadata.
+ * @dump_bytes: Bytes of GPU memory required to perform a
+ * hardware counter dump.
+ * @hwcnt_gpu_info: Hardware counter block information.
+ */
+struct kbase_hwcnt_backend_jm_info {
+ struct kbase_device *kbdev;
+ enum kbase_hwcnt_set counter_set;
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_bytes;
+ struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
+};
+
+/**
+ * struct kbase_hwcnt_jm_physical_layout - HWC sample memory physical layout
+ * information.
+ * @fe_cnt: Front end block count.
+ * @tiler_cnt: Tiler block count.
+ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
+ * @shader_cnt: Shader Core block count.
+ * @block_cnt: Total block count (sum of all other block counts).
+ * @shader_avail_mask: Bitmap of all shader cores in the system.
+ * @enable_mask_offset: Offset in array elements of enable mask in each block
+ * starting from the beginning of block.
+ * @headers_per_block: Number of header values per block.
+ * @counters_per_block: Number of counter values per block.
+ * @values_per_block: Total number of values per block.
+ */
+struct kbase_hwcnt_jm_physical_layout {
+ u8 fe_cnt;
+ u8 tiler_cnt;
+ u8 mmu_l2_cnt;
+ u8 shader_cnt;
+ u8 block_cnt;
+ u64 shader_avail_mask;
+ size_t enable_mask_offset;
+ size_t headers_per_block;
+ size_t counters_per_block;
+ size_t values_per_block;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
+ * @info: Info used to create the backend.
+ * @kctx: KBase context used for GPU memory allocation and
+ * counter dumping.
+ * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va: CPU mapping of gpu_dump_va.
+ * @vmap: Dump buffer vmap.
+ * @to_user_buf: HWC sample buffer for client user, size
+ * metadata.dump_buf_bytes.
+ * @enabled: True if dumping has been enabled, else false.
+ * @pm_core_mask: PM state sync-ed shaders core mask for the enabled
+ * dumping.
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * source raw dump buffer to the destination dump buffer.
+ * @clk_enable_map: The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed:
+ * Cycle count elapsed for a given sample period.
+ *			The cycle count for the top clock domain (index 0) is
+ *			read directly from hardware, but the other clock
+ *			domains are estimated in software.
+ * @prev_cycle_count: Previous cycle count to calculate the cycle count for
+ * sample period.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ * @phys_layout: Physical memory layout information of HWC sample buffer.
+ */
+struct kbase_hwcnt_backend_jm {
+ const struct kbase_hwcnt_backend_jm_info *info;
+ struct kbase_context *kctx;
+ u64 gpu_dump_va;
+ void *cpu_dump_va;
+ struct kbase_vmap_struct *vmap;
+ u64 *to_user_buf;
+ bool enabled;
+ u64 pm_core_mask;
+ struct kbase_hwcnt_curr_config curr_config;
+ u64 clk_enable_map;
+ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+ struct kbase_clk_rate_listener rate_listener;
+ struct kbase_ccswe ccswe_shader_cores;
+ struct kbase_hwcnt_jm_physical_layout phys_layout;
+};
+
+/**
+ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used
+ * to create the hwcnt metadata.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @info: Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
+ struct kbase_hwcnt_gpu_info *info)
+{
+ size_t clk;
+
+ if (!kbdev || !info)
+ return -EINVAL;
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
+#else /* CONFIG_MALI_NO_MALI */
+ {
+ const struct base_gpu_props *props = &kbdev->gpu_props.props;
+ const size_t l2_count = props->l2_props.num_l2_slices;
+ const size_t core_mask = props->coherency_info.group[0].core_mask;
+
+ info->l2_count = l2_count;
+ info->core_mask = core_mask;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
+ }
+#endif /* CONFIG_MALI_NO_MALI */
+
+ /* Determine the number of available clock domains. */
+ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
+ if (kbdev->pm.clk_rtm.clks[clk] == NULL)
+ break;
+ }
+ info->clk_cnt = clk;
+
+ return 0;
+}
+
+static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info,
+ struct kbase_hwcnt_jm_physical_layout *phys_layout)
+{
+ u8 shader_core_cnt;
+
+ WARN_ON(!gpu_info);
+ WARN_ON(!phys_layout);
+
+ shader_core_cnt = fls64(gpu_info->core_mask);
+
+ *phys_layout = (struct kbase_hwcnt_jm_physical_layout){
+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
+ .mmu_l2_cnt = gpu_info->l2_count,
+ .shader_cnt = shader_core_cnt,
+ .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ gpu_info->l2_count + shader_core_cnt,
+ .shader_avail_mask = gpu_info->core_mask,
+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .values_per_block = gpu_info->prfcnt_values_per_block,
+ .counters_per_block =
+ gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
+ };
+}
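
The layout above derives the shader block count from fls64(core_mask), i.e. the index of the highest set bit plus one, so a mask whose top bit is at position N-1 yields N shader blocks. A standalone sketch with example block counts (the core mask and counts are hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Software stand-in for the kernel's fls64(): index of the highest set bit
 * plus one, or 0 for an empty mask.
 */
static unsigned int fls64_sw(uint64_t x)
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	uint64_t core_mask = 0x3F; /* hypothetical: 6 contiguous shader cores */
	unsigned int fe_cnt = 1, tiler_cnt = 1, l2_cnt = 2; /* example counts */
	unsigned int shader_cnt = fls64_sw(core_mask);

	printf("block_cnt = %u\n", fe_cnt + tiler_cnt + l2_cnt + shader_cnt); /* 10 */
	return 0;
}
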
+
+static void
+kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm)
+{
+ size_t block_idx;
+ const u32 *new_sample_buf = backend_jm->cpu_dump_va;
+ const u32 *new_block = new_sample_buf;
+ u64 *dst_buf = backend_jm->to_user_buf;
+ u64 *dst_block = dst_buf;
+ const size_t values_per_block = backend_jm->phys_layout.values_per_block;
+ const size_t dump_bytes = backend_jm->info->dump_bytes;
+
+ for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) {
+ size_t ctr_idx;
+
+ for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++)
+ dst_block[ctr_idx] = new_block[ctr_idx];
+
+ new_block += values_per_block;
+ dst_block += values_per_block;
+ }
+
+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+}
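
The sample copy above widens each 32-bit hardware counter value into the 64-bit buffer handed to clients. A minimal standalone equivalent with fake data:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Fake 32-bit hardware sample; real samples are read from the dump VA. */
	const uint32_t hw[4] = { 1, 2, 3, 0xFFFFFFFFu };
	uint64_t user[4];
	size_t i;

	for (i = 0; i < 4; i++)
		user[i] = hw[i]; /* zero-extends each counter to 64 bits */

	printf("last value: %llu\n", (unsigned long long)user[3]);
	return 0;
}
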
+
+/**
+ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
+ *
+ * @rate_listener: Callback state
+ * @clk_index: Clock index
+ * @clk_rate_hz: Clock frequency (Hz)
+ */
+static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index, u32 clk_rate_hz)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
+ u64 timestamp_ns;
+
+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+ return;
+
+ timestamp_ns = ktime_get_raw_ns();
+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ * @timestamp_ns: Timestamp(ns) when HWCNT were enabled.
+ */
+static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 timestamp_ns)
+{
+ struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+ u64 clk_enable_map = enable_map->clk_enable_map;
+ u64 cycle_count;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ /* turn on the cycle counter */
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL);
+
+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count;
+ }
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ /* software estimation for non-top clock domains */
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+ u32 cur_freq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtm->lock, flags);
+
+ cur_freq = (u32)clk_data->clock_val;
+ kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq);
+
+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener);
+
+ spin_unlock_irqrestore(&rtm->lock, flags);
+
+ /* ccswe was reset. The estimated cycle is zero. */
+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
+ }
+
+ /* Keep clk_enable_map for dump_request. */
+ backend_jm->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to backend.
+ */
+static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm)
+{
+ struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ u64 clk_enable_map = backend_jm->clk_enable_map;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ /* turn off the cycle counter */
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+ }
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener);
+ }
+}
+
+/**
+ * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
+ * current config information.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @curr_config: Non-NULL pointer to return the current configuration of
+ * hardware allocated to the GPU.
+ *
+ * The current configuration information is used for architectures where the
+ * max_config interface is available from the Arbiter. In this case the
+ * currently allocated hardware is not always the same, so the current config
+ * information is used to correctly map the currently allocated resources to
+ * the memory layout that is copied to user space.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev,
+ struct kbase_hwcnt_curr_config *curr_config)
+{
+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
+ return -EINVAL;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices;
+ curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present;
+ return 0;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+ (void)backend;
+ return ktime_get_raw_ns();
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int
+kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_physical_enable_map phys_enable_map;
+ enum kbase_hwcnt_physical_set phys_counter_set;
+ struct kbase_instr_hwcnt_enable enable;
+ u64 timestamp_ns;
+
+ if (!backend_jm || !enable_map || backend_jm->enabled ||
+ (enable_map->metadata != backend_jm->info->metadata))
+ return -EINVAL;
+
+ kctx = backend_jm->kctx;
+ kbdev = backend_jm->kctx->kbdev;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
+
+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);
+
+ enable.fe_bm = phys_enable_map.fe_bm;
+ enable.shader_bm = phys_enable_map.shader_bm;
+ enable.tiler_bm = phys_enable_map.tiler_bm;
+ enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+ enable.counter_set = phys_counter_set;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ /* The dummy model needs the CPU mapping. */
+ enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va;
+#else
+ enable.dump_buffer = backend_jm->gpu_dump_va;
+#endif /* CONFIG_MALI_NO_MALI */
+ enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
+
+ timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
+
+ /* Update the current configuration information. */
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+ if (errcode)
+ goto error;
+
+ backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
+
+ backend_jm->enabled = true;
+
+ kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);
+
+ return 0;
+error:
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ unsigned long flags;
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_device *kbdev;
+
+ if (!backend_jm)
+ return -EINVAL;
+
+ kbdev = backend_jm->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (WARN_ON(!backend_jm) || !backend_jm->enabled)
+ return;
+
+ kbasep_hwcnt_backend_jm_cc_disable(backend_jm);
+
+ errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx);
+ WARN_ON(errcode);
+
+ backend_jm->enabled = false;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (!backend_jm || !backend_jm->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_clear(backend_jm->kctx);
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_device *kbdev;
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 current_cycle_count;
+ size_t clk;
+ int ret;
+
+ if (!backend_jm || !backend_jm->enabled || !dump_time_ns)
+ return -EINVAL;
+
+ kbdev = backend_jm->kctx->kbdev;
+ metadata = backend_jm->info->metadata;
+
+	/* Disable pre-emption to make the timestamp as accurate as possible */
+ preempt_disable();
+ {
+ *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
+ ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
+ continue;
+
+ if (clk == KBASE_CLOCK_DOMAIN_TOP) {
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(kbdev, &current_cycle_count,
+ NULL, NULL);
+ } else {
+ /*
+ * Estimate cycle count for non-top clock
+ * domain.
+ */
+ current_cycle_count = kbase_ccswe_cycle_at(
+ &backend_jm->ccswe_shader_cores, *dump_time_ns);
+ }
+ backend_jm->cycle_count_elapsed[clk] =
+ current_cycle_count - backend_jm->prev_cycle_count[clk];
+
+ /*
+ * Keep the current cycle count for later calculation.
+ */
+ backend_jm->prev_cycle_count[clk] = current_cycle_count;
+ }
+ }
+ preempt_enable();
+
+ return ret;
+}
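
Each dump request records, per enabled clock domain, the delta between the current and previous raw cycle counts, then keeps the current value for the next sample. A standalone sketch of that bookkeeping with invented numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Invented counter readings for one clock domain across two samples. */
	uint64_t prev_cycle_count = 1000000;
	uint64_t current_cycle_count = 1750000;

	uint64_t cycle_count_elapsed = current_cycle_count - prev_cycle_count;
	prev_cycle_count = current_cycle_count; /* kept for the next sample */

	printf("elapsed=%llu prev=%llu\n",
	       (unsigned long long)cycle_count_elapsed,
	       (unsigned long long)prev_cycle_count);
	return 0;
}
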
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (!backend_jm || !backend_jm->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ size_t clk;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ int errcode;
+#endif /* CONFIG_MALI_NO_MALI */
+
+ if (!backend_jm || !dst || !dst_enable_map ||
+ (backend_jm->info->metadata != dst->metadata) ||
+ (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ /* Invalidate the kernel buffer before reading from it. */
+ kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
+
+ /* Dump sample to the internal 64-bit user buffer. */
+ kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
+
+ /* Extract elapsed cycle count for each clock domain if enabled. */
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ continue;
+
+ /* Reset the counter to zero if accumulation is off. */
+ if (!accumulate)
+ dst->clk_cnt_buf[clk] = 0;
+ dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk];
+ }
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ kbdev = backend_jm->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ /* Update the current configuration information. */
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (errcode)
+ return errcode;
+#endif /* CONFIG_MALI_NO_MALI */
+ return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
+ backend_jm->pm_core_mask, &backend_jm->curr_config,
+ accumulate);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer.
+ * @info: Non-NULL pointer to JM backend info.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
+ * is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_context *kctx, u64 *gpu_dump_va)
+{
+ struct kbase_va_region *reg;
+ u64 flags;
+ u64 nr_pages;
+
+	/* Calls to this function are inherently asynchronous with respect to
+	 * MMU operations.
+	 */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ WARN_ON(!info);
+ WARN_ON(!kctx);
+ WARN_ON(!gpu_dump_va);
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING |
+ BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU;
+
+ nr_pages = PFN_UP(info->dump_bytes);
+
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info);
+
+ if (!reg)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: GPU dump buffer virtual address.
+ */
+static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va)
+{
+ WARN_ON(!kctx);
+ if (gpu_dump_va)
+ kbase_mem_free(kctx, gpu_dump_va);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend.
+ * @backend: Pointer to JM backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend)
+{
+ if (!backend)
+ return;
+
+ if (backend->kctx) {
+ struct kbase_context *kctx = backend->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (backend->cpu_dump_va)
+ kbase_phy_alloc_mapping_put(kctx, backend->vmap);
+
+ if (backend->gpu_dump_va)
+ kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va);
+
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+ kbase_destroy_context(kctx);
+ }
+
+ kfree(backend->to_user_buf);
+
+ kfree(backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_create() - Create a JM backend.
+ * @info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_hwcnt_backend_jm **out_backend)
+{
+ int errcode;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_backend_jm *backend = NULL;
+
+ WARN_ON(!info);
+ WARN_ON(!out_backend);
+
+ kbdev = info->kbdev;
+
+ backend = kzalloc(sizeof(*backend), GFP_KERNEL);
+ if (!backend)
+ goto alloc_error;
+
+ backend->info = info;
+ kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout);
+
+ backend->kctx = kbase_create_context(kbdev, true,
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
+ if (!backend->kctx)
+ goto alloc_error;
+
+ kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
+
+ errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va);
+ if (errcode)
+ goto error;
+
+ backend->cpu_dump_va =
+ kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap);
+ if (!backend->cpu_dump_va || !backend->vmap)
+ goto alloc_error;
+
+ backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!backend->to_user_buf)
+ goto alloc_error;
+
+ kbase_ccswe_init(&backend->ccswe_shader_cores);
+ backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;
+
+ *out_backend = backend;
+ return 0;
+
+alloc_error:
+ errcode = -ENOMEM;
+error:
+ kbasep_hwcnt_backend_jm_destroy(backend);
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ if (!info)
+ return NULL;
+
+ return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend = NULL;
+
+ if (!info || !out_backend)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info,
+ &backend);
+ if (errcode)
+ return errcode;
+
+ *out_backend = (struct kbase_hwcnt_backend *)backend;
+
+ return 0;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
+{
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_jm_dump_disable(backend);
+ kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info)
+{
+ if (!info)
+ return;
+
+ kbase_hwcnt_jm_metadata_destroy(info->metadata);
+ kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev,
+ const struct kbase_hwcnt_backend_jm_info **out_info)
+{
+ int errcode = -ENOMEM;
+ struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+ WARN_ON(!kbdev);
+ WARN_ON(!out_info);
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return errcode;
+
+ info->kbdev = kbdev;
+
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ info->counter_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+ /* Default to primary */
+ info->counter_set = KBASE_HWCNT_SET_PRIMARY;
+#endif
+
+ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set,
+ &info->metadata, &info->dump_bytes);
+ if (errcode)
+ goto error;
+
+ *out_info = info;
+
+ return 0;
+error:
+ kbasep_hwcnt_backend_jm_info_destroy(info);
+ return errcode;
+}
+
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ int errcode;
+ const struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+ if (!kbdev || !iface)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info);
+
+ if (errcode)
+ return errcode;
+
+ iface->info = (struct kbase_hwcnt_backend_info *)info;
+ iface->metadata = kbasep_hwcnt_backend_jm_metadata;
+ iface->init = kbasep_hwcnt_backend_jm_init;
+ iface->term = kbasep_hwcnt_backend_jm_term;
+ iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns;
+ iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable;
+ iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock;
+ iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable;
+ iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear;
+ iface->dump_request = kbasep_hwcnt_backend_jm_dump_request;
+ iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait;
+ iface->dump_get = kbasep_hwcnt_backend_jm_dump_get;
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface)
+ return;
+
+ kbasep_hwcnt_backend_jm_info_destroy(
+ (const struct kbase_hwcnt_backend_jm_info *)iface->info);
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
new file mode 100644
index 0000000..4a6293c
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for JM
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_JM_H_
+#define _KBASE_HWCNT_BACKEND_JM_H_
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+
+struct kbase_device;
+
+/**
+ * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend
+ * interface.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @iface: Non-NULL pointer to backend interface structure that is filled in
+ * on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend
+ * interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
new file mode 100644
index 0000000..a8654ea
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+
+#include <hwcnt/mali_kbase_hwcnt_gpu.h>
+#include <hwcnt/mali_kbase_hwcnt_types.h>
+
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
+
+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/* Backend watchdog timer interval in milliseconds: 18 seconds. */
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 18000;
+#else
+/* Backend watchdog timer interval in milliseconds: 1 second. */
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000;
+#endif /* IS_FPGA && !NO_MALI */
+
+/*
+ * IDLE_BUFFER_EMPTY -> USER_DUMPING_BUFFER_EMPTY on dump_request.
+ * IDLE_BUFFER_EMPTY -> TIMER_DUMPING after
+ * hwcnt_backend_watchdog_timer_interval_ms
+ * milliseconds, if no dump_request has been
+ * called in the meantime.
+ * IDLE_BUFFER_FULL -> USER_DUMPING_BUFFER_FULL on dump_request.
+ * IDLE_BUFFER_FULL -> TIMER_DUMPING after
+ * hwcnt_backend_watchdog_timer_interval_ms
+ * milliseconds, if no dump_request has been
+ * called in the meantime.
+ * IDLE_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_disable, upon discarding undumped
+ * counter values since the last dump_get.
+ * IDLE_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * IDLE_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * USER_DUMPING_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * USER_DUMPING_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * BUFFER_CLEARING -> IDLE_BUFFER_EMPTY on dump_clear, upon job manager backend
+ * dump_clear completion.
+ * TIMER_DUMPING -> IDLE_BUFFER_FULL on timer's callback completion.
+ * TIMER_DUMPING -> TIMER_DUMPING_USER_CLEAR on dump_clear, notifies the callback thread
+ * that there is no need for dumping the buffer
+ * anymore, and that the client will proceed
+ * clearing the buffer.
+ * TIMER_DUMPING_USER_CLEAR -> IDLE_BUFFER_EMPTY on timer's callback completion, when a user
+ * requested a dump_clear.
+ * TIMER_DUMPING -> TIMER_DUMPING_USER_REQUESTED on dump_request, when a client performs a
+ * dump request while the timer is dumping (the
+ * timer will perform the dump and (once
+ * completed) the client will retrieve the value
+ * from the buffer).
+ * TIMER_DUMPING_USER_REQUESTED -> IDLE_BUFFER_EMPTY on dump_get, when a timer completed and the
+ * user reads the periodic dump buffer.
+ * Any -> ERROR if the job manager backend returns an error
+ * (of any kind).
+ * USER_DUMPING_BUFFER_EMPTY -> IDLE_BUFFER_EMPTY on dump_get (performs get, ignores the
+ * periodic dump buffer and returns).
+ * USER_DUMPING_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_get (performs get, accumulates with
+ * periodic dump buffer and returns).
+ */
+
+/** enum backend_watchdog_state - State used to synchronize timer callbacks with the main thread.
+ * @HWCNT_JM_WD_ERROR: Received an error from the job manager backend calls.
+ * @HWCNT_JM_WD_IDLE_BUFFER_EMPTY: Initial state. Watchdog timer enabled, periodic dump buffer is
+ * empty.
+ * @HWCNT_JM_WD_IDLE_BUFFER_FULL: Watchdog timer enabled, periodic dump buffer is full.
+ * @HWCNT_JM_WD_BUFFER_CLEARING: The client is performing a dump clear. A concurrent timer callback
+ * thread should just ignore and reschedule another callback in
+ * hwcnt_backend_watchdog_timer_interval_ms milliseconds.
+ * @HWCNT_JM_WD_TIMER_DUMPING: The timer ran out. The callback is performing a periodic dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: While the timer is performing a periodic dump, user
+ * requested a dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR: While the timer is performing a dump, user requested a
+ * dump_clear. The timer has to complete the periodic dump
+ * and clear buffer (internal and job manager backend).
+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: From IDLE state, user requested a dump. The periodic
+ * dump buffer is empty.
+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: From IDLE state, user requested a dump. The periodic dump
+ * buffer is full.
+ *
+ * While the state machine is in HWCNT_JM_WD_TIMER_DUMPING*, only the timer callback thread is
+ * allowed to call the job manager backend layer.
+ */
+enum backend_watchdog_state {
+ HWCNT_JM_WD_ERROR,
+ HWCNT_JM_WD_IDLE_BUFFER_EMPTY,
+ HWCNT_JM_WD_IDLE_BUFFER_FULL,
+ HWCNT_JM_WD_BUFFER_CLEARING,
+ HWCNT_JM_WD_TIMER_DUMPING,
+ HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED,
+ HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR,
+ HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY,
+ HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL,
+};
+
+/** enum wd_init_state - State machine for initialization / termination of the backend resources
+ */
+enum wd_init_state {
+ HWCNT_JM_WD_INIT_START,
+ HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START,
+ HWCNT_JM_WD_INIT_ENABLE_MAP,
+ HWCNT_JM_WD_INIT_DUMP_BUFFER,
+ HWCNT_JM_WD_INIT_END
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm_watchdog_info - Immutable information used to initialize an
+ * instance of the job manager watchdog backend.
+ * @jm_backend_iface: Hardware counter backend interface. This module extends
+ * this interface with a watchdog that performs regular
+ * dumps. The new interface this module provides complies
+ * with the old backend interface.
+ * @dump_watchdog_iface: Dump watchdog interface, used to periodically dump the
+ * hardware counter in case no reads are requested within
+ * a certain time, used to avoid hardware counter's buffer
+ * saturation.
+ */
+struct kbase_hwcnt_backend_jm_watchdog_info {
+ struct kbase_hwcnt_backend_interface *jm_backend_iface;
+ struct kbase_hwcnt_watchdog_interface *dump_watchdog_iface;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm_watchdog - An instance of the job manager watchdog backend.
+ * @info: Immutable information used to create the job manager watchdog backend.
+ * @jm_backend: Job manager's backend internal state. To be passed as argument during parent calls.
+ * @timeout_ms: Time period in milliseconds for hardware counters dumping.
+ * @wd_dump_buffer: Used to store periodic dumps done by a timer callback function. Contents are
+ * valid in state %HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED,
+ * %HWCNT_JM_WD_IDLE_BUFFER_FULL or %HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL.
+ * @wd_enable_map: Watchdog backend internal buffer mask, initialized during
+ *                 dump_enable by copying the enable_map passed as argument.
+ * @wd_dump_timestamp: Holds the dumping timestamp for potential future client dump_request, filled
+ * during watchdog timer dumps.
+ * @watchdog_complete: Used for synchronization between watchdog dumper thread and client calls.
+ * @locked: Members protected from concurrent access by different threads.
+ * @locked.watchdog_lock: Lock used to access fields within this struct (that require mutual
+ * exclusion).
+ * @locked.is_enabled: If true then the wrapped job manager hardware counter backend and the
+ * watchdog timer are both enabled. If false then both are disabled (or soon
+ * will be). Races between enable and disable have undefined behavior.
+ * @locked.state: State used to synchronize timer callbacks with the main thread.
+ */
+struct kbase_hwcnt_backend_jm_watchdog {
+ const struct kbase_hwcnt_backend_jm_watchdog_info *info;
+ struct kbase_hwcnt_backend *jm_backend;
+ u32 timeout_ms;
+ struct kbase_hwcnt_dump_buffer wd_dump_buffer;
+ struct kbase_hwcnt_enable_map wd_enable_map;
+ u64 wd_dump_timestamp;
+ struct completion watchdog_complete;
+ struct {
+ spinlock_t watchdog_lock;
+ bool is_enabled;
+ enum backend_watchdog_state state;
+ } locked;
+};
+
+/* timer's callback function */
+static void kbasep_hwcnt_backend_jm_watchdog_timer_callback(void *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = backend;
+ unsigned long flags;
+ bool wd_accumulate;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!wd_backend->locked.is_enabled || wd_backend->locked.state == HWCNT_JM_WD_ERROR) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+
+ if (!(wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_EMPTY ||
+ wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)) {
+		/* Resetting the timer. Calling modify on a disabled timer enables it. */
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms);
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+	/* Start performing the dump */
+
+ /* if there has been a previous timeout use accumulating dump_get()
+ * otherwise use non-accumulating to overwrite buffer
+ */
+ wd_accumulate = (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL);
+
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend,
+ &wd_backend->wd_dump_timestamp) ||
+ wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend) ||
+ wd_backend->info->jm_backend_iface->dump_get(
+ wd_backend->jm_backend, &wd_backend->wd_dump_buffer, &wd_backend->wd_enable_map,
+ wd_accumulate)) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ /* Unblock user if it's waiting. */
+ complete_all(&wd_backend->watchdog_complete);
+ return;
+ }
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+ if (wd_backend->locked.state == HWCNT_JM_WD_TIMER_DUMPING) {
+		/* If there is no user request/clear, transition to
+		 * HWCNT_JM_WD_IDLE_BUFFER_FULL to indicate the timer dump is done and
+		 * the buffer is full. If the state changed to
+		 * HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED or
+		 * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR, the user will transition the
+		 * state machine to the next state.
+		 */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_FULL;
+ }
+ if (wd_backend->locked.state != HWCNT_JM_WD_ERROR && wd_backend->locked.is_enabled) {
+ /* reset the timer to schedule another callback. Calling modify on a
+ * disabled timer enables it.
+ */
+ /*The spin lock needs to be held in case the client calls dump_enable*/
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms);
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ /* Unblock user if it's waiting. */
+ complete_all(&wd_backend->watchdog_complete);
+}
+
+/* Helper methods: info structure creation and destruction. */
+
+static struct kbase_hwcnt_backend_jm_watchdog_info *
+kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface)
+{
+ struct kbase_hwcnt_backend_jm_watchdog_info *const info =
+ kmalloc(sizeof(*info), GFP_KERNEL);
+
+ if (!info)
+ return NULL;
+
+ *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface,
+ .dump_watchdog_iface =
+ watchdog_iface };
+
+ return info;
+}
+
+/****** kbase_hwcnt_backend_interface implementation *******/
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_jm_watchdog_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ const struct kbase_hwcnt_backend_jm_watchdog_info *wd_info = (void *)info;
+
+ if (WARN_ON(!info))
+ return NULL;
+
+ return wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
+}
+
+static void
+kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watchdog *wd_backend,
+ enum wd_init_state state)
+{
+ if (!wd_backend)
+ return;
+
+ WARN_ON(state > HWCNT_JM_WD_INIT_END);
+
+ while (state-- > HWCNT_JM_WD_INIT_START) {
+ switch (state) {
+ case HWCNT_JM_WD_INIT_BACKEND:
+ wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend);
+ break;
+ case HWCNT_JM_WD_INIT_ENABLE_MAP:
+ kbase_hwcnt_enable_map_free(&wd_backend->wd_enable_map);
+ break;
+ case HWCNT_JM_WD_INIT_DUMP_BUFFER:
+ kbase_hwcnt_dump_buffer_free(&wd_backend->wd_dump_buffer);
+ break;
+ case HWCNT_JM_WD_INIT_END:
+ break;
+ }
+ }
+
+ kfree(wd_backend);
+}
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn
+ * Calling term does *not* destroy the interface
+ */
+static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend =
+ (struct kbase_hwcnt_backend_jm_watchdog *)backend;
+
+ if (!backend)
+ return;
+
+ /* disable timer thread to avoid concurrent access to shared resources */
+ wd_backend->info->dump_watchdog_iface->disable(
+ wd_backend->info->dump_watchdog_iface->timer);
+
+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END);
+}
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ int errcode = 0;
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = NULL;
+ struct kbase_hwcnt_backend_jm_watchdog_info *const wd_info = (void *)info;
+ const struct kbase_hwcnt_backend_info *jm_info;
+ const struct kbase_hwcnt_metadata *metadata;
+ enum wd_init_state state = HWCNT_JM_WD_INIT_START;
+
+ if (WARN_ON(!info) || WARN_ON(!out_backend))
+ return -EINVAL;
+
+ jm_info = wd_info->jm_backend_iface->info;
+ metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
+
+ wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
+ if (!wd_backend) {
+ *out_backend = NULL;
+ return -ENOMEM;
+ }
+
+ *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
+ .info = wd_info,
+ .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
+ .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false }
+ };
+
+ while (state < HWCNT_JM_WD_INIT_END && !errcode) {
+ switch (state) {
+ case HWCNT_JM_WD_INIT_BACKEND:
+ errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend);
+ break;
+ case HWCNT_JM_WD_INIT_ENABLE_MAP:
+ errcode =
+ kbase_hwcnt_enable_map_alloc(metadata, &wd_backend->wd_enable_map);
+ break;
+ case HWCNT_JM_WD_INIT_DUMP_BUFFER:
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata,
+ &wd_backend->wd_dump_buffer);
+ break;
+ case HWCNT_JM_WD_INIT_END:
+ break;
+ }
+ if (!errcode)
+ state++;
+ }
+
+ if (errcode) {
+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, state);
+ *out_backend = NULL;
+ return errcode;
+ }
+
+ WARN_ON(state != HWCNT_JM_WD_INIT_END);
+
+ spin_lock_init(&wd_backend->locked.watchdog_lock);
+ init_completion(&wd_backend->watchdog_complete);
+
+ *out_backend = (struct kbase_hwcnt_backend *)wd_backend;
+ return 0;
+}
+
+/* Job manager watchdog backend, implementation of timestamp_ns */
+static u64 kbasep_hwcnt_backend_jm_watchdog_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return wd_backend->info->jm_backend_iface->timestamp_ns(wd_backend->jm_backend);
+}
+
+static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend,
+ const struct kbase_hwcnt_enable_map *enable_map, kbase_hwcnt_backend_dump_enable_fn enabler)
+{
+ int errcode = -EPERM;
+ unsigned long flags;
+
+ if (WARN_ON(!wd_backend) || WARN_ON(!enable_map))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ /* If the backend is already enabled return an error */
+ if (wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ /* Copy the enable map into the watchdog backend's own copy, for later use. */
+ kbase_hwcnt_enable_map_copy(&wd_backend->wd_enable_map, enable_map);
+
+ errcode = enabler(wd_backend->jm_backend, enable_map);
+ if (!errcode) {
+ /*Enable dump watchdog*/
+ errcode = wd_backend->info->dump_watchdog_iface->enable(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms,
+ kbasep_hwcnt_backend_jm_watchdog_timer_callback, wd_backend);
+ if (!errcode) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.is_enabled);
+ wd_backend->locked.is_enabled = true;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ } else
+ /* Revert the job manager backend to disabled. */
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_enable */
+static int
+kbasep_hwcnt_backend_jm_watchdog_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable);
+}
+
+/* Job manager watchdog backend, implementation of dump_enable_nolock */
+static int
+kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable_nolock);
+}
+
+/* Job manager watchdog backend, implementation of dump_disable */
+static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+
+ if (WARN_ON(!backend))
+ return;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+
+ wd_backend->locked.is_enabled = false;
+
+ /* Discard undumped counter values since the last dump_get. */
+ if (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ wd_backend->info->dump_watchdog_iface->disable(
+ wd_backend->info->dump_watchdog_iface->timer);
+
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+}
+
+/* Job manager watchdog backend, implementation of dump_clear */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+ int errcode = -EPERM;
+ bool clear_wd_wait_completion = false;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ if (WARN_ON(!backend))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_IDLE_BUFFER_FULL:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ wd_backend->locked.state = HWCNT_JM_WD_BUFFER_CLEARING;
+ errcode = 0;
+ break;
+ case HWCNT_JM_WD_TIMER_DUMPING:
+ /* The timer has requested a dump; when it completes, the job manager
+ * backend buffer will be zero.
+ */
+ clear_wd_wait_completion = true;
+ /* This thread will have to wait for the callback to terminate and then call a
+ * dump_clear on the job manager backend. We change the state to
+ * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR to notify the callback thread there is
+ * no more need to dump the buffer (since we will clear it right after anyway).
+ * We set up a wait queue to synchronize with the callback.
+ */
+ reinit_completion(&wd_backend->watchdog_complete);
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR;
+ errcode = 0;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!errcode) {
+ if (clear_wd_wait_completion) {
+ /* Waiting for the callback to finish */
+ wait_for_completion(&wd_backend->watchdog_complete);
+ }
+
+ /* Clearing job manager backend buffer */
+ errcode = wd_backend->info->jm_backend_iface->dump_clear(wd_backend->jm_backend);
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_BUFFER_CLEARING &&
+ wd_backend->locked.state != HWCNT_JM_WD_ERROR);
+
+ WARN_ON(!wd_backend->locked.is_enabled);
+
+ if (!errcode && wd_backend->locked.state != HWCNT_JM_WD_ERROR) {
+ /* Setting the internal buffer state to EMPTY */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ /* Resetting the timer. Calling modify on a disabled timer
+ * enables it.
+ */
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer,
+ wd_backend->timeout_ms);
+ } else {
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ errcode = -EPERM;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_request */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ bool call_dump_request = false;
+ int errcode = 0;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ if (WARN_ON(!backend) || WARN_ON(!dump_time_ns))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY:
+ /* Advance the state to prevent the timer callback from running while we
+ * call the job manager backend.
+ */
+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY;
+ call_dump_request = true;
+ break;
+ case HWCNT_JM_WD_IDLE_BUFFER_FULL:
+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL;
+ call_dump_request = true;
+ break;
+ case HWCNT_JM_WD_TIMER_DUMPING:
+ /* Retrieve timing information from previous dump_request */
+ *dump_time_ns = wd_backend->wd_dump_timestamp;
+ /* On the next client call (dump_wait) the thread will have to wait for the
+ * callback to finish the dumping.
+ * We set up a wait queue to synchronize with the callback.
+ */
+ reinit_completion(&wd_backend->watchdog_complete);
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (call_dump_request) {
+ errcode = wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend,
+ dump_time_ns);
+ if (!errcode) {
+ /* Resetting the timer. Calling modify on a disabled timer enables it. */
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer,
+ wd_backend->timeout_ms);
+ } else {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(!wd_backend->locked.is_enabled);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_wait */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+ int errcode = -EPERM;
+ bool wait_for_auto_dump = false, wait_for_user_dump = false;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+
+ if (WARN_ON(!backend))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED:
+ wait_for_auto_dump = true;
+ errcode = 0;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ wait_for_user_dump = true;
+ errcode = 0;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (wait_for_auto_dump)
+ wait_for_completion(&wd_backend->watchdog_complete);
+ else if (wait_for_user_dump) {
+ errcode = wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend);
+ if (errcode) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(!wd_backend->locked.is_enabled);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_get */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_get(
+ struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map, bool accumulate)
+{
+ bool call_dump_get = false;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+ int errcode = 0;
+
+ if (WARN_ON(!backend) || WARN_ON(!dump_buffer) || WARN_ON(!enable_map))
+ return -EINVAL;
+
+ /* The resultant contents of the dump buffer are only well defined if a prior
+ * call to dump_wait returned successfully, and a new dump has not yet been
+ * requested by a call to dump_request.
+ */
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED:
+ /* We assume dump_wait has been called and completed successfully. */
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+ /* Use state to indicate that the buffer is now empty. */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ /*accumulate or copy watchdog data to user buffer first so that dump_get can set
+ * the header correctly
+ */
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+ /*accumulate backend data into user buffer on top of watchdog data*/
+ accumulate = true;
+ call_dump_get = true;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ call_dump_get = true;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (call_dump_get && !errcode) {
+ /* Dump the job manager backend into the user buffer, honouring the
+ * accumulate flag.
+ */
+ errcode = wd_backend->info->jm_backend_iface->dump_get(
+ wd_backend->jm_backend, dump_buffer, enable_map, accumulate);
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY &&
+ wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+ if (!errcode)
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ else
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+
+ return errcode;
+}
+
+/* exposed methods */
+
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface)
+{
+ struct kbase_hwcnt_backend_jm_watchdog_info *info = NULL;
+
+ if (WARN_ON(!backend_iface) || WARN_ON(!watchdog_iface) || WARN_ON(!out_iface))
+ return -EINVAL;
+
+ info = kbasep_hwcnt_backend_jm_watchdog_info_create(backend_iface, watchdog_iface);
+ if (!info)
+ return -ENOMEM;
+
+ /* Link the info table with the output iface, to allow the callbacks below to
+ * access the info object later on.
+ */
+ *out_iface = (struct kbase_hwcnt_backend_interface){
+ .info = (void *)info,
+ .metadata = kbasep_hwcnt_backend_jm_watchdog_metadata,
+ .init = kbasep_hwcnt_backend_jm_watchdog_init,
+ .term = kbasep_hwcnt_backend_jm_watchdog_term,
+ .timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns,
+ .dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable,
+ .dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock,
+ .dump_disable = kbasep_hwcnt_backend_jm_watchdog_dump_disable,
+ .dump_clear = kbasep_hwcnt_backend_jm_watchdog_dump_clear,
+ .dump_request = kbasep_hwcnt_backend_jm_watchdog_dump_request,
+ .dump_wait = kbasep_hwcnt_backend_jm_watchdog_dump_wait,
+ .dump_get = kbasep_hwcnt_backend_jm_watchdog_dump_get
+ };
+
+ /*registering watchdog backend module methods on the output interface*/
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface || !iface->info)
+ return;
+
+ kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info);
+
+ /*blanking the watchdog backend interface*/
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
new file mode 100644
index 0000000..02a7952
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of the mali_kbase_hwcnt_backend interface for the job
+ * manager backend. This module sits between the hardware counter
+ * (hwcnt_accumulator) module (the interface consumer) and the job manager
+ * backend module (hwcnt_backend_jm), and buffers the dump requests issued by
+ * the hwcnt_accumulator consumer. This module is NOT multi-thread safe: the
+ * caller must ensure the exposed methods are called by at most one thread at
+ * any time.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog
+ * backend interface.
+ * @backend_iface: Non-NULL pointer to the backend interface structure that this module will
+ * extend.
+ * @watchdog_iface: Non-NULL pointer to a hardware counter watchdog interface.
+ * @out_iface: Non-NULL pointer to backend interface structure that is filled in
+ * on creation success.
+ *
+ * Calls to out_iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface);
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_destroy() - Destroy a job manager hardware counter watchdog
+ * backend interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ */
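
A minimal wiring sketch for the create/destroy pair declared above. It assumes the caller already has an initialised job manager backend interface and a watchdog timer interface from the other modules in this series; the names jm_iface, wd_timer_iface and example_wrap_jm_backend are hypothetical, and only the two functions declared in this header are exercised.

/* Hypothetical wiring sketch; jm_iface and wd_timer_iface are assumed to have
 * been created by their respective modules.
 */
static int example_wrap_jm_backend(struct kbase_hwcnt_backend_interface *jm_iface,
                                   struct kbase_hwcnt_watchdog_interface *wd_timer_iface,
                                   struct kbase_hwcnt_backend_interface *wrapped_iface)
{
        /* Interpose the watchdog buffering layer over the plain JM backend. */
        int errcode = kbase_hwcnt_backend_jm_watchdog_create(jm_iface, wd_timer_iface,
                                                             wrapped_iface);
        if (errcode)
                return errcode;

        /* wrapped_iface can now be handed to kbase_hwcnt_context_init(). Once the
         * context has been terminated, release the wrapper with
         * kbase_hwcnt_backend_jm_watchdog_destroy(wrapped_iface).
         */
        return 0;
}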
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
new file mode 100644
index 0000000..e724572
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
@@ -0,0 +1,775 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Implementation of hardware counter context and accumulator APIs.
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+/**
+ * enum kbase_hwcnt_accum_state - Hardware counter accumulator states.
+ * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail.
+ * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled.
+ * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are
+ * any enabled counters.
+ */
+enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED };
+
+/**
+ * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
+ * @metadata: Pointer to immutable hwcnt metadata.
+ * @backend: Pointer to created counter backend.
+ * @state: The current state of the accumulator.
+ * - State transition from disabled->enabled or
+ * disabled->error requires state_lock.
+ * - State transition from enabled->disabled or
+ * enabled->error requires both accum_lock and
+ * state_lock.
+ * - Error state persists until next disable.
+ * @enable_map: The current set of enabled counters.
+ * - Must only be modified while holding both
+ * accum_lock and state_lock.
+ * - Can be read while holding either lock.
+ * - Must stay in sync with enable_map_any_enabled.
+ * @enable_map_any_enabled: True if any counters in the map are enabled, else
+ * false. If true, and state is ACCUM_STATE_ENABLED,
+ * then the counter backend will be enabled.
+ * - Must only be modified while holding both
+ * accum_lock and state_lock.
+ * - Can be read while holding either lock.
+ * - Must stay in sync with enable_map.
+ * @scratch_map: Scratch enable map, used as temporary enable map
+ * storage during dumps.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @accum_buf: Accumulation buffer, where dumps will be accumulated
+ * into on transition to a disable state.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @accumulated: True if the accumulation buffer has been accumulated
+ * into and not subsequently read from yet, else false.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent
+ * dump that was requested by the user.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ */
+struct kbase_hwcnt_accumulator {
+ const struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_backend *backend;
+ enum kbase_hwcnt_accum_state state;
+ struct kbase_hwcnt_enable_map enable_map;
+ bool enable_map_any_enabled;
+ struct kbase_hwcnt_enable_map scratch_map;
+ struct kbase_hwcnt_dump_buffer accum_buf;
+ bool accumulated;
+ u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_context - Hardware counter context structure.
+ * @iface: Pointer to hardware counter backend interface.
+ * @state_lock: Spinlock protecting state.
+ * @disable_count: Disable count of the context. Initialised to 1.
+ * Decremented when the accumulator is acquired, and incremented
+ * on release. Incremented on calls to
+ * kbase_hwcnt_context_disable[_atomic], and decremented on
+ * calls to kbase_hwcnt_context_enable.
+ * - Must only be read or modified while holding state_lock.
+ * @accum_lock: Mutex protecting accumulator.
+ * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or
+ * termination. Set to true before accumulator initialisation,
+ * and false after accumulator termination.
+ * - Must only be modified while holding both accum_lock and
+ * state_lock.
+ * - Can be read while holding either lock.
+ * @accum: Hardware counter accumulator structure.
+ * @wq: Centralized workqueue for users of hardware counters to
+ * submit async hardware counter related work. Never directly
+ * called, but it's expected that a lot of the functions in this
+ * API will end up called from the enqueued async work.
+ */
+struct kbase_hwcnt_context {
+ const struct kbase_hwcnt_backend_interface *iface;
+ spinlock_t state_lock;
+ size_t disable_count;
+ struct mutex accum_lock;
+ bool accum_inited;
+ struct kbase_hwcnt_accumulator accum;
+ struct workqueue_struct *wq;
+};
+
+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx)
+{
+ struct kbase_hwcnt_context *hctx = NULL;
+
+ if (!iface || !out_hctx)
+ return -EINVAL;
+
+ hctx = kzalloc(sizeof(*hctx), GFP_KERNEL);
+ if (!hctx)
+ goto err_alloc_hctx;
+
+ hctx->iface = iface;
+ spin_lock_init(&hctx->state_lock);
+ hctx->disable_count = 1;
+ mutex_init(&hctx->accum_lock);
+ hctx->accum_inited = false;
+
+ hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
+ if (!hctx->wq)
+ goto err_alloc_workqueue;
+
+ *out_hctx = hctx;
+
+ return 0;
+
+err_alloc_workqueue:
+ kfree(hctx);
+err_alloc_hctx:
+ return -ENOMEM;
+}
+
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx)
+{
+ if (!hctx)
+ return;
+
+ /* Make sure we didn't leak the accumulator */
+ WARN_ON(hctx->accum_inited);
+
+ /* We don't expect any work to be pending on this workqueue.
+ * Regardless, this will safely drain and complete the work.
+ */
+ destroy_workqueue(hctx->wq);
+ kfree(hctx);
+}
+
+/**
+ * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx)
+{
+ WARN_ON(!hctx);
+ WARN_ON(!hctx->accum_inited);
+
+ kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map);
+ kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf);
+ kbase_hwcnt_enable_map_free(&hctx->accum.enable_map);
+ hctx->iface->term(hctx->accum.backend);
+ memset(&hctx->accum, 0, sizeof(hctx->accum));
+}
+
+/**
+ * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
+{
+ int errcode;
+
+ WARN_ON(!hctx);
+ WARN_ON(!hctx->accum_inited);
+
+ errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend);
+ if (errcode)
+ goto error;
+
+ hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info);
+ hctx->accum.state = ACCUM_STATE_ERROR;
+
+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map);
+ if (errcode)
+ goto error;
+
+ hctx->accum.enable_map_any_enabled = false;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map);
+ if (errcode)
+ goto error;
+
+ hctx->accum.accumulated = false;
+
+ hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend);
+
+ return 0;
+
+error:
+ kbasep_hwcnt_accumulator_term(hctx);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the
+ * disabled state, from the enabled or
+ * error states.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+ int errcode = 0;
+ bool backend_enabled = false;
+ struct kbase_hwcnt_accumulator *accum;
+ unsigned long flags;
+ u64 dump_time_ns;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->accum_lock);
+ WARN_ON(!hctx->accum_inited);
+
+ accum = &hctx->accum;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ WARN_ON(hctx->disable_count != 0);
+ WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
+
+ if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled))
+ backend_enabled = true;
+
+ if (!backend_enabled)
+ hctx->accum.state = ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ /* Early out if the backend is not already enabled */
+ if (!backend_enabled)
+ return;
+
+ if (!accumulate)
+ goto disable;
+
+ /* Try and accumulate before disabling */
+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
+ if (errcode)
+ goto disable;
+
+ errcode = hctx->iface->dump_wait(accum->backend);
+ if (errcode)
+ goto disable;
+
+ errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map,
+ accum->accumulated);
+ if (errcode)
+ goto disable;
+
+ accum->accumulated = true;
+
+disable:
+ hctx->iface->dump_disable(accum->backend);
+
+ /* Regardless of any errors during the accumulate, put the accumulator
+ * in the disabled state.
+ */
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum.state = ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+
+/**
+ * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the
+ * enabled state, from the disabled state.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
+{
+ int errcode = 0;
+ struct kbase_hwcnt_accumulator *accum;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->state_lock);
+ WARN_ON(!hctx->accum_inited);
+ WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED);
+
+ accum = &hctx->accum;
+
+ /* The backend only needs enabling if any counters are enabled */
+ if (accum->enable_map_any_enabled)
+ errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map);
+
+ if (!errcode)
+ accum->state = ACCUM_STATE_ENABLED;
+ else
+ accum->state = ACCUM_STATE_ERROR;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date
+ * values of enabled counters possible, and
+ * optionally update the set of enabled
+ * counters.
+ * @hctx: Non-NULL pointer to the hardware counter context
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ * @new_map: Pointer to the new counter enable map. If non-NULL, must have
+ * the same metadata as the accumulator. If NULL, the set of
+ * enabled counters will be unchanged.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf,
+ const struct kbase_hwcnt_enable_map *new_map)
+{
+ int errcode = 0;
+ unsigned long flags;
+ enum kbase_hwcnt_accum_state state;
+ bool dump_requested = false;
+ bool dump_written = false;
+ bool cur_map_any_enabled;
+ struct kbase_hwcnt_enable_map *cur_map;
+ bool new_map_any_enabled = false;
+ u64 dump_time_ns;
+ struct kbase_hwcnt_accumulator *accum;
+
+ WARN_ON(!hctx);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata));
+ WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata));
+ WARN_ON(!hctx->accum_inited);
+ lockdep_assert_held(&hctx->accum_lock);
+
+ accum = &hctx->accum;
+ cur_map = &accum->scratch_map;
+
+ /* Save out info about the current enable map */
+ cur_map_any_enabled = accum->enable_map_any_enabled;
+ kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
+
+ if (new_map)
+ new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map);
+
+ /*
+ * We're holding accum_lock, so the accumulator state might transition
+ * from disabled to enabled during this function (as enabling is lock
+ * free), but it will never disable (as disabling needs to hold the
+ * accum_lock), nor will it ever transition from enabled to error (as
+ * an enable while we're already enabled is impossible).
+ *
+ * If we're already disabled, we'll only look at the accumulation buffer
+ * rather than do a real dump, so a concurrent enable does not affect
+ * us.
+ *
+ * If a concurrent enable fails, we might transition to the error
+ * state, but again, as we're only looking at the accumulation buffer,
+ * it's not an issue.
+ */
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ state = accum->state;
+
+ /*
+ * Update the new map now, such that if an enable occurs during this
+ * dump then that enable will set the new map. If we're already enabled,
+ * then we'll do it ourselves after the dump.
+ */
+ if (new_map) {
+ kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map);
+ accum->enable_map_any_enabled = new_map_any_enabled;
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ /* Error state, so early out. No need to roll back any map updates */
+ if (state == ACCUM_STATE_ERROR)
+ return -EIO;
+
+ /* Initiate the dump if the backend is enabled. */
+ if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
+ if (dump_buf) {
+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
+ dump_requested = true;
+ } else {
+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+ errcode = hctx->iface->dump_clear(accum->backend);
+ }
+
+ if (errcode)
+ goto error;
+ } else {
+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+ }
+
+ /* Copy any accumulation into the dest buffer */
+ if (accum->accumulated && dump_buf) {
+ kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map);
+ dump_written = true;
+ }
+
+ /* Wait for any requested dumps to complete */
+ if (dump_requested) {
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+ errcode = hctx->iface->dump_wait(accum->backend);
+ if (errcode)
+ goto error;
+ }
+
+ /* If we're enabled and there's a new enable map, change the enabled set
+ * as soon after the dump has completed as possible.
+ */
+ if ((state == ACCUM_STATE_ENABLED) && new_map) {
+ /* Backend is only enabled if there were any enabled counters */
+ if (cur_map_any_enabled)
+ hctx->iface->dump_disable(accum->backend);
+
+ /* (Re-)enable the backend if the new map has enabled counters.
+ * No need to acquire the spinlock, as concurrent enable while
+ * we're already enabled and holding accum_lock is impossible.
+ */
+ if (new_map_any_enabled) {
+ errcode = hctx->iface->dump_enable(accum->backend, new_map);
+ if (errcode)
+ goto error;
+ }
+ }
+
+ /* Copy, accumulate, or zero into the dest buffer to finish */
+ if (dump_buf) {
+ /* If we dumped, copy or accumulate it into the destination */
+ if (dump_requested) {
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+ errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map,
+ dump_written);
+ if (errcode)
+ goto error;
+ dump_written = true;
+ }
+
+ /* If we've not written anything into the dump buffer so far, it
+ * means there was nothing to write. Zero any enabled counters.
+ */
+ if (!dump_written)
+ kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+ }
+
+ /* Write out timestamps */
+ *ts_start_ns = accum->ts_last_dump_ns;
+ *ts_end_ns = dump_time_ns;
+
+ accum->accumulated = false;
+ accum->ts_last_dump_ns = dump_time_ns;
+
+ return 0;
+error:
+ /* An error was only physically possible if the backend was enabled */
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+
+ /* Disable the backend, and transition to the error state */
+ hctx->iface->dump_disable(accum->backend);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ accum->state = ACCUM_STATE_ERROR;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+ unsigned long flags;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->accum_lock);
+
+ if (!kbase_hwcnt_context_disable_atomic(hctx)) {
+ kbasep_hwcnt_accumulator_disable(hctx, accumulate);
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ /* Atomic disable failed and we're holding the mutex, so current
+ * disable count must be 0.
+ */
+ WARN_ON(hctx->disable_count != 0);
+ hctx->disable_count++;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ }
+}
+
+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum)
+{
+ int errcode = 0;
+ unsigned long flags;
+
+ if (!hctx || !accum)
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!hctx->accum_inited)
+ /* Set accum initing now to prevent concurrent init */
+ hctx->accum_inited = true;
+ else
+ /* Already have an accum, or already being inited */
+ errcode = -EBUSY;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ if (errcode)
+ return errcode;
+
+ errcode = kbasep_hwcnt_accumulator_init(hctx);
+
+ if (errcode) {
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+ }
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ WARN_ON(hctx->disable_count == 0);
+ WARN_ON(hctx->accum.enable_map_any_enabled);
+
+ /* Decrement the disable count to allow the accumulator to be accessible
+ * now that it's fully constructed.
+ */
+ hctx->disable_count--;
+
+ /*
+ * Make sure the accumulator is initialised to the correct state.
+ * Regardless of initial state, counters don't need to be enabled via
+ * the backend, as the initial enable map has no enabled counters.
+ */
+ hctx->accum.state = (hctx->disable_count == 0) ? ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ *accum = &hctx->accum;
+
+ return 0;
+}
+
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum)
+ return;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ mutex_lock(&hctx->accum_lock);
+
+ /* Double release is a programming error */
+ WARN_ON(!hctx->accum_inited);
+
+ /* Disable the context to ensure the accumulator is inaccessible while
+ * we're destroying it. This performs the corresponding disable count
+ * increment to the decrement done during acquisition.
+ */
+ kbasep_hwcnt_context_disable(hctx, false);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ kbasep_hwcnt_accumulator_term(hctx);
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+}
+
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
+{
+ if (WARN_ON(!hctx))
+ return;
+
+ /* Try and atomically disable first, so we can avoid locking the mutex
+ * if we don't need to.
+ */
+ if (kbase_hwcnt_context_disable_atomic(hctx))
+ return;
+
+ mutex_lock(&hctx->accum_lock);
+
+ kbasep_hwcnt_context_disable(hctx, true);
+
+ mutex_unlock(&hctx->accum_lock);
+}
+
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
+{
+ unsigned long flags;
+ bool atomic_disabled = false;
+
+ if (WARN_ON(!hctx))
+ return false;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!WARN_ON(hctx->disable_count == SIZE_MAX)) {
+ /*
+ * If disable count is non-zero, we can just bump the disable
+ * count.
+ *
+ * Otherwise, we can't disable in an atomic context.
+ */
+ if (hctx->disable_count != 0) {
+ hctx->disable_count++;
+ atomic_disabled = true;
+ }
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ return atomic_disabled;
+}
+
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
+{
+ unsigned long flags;
+
+ if (WARN_ON(!hctx))
+ return;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!WARN_ON(hctx->disable_count == 0)) {
+ if (hctx->disable_count == 1)
+ kbasep_hwcnt_accumulator_enable(hctx);
+
+ hctx->disable_count--;
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx)
+{
+ if (!hctx)
+ return NULL;
+
+ return hctx->iface->metadata(hctx->iface->info);
+}
+
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work)
+{
+ if (WARN_ON(!hctx) || WARN_ON(!work))
+ return false;
+
+ return queue_work(hctx->wq, work);
+}
+
+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum || !new_map || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ if ((new_map->metadata != hctx->accum.metadata) ||
+ (dump_buf && (dump_buf->metadata != hctx->accum.metadata)))
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+
+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+}
+
+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ if (dump_buf && (dump_buf->metadata != hctx->accum.metadata))
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+
+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+}
+
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum)
+{
+ struct kbase_hwcnt_context *hctx;
+
+ if (WARN_ON(!accum))
+ return 0;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+ return hctx->iface->timestamp_ns(accum->backend);
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h
new file mode 100644
index 0000000..069e020
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter accumulator API.
+ */
+
+#ifndef _KBASE_HWCNT_ACCUMULATOR_H_
+#define _KBASE_HWCNT_ACCUMULATOR_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_accumulator;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator
+ * for a hardware counter context.
+ * @hctx: Non-NULL pointer to a hardware counter context.
+ * @accum: Non-NULL pointer to where the pointer to the created accumulator
+ * will be stored on success.
+ *
+ * There can exist at most one instance of the hardware counter accumulator per
+ * context at a time.
+ *
+ * If multiple clients need access to the hardware counters at the same time,
+ * then an abstraction built on top of the single instance to the hardware
+ * counter accumulator is required.
+ *
+ * No counters will be enabled with the returned accumulator. A subsequent call
+ * to kbase_hwcnt_accumulator_set_counters must be used to turn them on.
+ *
+ * There are four components to a hardware counter dump:
+ * - A set of enabled counters
+ * - A start time
+ * - An end time
+ * - A dump buffer containing the accumulated counter values for all enabled
+ * counters between the start and end times.
+ *
+ * For each dump, it is guaranteed that all enabled counters were active for the
+ * entirety of the period between the start and end times.
+ *
+ * It is also guaranteed that the start time of dump "n" is always equal to the
+ * end time of dump "n - 1".
+ *
+ * For all dumps, the values of any counters that were not enabled are undefined.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum);
+
+/**
+ * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * The accumulator must be released before the context the accumulator was
+ * created from is terminated.
+ */
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
+
+/**
+ * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently
+ * enabled counters, and enable a new
+ * set of counters that will be used
+ * for subsequent dumps.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ * @new_map: Non-NULL pointer to the new counter enable map. Must have the
+ * same metadata as the accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
+ * counters.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend
+ * timestamp.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * Return: Accumulator backend timestamp in nanoseconds.
+ */
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum);
+
+#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */
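
A short sketch of the dump cycle described above, under the assumption that the caller also has access to kbase_hwcnt_context_metadata() and the enable map / dump buffer helpers declared elsewhere in this patch. The function name example_dump_once is illustrative only, and the step that actually selects counters in the enable map is elided.

/* Illustrative dump cycle; error handling is intentionally terse. */
static int example_dump_once(struct kbase_hwcnt_context *hctx)
{
        const struct kbase_hwcnt_metadata *metadata = kbase_hwcnt_context_metadata(hctx);
        struct kbase_hwcnt_accumulator *accum;
        struct kbase_hwcnt_enable_map map;
        struct kbase_hwcnt_dump_buffer buf;
        u64 ts_start_ns, ts_end_ns;
        int errcode;

        /* Only one accumulator may exist per context at a time. */
        errcode = kbase_hwcnt_accumulator_acquire(hctx, &accum);
        if (errcode)
                return errcode;

        errcode = kbase_hwcnt_enable_map_alloc(metadata, &map);
        if (errcode)
                goto out_release;
        errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &buf);
        if (errcode)
                goto out_free_map;

        /* ... mark the desired counters as enabled in 'map' here ... */

        /* Enable the chosen counters; the discarded dump starts the interval. */
        errcode = kbase_hwcnt_accumulator_set_counters(accum, &map, &ts_start_ns,
                                                       &ts_end_ns, NULL);
        if (!errcode)
                /* Collect counter values accumulated between ts_start_ns and ts_end_ns. */
                errcode = kbase_hwcnt_accumulator_dump(accum, &ts_start_ns, &ts_end_ns,
                                                       &buf);

        kbase_hwcnt_dump_buffer_free(&buf);
out_free_map:
        kbase_hwcnt_enable_map_free(&map);
out_release:
        kbase_hwcnt_accumulator_release(accum);
        return errcode;
}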
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
new file mode 100644
index 0000000..89732a9
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter context API.
+ */
+
+#ifndef _KBASE_HWCNT_CONTEXT_H_
+#define _KBASE_HWCNT_CONTEXT_H_
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct kbase_hwcnt_backend_interface;
+struct kbase_hwcnt_context;
+
+/**
+ * kbase_hwcnt_context_init() - Initialise a hardware counter context.
+ * @iface: Non-NULL pointer to a hardware counter backend interface.
+ * @out_hctx: Non-NULL pointer to where the pointer to the created context will
+ * be stored on success.
+ *
+ * On creation, the disable count of the context will be 0.
+ * A hardware counter accumulator can be acquired using a created context.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx);
+
+/**
+ * kbase_hwcnt_context_term() - Terminate a hardware counter context.
+ * @hctx: Pointer to context to be terminated.
+ */
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by
+ * the context, so related counter data
+ * structures can be created.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * If a call to this function increments the disable count from 0 to 1, and
+ * an accumulator has been acquired, then a counter dump will be performed
+ * before counters are disabled via the backend interface.
+ *
+ * Subsequent dumps via the accumulator while counters are disabled will first
+ * return the accumulated dump, then will return dumps with zeroed counters.
+ *
+ * After this function call returns, it is guaranteed that counters will not be
+ * enabled via the backend interface.
+ */
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the
+ * context if possible in an atomic
+ * context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * This function will only succeed if hardware counters are effectively already
+ * disabled, i.e. there is no accumulator, the disable count is already
+ * non-zero, or the accumulator has no counters set.
+ *
+ * After this function call returns true, it is guaranteed that counters will
+ * not be enabled via the backend interface.
+ *
+ * Return: True if the disable count was incremented, else False.
+ */
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_enable() - Decrement the disable count of the context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * If a call to this function decrements the disable count from 1 to 0, and
+ * an accumulator has been acquired, then counters will be re-enabled via the
+ * backend interface.
+ *
+ * If an accumulator has been acquired and enabling counters fails for some
+ * reason, the accumulator will be placed into an error state.
+ *
+ * It is only valid to call this function one time for each prior returned call
+ * to kbase_hwcnt_context_disable.
+ *
+ * The spinlock documented in the backend interface that was passed in to
+ * kbase_hwcnt_context_init() must be held before calling this function.
+ */
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_queue_work() - Queue hardware counter related async
+ * work on a workqueue specialized for
+ * hardware counters.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ * @work: Non-NULL pointer to work to queue.
+ *
+ * Return: false if work was already on a queue, true otherwise.
+ *
+ * Performance counter related work is high priority, short running, and its
+ * CPU locality is generally unimportant. There is no standard workqueue that
+ * can service this flavor of work.
+ *
+ * Rather than have each user of counters define their own workqueue, we have
+ * a centralized one in here that anybody using this hardware counter API
+ * should use.
+ *
+ * All submitted work must have completed before the context is destroyed.
+ * Given that work enqueued via this function is likely to be hardware counter
+ * related, and will therefore use the context object, this behavior will
+ * usually occur naturally.
+ *
+ * Historical note: prior to this centralized workqueue, the system_highpri_wq
+ * was used. This was generally fine, except when a particularly long-running,
+ * higher-priority thread ended up scheduled on the enqueuing CPU core. Given
+ * that hardware counters require tight integration with power management,
+ * this meant progress through the power management states could be stalled
+ * for however long that higher-priority thread ran.
+ */
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work);
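+
+/*
+ * Illustrative sketch (editorial addition, not part of the driver sources):
+ * how a hypothetical user of this API might queue deferred counter work on
+ * the centralized workqueue described above. The my_hwcnt_worker() callback,
+ * the my_hwcnt_work variable and the already-initialised hctx are assumptions
+ * made purely for the example.
+ *
+ *   static struct work_struct my_hwcnt_work;
+ *
+ *   static void my_hwcnt_worker(struct work_struct *work)
+ *   {
+ *           // Short-running, high priority counter work goes here.
+ *   }
+ *
+ *   // At initialisation time:
+ *   INIT_WORK(&my_hwcnt_work, my_hwcnt_worker);
+ *
+ *   // When counter work needs to be deferred:
+ *   kbase_hwcnt_context_queue_work(hctx, &my_hwcnt_work);
+ *
+ * All queued work must have completed (e.g. via flush_work()) before
+ * kbase_hwcnt_context_term(hctx) is called.
+ */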
+
+#endif /* _KBASE_HWCNT_CONTEXT_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
new file mode 100644
index 0000000..74916da
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -0,0 +1,738 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/err.h>
+
+/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements
+ */
+enum enable_map_idx {
+ EM_LO,
+ EM_HI,
+ EM_COUNT,
+};
+
+static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ if (is_csf)
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
+ else
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ if (is_csf)
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
+ else
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ if (is_csf)
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
+ else
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata
+ * for the GPU.
+ * @gpu_info: Non-NULL pointer to hwcnt info for current GPU.
+ * @is_csf: true for CSF GPU, otherwise false.
+ * @counter_set: The performance counter set to use.
+ * @metadata: Non-NULL pointer to where created metadata is stored
+ * on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ const bool is_csf,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **metadata)
+{
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description group;
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ size_t non_sc_block_count;
+ size_t sc_block_count;
+
+ WARN_ON(!gpu_info);
+ WARN_ON(!metadata);
+
+ /* Calculate number of block instances that aren't shader cores */
+ non_sc_block_count = 2 + gpu_info->l2_count;
+ /* Calculate number of block instances that are shader cores */
+ sc_block_count = fls64(gpu_info->core_mask);
+
+ /*
+ * A system can have up to 64 shader cores, but the 64-bit
+ * availability mask can't physically represent that many cores as well
+ * as the other hardware blocks.
+ * Error out if there are more blocks than our implementation can
+ * support.
+ */
+ if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
+ return -EINVAL;
+
+ /* One Front End block */
+ kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
+ blks[0].inst_cnt = 1;
+ blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ /* One Tiler block */
+ kbasep_get_tiler_block_type(&blks[1].type, counter_set);
+ blks[1].inst_cnt = 1;
+ blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ /* l2_count memsys blks */
+ kbasep_get_memsys_block_type(&blks[2].type, counter_set);
+ blks[2].inst_cnt = gpu_info->l2_count;
+ blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ /*
+ * There are as many shader cores in the system as there are bits set in
+ * the core mask. However, the dump buffer memory requirements need to
+ * take into account the fact that the core mask may be non-contiguous.
+ *
+ * For example, a system with a core mask of 0b1011 has the same dump
+ * buffer memory requirements as a system with 0b1111, but requires more
+ * memory than a system with 0b0111. However, core 2 of the system with
+ * 0b1011 doesn't physically exist, and the dump buffer memory that
+ * accounts for that core will never be written to when we do a counter
+ * dump.
+ *
+ * We find the core mask's last set bit to determine the memory
+ * requirements, and embed the core mask into the availability mask so
+ * we can determine later which shader cores physically exist.
+ */
+ kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
+ blks[3].inst_cnt = sc_block_count;
+ blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
+
+ group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+ group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
+ group.blks = blks;
+
+ desc.grp_cnt = 1;
+ desc.grps = &group;
+ desc.clk_cnt = gpu_info->clk_cnt;
+
+ /* The JM, Tiler, and L2s are always available, and are before cores */
+ desc.avail_mask = (1ull << non_sc_block_count) - 1;
+ /* Embed the core mask directly in the availability mask */
+ desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count);
+
+ return kbase_hwcnt_metadata_create(&desc, metadata);
+}
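+
+/*
+ * Worked example (editorial addition): for a hypothetical GPU with
+ * l2_count = 2 and core_mask = 0b1011, non_sc_block_count = 2 + 2 = 4 and
+ * sc_block_count = fls64(0b1011) = 4, so the function builds:
+ *
+ *   desc.avail_mask  = (1ull << 4) - 1;   // FE, Tiler and both MEMSYS blocks
+ *   desc.avail_mask |= 0b1011ull << 4;    // sparse shader core mask
+ *   // i.e. avail_mask == 0b10111111
+ *
+ * Dump buffer space is still reserved for the missing core 2 (bit 6), but
+ * that bit stays clear in the availability mask so the core is reported as
+ * absent, matching the comment above.
+ */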
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the
+ * GPU.
+ * @gpu_info: Non-NULL pointer to hwcnt info for the GPU.
+ *
+ * Return: Size of buffer the GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
+{
+ WARN_ON(!gpu_info);
+
+ return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
+ gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
+}
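+
+/*
+ * Worked example (editorial addition): for a hypothetical GPU with
+ * l2_count = 1, core_mask = 0xF and prfcnt_values_per_block = 64 this gives
+ * (2 + 1 + fls64(0xF)) * 64 * KBASE_HWCNT_VALUE_HW_BYTES = 7 * 64 * 4 = 1792
+ * bytes, which is half of the 64-bit-per-value metadata dump_buf_bytes
+ * checked in kbase_hwcnt_jm_metadata_create() below.
+ */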
+
+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes)
+{
+ int errcode;
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_bytes;
+
+ if (!gpu_info || !out_metadata || !out_dump_bytes)
+ return -EINVAL;
+
+	/*
+	 * For architectures where a max_config interface is available
+	 * from the arbiter, the v5 dump bytes and the v5 metadata are
+	 * based on the maximum possible allocation of HW in the GPU,
+	 * because they need to be prepared for the worst case where all
+	 * the available L2 slices and shader cores are allocated.
+	 */
+ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
+ if (errcode)
+ return errcode;
+
+ /*
+ * The physical dump size should be half of dump abstraction size in
+ * metadata since physical HW uses 32-bit per value but metadata
+ * specifies 64-bit per value.
+ */
+ WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes);
+
+ *out_metadata = metadata;
+ *out_dump_bytes = dump_bytes;
+
+ return 0;
+}
+
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+ if (!metadata)
+ return;
+
+ kbase_hwcnt_metadata_destroy(metadata);
+}
+
+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata)
+{
+ int errcode;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if (!gpu_info || !out_metadata)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
+ if (errcode)
+ return errcode;
+
+ *out_metadata = metadata;
+
+ return 0;
+}
+
+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+ if (!metadata)
+ return;
+
+ kbase_hwcnt_metadata_destroy(metadata);
+}
+
+static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
+{
+ bool is_shader = false;
+
+ /* Warn on unknown group type */
+ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
+ return false;
+
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED)
+ is_shader = true;
+
+ return is_shader;
+}
+
+static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
+{
+ bool is_l2_cache = false;
+
+ switch (grp_type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
+ is_l2_cache = true;
+ break;
+ default:
+ /* Warn on unknown group type */
+ WARN_ON(true);
+ }
+
+ return is_l2_cache;
+}
+
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ const u64 *dump_src = src;
+ size_t src_offset = 0;
+ u64 core_mask = pm_core_mask;
+
+ /* Variables to deal with the current configuration */
+ int l2_count = 0;
+
+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ const size_t ctr_cnt =
+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const bool is_shader_core = is_block_type_shader(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
+ const bool is_l2_cache = is_block_type_l2_cache(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+ bool hw_res_available = true;
+
+ /*
+		 * If the number of L2 blocks seen so far is greater than the
+		 * number of L2 slices currently allocated, there is no HW
+		 * allocated to that block.
+ */
+ if (is_l2_cache) {
+ l2_count++;
+ if (l2_count > curr_config->num_l2_slices)
+ hw_res_available = false;
+ else
+ hw_res_available = true;
+ }
+ /*
+		 * For the shader cores, the currently allocated shader_mask is
+		 * always a subset of the maximum shader_mask, so after skipping
+		 * any L2 cache that is not available, the available shader
+		 * cores will always have a matching set of block instances to
+		 * accumulate into.
+ */
+ else
+ hw_res_available = true;
+
+ /*
+ * Skip block if no values in the destination block are enabled.
+ */
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
+ u64 *dst_blk =
+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk = dump_src + src_offset;
+ bool blk_powered;
+
+ if (!is_shader_core) {
+ /* Under the current PM system, counters will
+ * only be enabled after all non shader core
+ * blocks are powered up.
+ */
+ blk_powered = true;
+ } else {
+ /* Check the PM core mask to see if the shader
+ * core is powered up.
+ */
+ blk_powered = core_mask & 1;
+ }
+
+ if (blk_powered && !is_undefined && hw_res_available) {
+ /* Only powered and defined blocks have valid data. */
+ if (accumulate) {
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
+ hdr_cnt, ctr_cnt);
+ } else {
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ } else {
+ /* Even though the block might be undefined, the
+ * user has enabled counter collection for it.
+ * We should not propagate garbage data.
+ */
+ if (accumulate) {
+ /* No-op to preserve existing values */
+ } else {
+ /* src is garbage, so zero the dst */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ }
+ }
+
+		/* Only advance src_offset if the HW resource is available */
+ if (hw_res_available)
+ src_offset += (hdr_cnt + ctr_cnt);
+ if (is_shader_core)
+ core_mask = core_mask >> 1;
+ }
+
+ return 0;
+}
+
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ const u64 *dump_src = src;
+ size_t src_offset = 0;
+ size_t grp, blk, blk_inst;
+
+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ const size_t ctr_cnt =
+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+
+ /*
+ * Skip block if no values in the destination block are enabled.
+ */
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
+ u64 *dst_blk =
+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk = dump_src + src_offset;
+
+ if (!is_undefined) {
+ if (accumulate) {
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
+ hdr_cnt, ctr_cnt);
+ } else {
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ } else {
+ /* Even though the block might be undefined, the
+ * user has enabled counter collection for it.
+ * We should not propagate garbage data.
+ */
+ if (accumulate) {
+ /* No-op to preserve existing values */
+ } else {
+ /* src is garbage, so zero the dst */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ }
+ }
+
+ src_offset += (hdr_cnt + ctr_cnt);
+ }
+
+ return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
+ * block enable map to a
+ * block enable map
+ * abstraction.
+ * @phys: Physical 32-bit block enable map
+ * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction
+ * will be stored.
+ * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction
+ * will be stored.
+ */
+static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi)
+{
+ u64 dwords[2] = { 0, 0 };
+
+ size_t dword_idx;
+
+ for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+ const u16 packed = phys >> (16 * dword_idx);
+ u64 dword = 0;
+
+ size_t hword_bit;
+
+ for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+ const size_t dword_bit = hword_bit * 4;
+ const u64 mask = (packed >> (hword_bit)) & 0x1;
+
+ dword |= mask << (dword_bit + 0);
+ dword |= mask << (dword_bit + 1);
+ dword |= mask << (dword_bit + 2);
+ dword |= mask << (dword_bit + 3);
+ }
+ dwords[dword_idx] = dword;
+ }
+ *lo = dwords[0];
+ *hi = dwords[1];
+}
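+
+/*
+ * Worked example (editorial addition): each physical bit expands to four
+ * abstraction bits, so a physical map of 0x00000005 (bits 0 and 2 set)
+ * expands to *lo = 0x0000000000000F0F (values 0-3 and 8-11 enabled) and
+ * *hi = 0. This is the (lossy) inverse of
+ * kbase_hwcnt_backend_gpu_block_map_to_physical().
+ */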
+
+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 fe_bm[EM_COUNT] = { 0 };
+ u64 shader_bm[EM_COUNT] = { 0 };
+ u64 tiler_bm[EM_COUNT] = { 0 };
+ u64 mmu_l2_bm[EM_COUNT] = { 0 };
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!src) || WARN_ON(!dst))
+ return;
+
+ metadata = src->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);
+
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ fe_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ tiler_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ shader_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ mmu_l2_bm[map_idx] |= blk_map[map_idx];
+ break;
+ default:
+ WARN_ON(true);
+ }
+ }
+ } else {
+ WARN_ON(true);
+ }
+ }
+
+ dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]);
+ dst->shader_bm =
+ kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]);
+ dst->tiler_bm =
+ kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]);
+ dst->mmu_l2_bm =
+ kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
+}
+
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
+{
+ switch (src) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+
+ u64 fe_bm[EM_COUNT] = { 0 };
+ u64 shader_bm[EM_COUNT] = { 0 };
+ u64 tiler_bm[EM_COUNT] = { 0 };
+ u64 mmu_l2_bm[EM_COUNT] = { 0 };
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!src) || WARN_ON(!dst))
+ return;
+
+ metadata = dst->metadata;
+
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO],
+ &shader_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO],
+ &tiler_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
+ &mmu_l2_bm[EM_HI]);
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ blk_map[map_idx] = fe_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ blk_map[map_idx] = tiler_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ blk_map[map_idx] = shader_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ blk_map[map_idx] = mmu_l2_bm[map_idx];
+ break;
+ default:
+ WARN_ON(true);
+ }
+ }
+ } else {
+ WARN_ON(true);
+ }
+ }
+}
+
+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
+ return;
+
+ metadata = buf->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
+ const u64 *blk_map =
+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
+
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ u64 prfcnt_bm[EM_COUNT] = { 0 };
+ u32 prfcnt_en = 0;
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ prfcnt_bm[map_idx] = blk_map[map_idx];
+ }
+
+ prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO],
+ prfcnt_bm[EM_HI]);
+
+ buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
+ } else {
+ WARN_ON(true);
+ }
+ }
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
new file mode 100644
index 0000000..a49c31e
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_H_
+#define _KBASE_HWCNT_GPU_H_
+
+#include <linux/bug.h>
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/* Hardware counter version 5 definitions, V5 is the only supported version. */
+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
+#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
+ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
+
+/* FrontEnd block count in V5 GPU hardware counter. */
+#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
+/* Tiler block count in V5 GPU hardware counter. */
+#define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1
+
+/* Index of the PRFCNT_EN header into a V5 counter block */
+#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+
+/* Number of bytes for each counter value in hardware. */
+#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32))
+
+/**
+ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
+ * identify metadata groups.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
+ */
+enum kbase_hwcnt_gpu_group_type {
+ KBASE_HWCNT_GPU_GROUP_TYPE_V5,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
+ * used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager
+ * or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job
+ * manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job
+ * manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block
+ * (e.g. if a counter set that
+ * a block doesn't support is
+ * used).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block.
+ */
+enum kbase_hwcnt_gpu_v5_block_type {
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED,
+};
+
+/**
+ * enum kbase_hwcnt_set - GPU hardware counter sets
+ * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters
+ * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
+ * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters
+ * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters
+ */
+enum kbase_hwcnt_set {
+ KBASE_HWCNT_SET_PRIMARY,
+ KBASE_HWCNT_SET_SECONDARY,
+ KBASE_HWCNT_SET_TERTIARY,
+ KBASE_HWCNT_SET_UNDEFINED = 255,
+};
+
+/**
+ * struct kbase_hwcnt_physical_enable_map - Representation of enable map
+ * directly used by GPU.
+ * @fe_bm: Front end (JM/CSHW) counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm: Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ */
+struct kbase_hwcnt_physical_enable_map {
+ u32 fe_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 mmu_l2_bm;
+};
+
+/*
+ * Values for Hardware Counter SET_SELECT value.
+ * Directly passed to HW.
+ */
+enum kbase_hwcnt_physical_set {
+ KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
+ KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
+ KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
+};
+
+/**
+ * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
+ * @l2_count: L2 cache count.
+ * @core_mask: Shader core mask. May be sparse.
+ * @clk_cnt: Number of clock domains available.
+ * @prfcnt_values_per_block: Total entries (header + counters) of performance
+ * counter per block.
+ */
+struct kbase_hwcnt_gpu_info {
+ size_t l2_count;
+ u64 core_mask;
+ u8 clk_cnt;
+ size_t prfcnt_values_per_block;
+};
+
+/**
+ * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the
+ * GPU.
+ * @num_l2_slices: Current number of L2 slices allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ *
+ * For architectures with the max_config interface available from the Arbiter,
+ * the current resources allocated may change during runtime due to a
+ * re-partitioning (possible with partition manager). Thus, the HWC needs to be
+ * prepared to report any possible set of counters. For this reason the memory
+ * layout in the userspace is based on the maximum possible allocation. On the
+ * other hand, each partition has just the view of its currently allocated
+ * resources. Therefore, it is necessary to correctly map the dumped HWC values
+ * from the registers into this maximum memory layout so that it can be exposed
+ * to the userspace side correctly.
+ *
+ * For the L2 cache just the number is enough, since the allocated slices will
+ * be accumulated into the first L2 slots available in the destination buffer.
+ *
+ * For the correct mapping of the shader cores it is necessary to skip all the
+ * L2 cache slots in the destination buffer that are not allocated. However, no
+ * extra logic is needed to map the shader core bitmap into the memory layout,
+ * because the allocated shader_present will always be a subset of the maximum
+ * shader_present. This is possible because:
+ * 1 - Partitions are made of slices and they are always ordered from the ones
+ * with more shader cores to the ones with less.
+ * 2 - The shader cores in a slice are always contiguous.
+ * 3 - A partition can only have a contiguous set of slices allocated to it.
+ * So, for example, suppose 4 slices are available in total: 1 with 4 cores,
+ * 2 with 3 cores and 1 with 2 cores. The maximum possible shader_present would
+ * be:
+ * 0b0011|0111|0111|1111 -> note the order, and that the shader cores are
+ * contiguous within any slice.
+ * Supposing that a partition takes the two slices in the middle, the current
+ * config shader_present for this partition would be:
+ * 0b0111|0111 -> note that this is a subset of the maximum above, and that the
+ * slices are contiguous.
+ * Therefore, by directly copying any subset of the maximum possible
+ * shader_present the mapping is already achieved.
+ */
+struct kbase_hwcnt_curr_config {
+ size_t num_l2_slices;
+ u64 shader_present;
+};
+
+/**
+ * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined.
+ *
+ * @grp_type: Hardware counter group type.
+ * @blk_type: Hardware counter block type.
+ *
+ * Return: true if the block type is undefined, else false.
+ */
+static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
+ const uint64_t blk_type)
+{
+ /* Warn on unknown group type */
+ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
+ return false;
+
+ return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED);
+}
+
+/**
+ * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
+ * JM GPUs.
+ * @info: Non-NULL pointer to info struct.
+ * @counter_set: The performance counter set used.
+ * @out_metadata: Non-NULL pointer to where created metadata is stored on
+ * success.
+ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
+ * buffer is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes);
+
+/**
+ * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
+ *
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
+ * CSF GPUs.
+ * @info: Non-NULL pointer to info struct.
+ * @counter_set: The performance counter set used.
+ * @out_metadata: Non-NULL pointer to where created metadata is stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata);
+
+/**
+ * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
+ * metadata.
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
+ * dump buffer in src into the dump buffer
+ * abstraction in dst.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of destination dump
+ * buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @pm_core_mask: PM state synchronized shaders core mask with the dump.
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * source raw dump buffer to the destination dump buffer.
+ * @accumulate: True if counters in source should be accumulated into
+ * destination, rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ const u64 pm_core_mask,
+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
+
+/**
+ * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
+ * dump buffer in src into the dump buffer
+ * abstraction in dst.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of dst dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate: True if counters in src should be accumulated into
+ * destination, rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate);
+
+/**
+ * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
+ * enable map abstraction to
+ * a physical block enable
+ * map.
+ * @lo: Low 64 bits of block enable map abstraction.
+ * @hi: High 64 bits of block enable map abstraction.
+ *
+ * The abstraction uses 128 bits to enable 128 block values, whereas the
+ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
+ * Therefore, this conversion is lossy.
+ *
+ * Return: 32-bit physical block enable map.
+ */
+static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi)
+{
+ u32 phys = 0;
+ u64 dwords[2] = { lo, hi };
+ size_t dword_idx;
+
+ for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+ const u64 dword = dwords[dword_idx];
+ u16 packed = 0;
+
+ size_t hword_bit;
+
+ for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+ const size_t dword_bit = hword_bit * 4;
+ const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) |
+ ((dword >> (dword_bit + 1)) & 0x1) |
+ ((dword >> (dword_bit + 2)) & 0x1) |
+ ((dword >> (dword_bit + 3)) & 0x1);
+ packed |= (mask << hword_bit);
+ }
+ phys |= ((u32)packed) << (16 * dword_idx);
+ }
+ return phys;
+}
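+
+/*
+ * Worked example (editorial addition): lo = 0x000000000000000F (values 0-3
+ * enabled) and lo = 0x0000000000000001 (only value 0 enabled) both map to
+ * phys = 0x00000001; a single physical bit covers a group of four counter
+ * values, which is what makes the conversion lossy.
+ */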
+
+/**
+ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
+ * into a physical enable map.
+ * @dst: Non-NULL pointer to destination physical enable map.
+ * @src: Non-NULL pointer to source enable map abstraction.
+ *
+ * The src must have been created from a metadata returned from a call to
+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
+ *
+ * This is a lossy conversion, as the enable map abstraction has one bit per
+ * individual counter block value, but the physical enable map uses 1 bit for
+ * every 4 counters, shared over all instances of a block.
+ */
+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
+ * SET_SELECT value.
+ *
+ * @dst: Non-NULL pointer to destination physical SET_SELECT value.
+ * @src: Non-NULL pointer to source counter set selection.
+ */
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
+ * an enable map abstraction.
+ * @dst: Non-NULL pointer to destination enable map abstraction.
+ * @src: Non-NULL pointer to source physical enable map.
+ *
+ * The dst must have been created from a metadata returned from a call to
+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
+ *
+ * This is a lossy conversion, as the physical enable map can technically
+ * support counter blocks with 128 counters each, but no hardware actually uses
+ * more than 64, so the enable map abstraction has nowhere to store the enable
+ * information for the 64 non-existent counters.
+ */
+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
+ * enable headers in a dump buffer to
+ * reflect the specified enable map.
+ * @buf: Non-NULL pointer to dump buffer to patch.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * The buf and enable_map must have been created from a metadata returned from
+ * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
+ *
+ * This function should be used before handing off a dump buffer over the
+ * kernel-user boundary, to ensure the header is accurate for the enable map
+ * used by the user.
+ */
+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+#endif /* _KBASE_HWCNT_GPU_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
new file mode 100644
index 0000000..0cf2f94
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md)
+{
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description group;
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ size_t prfcnt_values_per_block;
+ size_t blk;
+ int err;
+ struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+
+ if (!dst_md_narrow || !src_md || !src_md->grp_metadata ||
+ !src_md->grp_metadata[0].blk_metadata)
+ return -EINVAL;
+
+ /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block
+ * count in the metadata.
+ */
+ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
+ (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
+ return -EINVAL;
+
+ /* Get the values count in the first block. */
+ prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
+
+	/* Check that all blocks have the same number of values. */
+ for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
+ if (val_cnt != prfcnt_values_per_block)
+ return -EINVAL;
+ }
+
+ /* Only support 64 and 128 entries per block. */
+ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128))
+ return -EINVAL;
+
+ metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL);
+ if (!metadata_narrow)
+ return -ENOMEM;
+
+ /* Narrow to 64 entries per block to keep API backward compatibility. */
+ prfcnt_values_per_block = 64;
+
+ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
+ blks[blk] = (struct kbase_hwcnt_block_description){
+ .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
+ .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
+ .hdr_cnt = blk_hdr_cnt,
+ .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
+ };
+ }
+
+ group = (struct kbase_hwcnt_group_description){
+ .type = kbase_hwcnt_metadata_group_type(src_md, 0),
+ .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT,
+ .blks = blks,
+ };
+
+ desc = (struct kbase_hwcnt_description){
+ .grp_cnt = kbase_hwcnt_metadata_group_count(src_md),
+ .avail_mask = src_md->avail_mask,
+ .clk_cnt = src_md->clk_cnt,
+ .grps = &group,
+ };
+
+ err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata);
+ if (!err) {
+		/* Halve the buffer size, as the narrowed metadata only supports
+		 * 32-bit block entries while the created metadata uses 64-bit
+		 * entries.
+		 */
+ metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
+ *dst_md_narrow = metadata_narrow;
+ } else {
+ kfree(metadata_narrow);
+ }
+
+ return err;
+}
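+
+/*
+ * Illustrative usage sketch (editorial addition, not part of the driver
+ * sources): narrowing CSF metadata for a legacy 32-bit client. The gpu_info
+ * and counter_set values, and the surrounding error handling, are assumptions
+ * made purely for the example.
+ *
+ *   const struct kbase_hwcnt_metadata *md;
+ *   const struct kbase_hwcnt_metadata_narrow *md_narrow;
+ *   int err;
+ *
+ *   err = kbase_hwcnt_csf_metadata_create(&gpu_info, counter_set, &md);
+ *   if (!err)
+ *           err = kbase_hwcnt_gpu_metadata_narrow_create(&md_narrow, md);
+ *
+ *   // ... use md_narrow for 32-bit dump buffers ...
+ *
+ *   kbase_hwcnt_gpu_metadata_narrow_destroy(md_narrow);
+ *   kbase_hwcnt_csf_metadata_destroy(md);
+ */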
+
+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+ if (!md_narrow)
+ return;
+
+ kbase_hwcnt_metadata_destroy(md_narrow->metadata);
+ kfree(md_narrow);
+}
+
+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
+{
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ u8 *buf;
+
+ if (!md_narrow || !dump_buf)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+
+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */
+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)buf,
+ .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes),
+ };
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
+{
+ if (!dump_buf_narrow)
+ return;
+
+ kfree(dump_buf_narrow->dump_buf);
+ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
+ .dump_buf = NULL,
+ .clk_cnt_buf = NULL };
+}
+
+int kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ struct kbase_hwcnt_dump_buffer_narrow *buffers;
+ size_t buf_idx;
+ unsigned int order;
+ unsigned long addr;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ size_t total_dump_buf_size;
+
+ if (!md_narrow || !dump_bufs)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+
+ /* Allocate memory for the dump buffer struct array */
+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+ if (!buffers)
+ return -ENOMEM;
+
+ /* Allocate pages for the actual dump buffers, as they tend to be fairly
+ * large.
+ */
+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+
+ if (!addr) {
+ kfree(buffers);
+ return -ENOMEM;
+ }
+
+ *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){
+ .page_addr = addr,
+ .page_order = order,
+ .buf_cnt = n,
+ .bufs = buffers,
+ };
+
+ total_dump_buf_size = dump_buf_bytes * n;
+ /* Set the buffer of each dump buf */
+ for (buf_idx = 0; buf_idx < n; buf_idx++) {
+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
+ const size_t clk_cnt_buf_offset =
+ total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx);
+
+ buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)(addr + dump_buf_offset),
+ .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset),
+ };
+ }
+
+ return 0;
+}
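+
+/*
+ * Layout note (editorial addition): for n buffers the single allocation made
+ * above is carved up as n narrow dump buffers followed by n clock count
+ * buffers:
+ *
+ *   addr                      : bufs[0].dump_buf
+ *   addr + dump_buf_bytes     : bufs[1].dump_buf, ... , bufs[n-1].dump_buf
+ *   addr + n * dump_buf_bytes : bufs[0].clk_cnt_buf, ... , bufs[n-1].clk_cnt_buf
+ *
+ * as computed by dump_buf_offset and clk_cnt_buf_offset in the loop above.
+ */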
+
+void kbase_hwcnt_dump_buffer_narrow_array_free(
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ if (!dump_bufs)
+ return;
+
+ kfree(dump_bufs->bufs);
+ free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+ memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt)
+{
+ size_t val;
+
+ for (val = 0; val < val_cnt; val++) {
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
+ u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
+
+ dst_blk[val] = val_enabled ? src_val : 0;
+ }
+}
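+
+/*
+ * Worked example (editorial addition): with val_cnt = 2, blk_em enabling only
+ * value 0, and src_blk = { 0x1FFFFFFFF, 42 }, the result is
+ * dst_blk = { U32_MAX, 0 }: enabled values saturate at U32_MAX when narrowed
+ * to 32 bits, and disabled values are zeroed (strict copy semantics).
+ */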
+
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+ size_t grp;
+ size_t clk;
+
+ if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
+ WARN_ON(src->metadata->grp_cnt != 1) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ src->metadata->grp_metadata[0].blk_cnt) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
+ src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
+ return;
+
+ /* Don't use src metadata since src buffer is bigger than dst buffer. */
+ metadata_narrow = dst_narrow->md_narrow;
+
+ for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
+ size_t blk;
+ size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);
+
+ for (blk = 0; blk < blk_cnt; blk++) {
+ size_t blk_inst;
+ size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
+ metadata_narrow, grp, blk);
+
+ for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
+ /* The narrowed down buffer is only 32-bit. */
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
+ dst_narrow, grp, blk, blk_inst);
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
+ metadata_narrow, grp, blk);
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
+
+ kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
+ blk_em, val_cnt);
+ }
+ }
+ }
+
+ for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
+ bool clk_enabled =
+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
+
+ dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ }
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
new file mode 100644
index 0000000..afd236d
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_NARROW_H_
+#define _KBASE_HWCNT_GPU_NARROW_H_
+
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical
+ * layout of narrow dump buffers.
+ * For backward compatibility, the narrow
+ * metadata only supports 64 counters per
+ * block and 32-bit per block entry.
+ * @metadata:       Non-NULL pointer to the metadata before narrowing down to
+ *                  32-bit block entries; it has 64 counters per block and
+ *                  64 bits per value.
+ * @dump_buf_bytes: The size in bytes after narrowing the 64-bit block entries
+ *                  down to 32-bit.
+ */
+struct kbase_hwcnt_metadata_narrow {
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_buf_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer.
+ * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to
+ * describe the layout of the narrow dump buffer.
+ * @dump_buf: Non-NULL pointer to an array of u32 values, the array size
+ * is md_narrow->dump_buf_bytes.
+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
+ * for each clock domain.
+ */
+struct kbase_hwcnt_dump_buffer_narrow {
+ const struct kbase_hwcnt_metadata_narrow *md_narrow;
+ u32 *dump_buf;
+ u64 *clk_cnt_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump
+ * buffer array.
+ * @page_addr: Address of first allocated page. A single allocation is used for
+ * all narrow dump buffers in the array.
+ * @page_order: The allocation order of the pages, the order is on a logarithmic
+ * scale.
+ * @buf_cnt: The number of allocated dump buffers.
+ * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors.
+ */
+struct kbase_hwcnt_dump_buffer_narrow_array {
+ unsigned long page_addr;
+ unsigned int page_order;
+ size_t buf_cnt;
+ struct kbase_hwcnt_dump_buffer_narrow *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from
+ * narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ *
+ * Return: Number of hardware counter groups described by narrow metadata.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+ return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ *
+ * Return: Type of the group grp.
+ */
+static inline u64
+kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp)
+{
+ return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a
+ * group from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp)
+{
+ return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of
+ * instances of a block
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter
+ * headers from narrow
+ * metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counter headers in each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of
+ * counters from narrow
+ * metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counters in each instance of block blk in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of headers plus counters in each instance of block blk
+ * in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) +
+ kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a
+ * narrowed block instance's
+ * dump buffer.
+ * @buf: Non-NULL pointer to narrow dump buffer.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: u32* to the dump buffer for the block instance.
+ */
+static inline u32 *
+kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
+ buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
+ (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride *
+ blk_inst);
+}
+
+/**
+ * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC
+ * entries per block truncated to
+ * 64 entries and block entry size
+ * narrowed down to 32-bit.
+ *
+ * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored
+ * on success.
+ * @src_md: Non-NULL pointer to the HWC metadata used as the source to
+ * create dst_md_narrow.
+ *
+ * For backward compatibility of the interface with user clients, new metadata
+ * with entries per block truncated to 64 and block entry size narrowed down
+ * to 32-bit will be created for dst_md_narrow.
+ * The total number of entries per block in src_md must be either 64 or 128;
+ * any other value is unsupported and the function returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md);
+
+/**
+ * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow
+ * metadata object.
+ * @md_narrow: Pointer to hardware counter narrow metadata.
+ */
+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow);
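+
+/*
+ * Example (illustrative sketch only, not part of the driver): deriving narrow
+ * metadata from existing wide metadata and destroying it again. src_md is
+ * assumed to be a valid metadata object whose blocks have 64 or 128 values.
+ *
+ *	const struct kbase_hwcnt_metadata_narrow *md_narrow = NULL;
+ *
+ *	if (!kbase_hwcnt_gpu_metadata_narrow_create(&md_narrow, src_md)) {
+ *		... use md_narrow ...
+ *		kbase_hwcnt_gpu_metadata_narrow_destroy(md_narrow);
+ *	}
+ */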
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be
+ * initialised to undefined values, so must be used as a copy
+ * destination, or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed narrow dump buffer structure, or on an
+ * already freed narrow dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow
+ * dump buffers.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @n: Number of narrow dump buffers to allocate.
+ * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array
+ * object to be initialised.
+ *
+ * A single zeroed contiguous page allocation will be used for all of the
+ * buffers inside the object, where:
+ * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow->dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer
+ * array.
+ * @dump_bufs: Narrow dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed narrow dump buffer array structure, or
+ * on an already freed narrow dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_narrow_array_free(
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block
+ * values from source to
+ * destination.
+ * @dst_blk: Non-NULL pointer to destination block obtained from a call to
+ * kbase_hwcnt_dump_buffer_narrow_block_instance.
+ * @src_blk: Non-NULL pointer to source block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ *
+ * After the copy, any disabled values in the destination will be zero. Each
+ * enabled value in the destination is saturated at U32_MAX if the
+ * corresponding source value is greater than U32_MAX, and is copied from the
+ * source unchanged otherwise.
+ */
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt);
+
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a
+ * narrow dump buffer.
+ * @dst_narrow: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * After the operation, all non-enabled values (including padding bytes) will be
+ * zero. Slower than the non-strict variant.
+ *
+ * Each enabled value in dst_narrow is saturated at U32_MAX if the
+ * corresponding source value is greater than U32_MAX, and is copied from the
+ * source unchanged otherwise.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
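+
+/*
+ * Example (illustrative sketch only, not part of the driver): narrowing a
+ * 64-bit dump buffer into a freshly allocated narrow buffer. md_narrow, src
+ * and enable_map are assumed to be existing, matching objects.
+ *
+ *	struct kbase_hwcnt_dump_buffer_narrow narrow_buf = { 0 };
+ *
+ *	if (!kbase_hwcnt_dump_buffer_narrow_alloc(md_narrow, &narrow_buf)) {
+ *		kbase_hwcnt_dump_buffer_copy_strict_narrow(&narrow_buf, src, enable_map);
+ *		... hand the 32-bit values in narrow_buf.dump_buf to the client ...
+ *		kbase_hwcnt_dump_buffer_narrow_free(&narrow_buf);
+ *	}
+ */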
+
+#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
new file mode 100644
index 0000000..763eb31
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/slab.h>
+
+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **out_metadata)
+{
+ char *buf;
+ struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_group_metadata *grp_mds;
+ size_t grp;
+ size_t enable_map_count; /* Number of u64 bitfields (inc padding) */
+ size_t dump_buf_count; /* Number of u64 values (inc padding) */
+ size_t avail_mask_bits; /* Number of availability mask bits */
+
+ size_t size;
+ size_t offset;
+
+ if (!desc || !out_metadata)
+ return -EINVAL;
+
+ /* The maximum number of clock domains is 64. */
+ if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE))
+ return -EINVAL;
+
+ /* Calculate the bytes needed to tightly pack the metadata */
+
+ /* Top level metadata */
+ size = 0;
+ size += sizeof(struct kbase_hwcnt_metadata);
+
+ /* Group metadata */
+ size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+ /* Block metadata */
+ for (grp = 0; grp < desc->grp_cnt; grp++) {
+ size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt;
+ }
+
+ /* Single allocation for the entire metadata */
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Use the allocated memory for the metadata and its members */
+
+ /* Bump allocate the top level metadata */
+ offset = 0;
+ metadata = (struct kbase_hwcnt_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_metadata);
+
+ /* Bump allocate the group metadata */
+ grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+ enable_map_count = 0;
+ dump_buf_count = 0;
+ avail_mask_bits = 0;
+
+ for (grp = 0; grp < desc->grp_cnt; grp++) {
+ size_t blk;
+
+ const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp;
+ struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
+
+ size_t group_enable_map_count = 0;
+ size_t group_dump_buffer_count = 0;
+ size_t group_avail_mask_bits = 0;
+
+ /* Bump allocate this group's block metadata */
+ struct kbase_hwcnt_block_metadata *blk_mds =
+ (struct kbase_hwcnt_block_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt;
+
+ /* Fill in each block in the group's information */
+ for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
+ const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk;
+ struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk;
+ const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt;
+
+ blk_md->type = blk_desc->type;
+ blk_md->inst_cnt = blk_desc->inst_cnt;
+ blk_md->hdr_cnt = blk_desc->hdr_cnt;
+ blk_md->ctr_cnt = blk_desc->ctr_cnt;
+ blk_md->enable_map_index = group_enable_map_count;
+ blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values);
+ blk_md->dump_buf_index = group_dump_buffer_count;
+ blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS(
+ n_values,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
+ blk_md->avail_mask_index = group_avail_mask_bits;
+
+ group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt;
+ group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt;
+ group_avail_mask_bits += blk_md->inst_cnt;
+ }
+
+ /* Fill in the group's information */
+ grp_md->type = grp_desc->type;
+ grp_md->blk_cnt = grp_desc->blk_cnt;
+ grp_md->blk_metadata = blk_mds;
+ grp_md->enable_map_index = enable_map_count;
+ grp_md->dump_buf_index = dump_buf_count;
+ grp_md->avail_mask_index = avail_mask_bits;
+
+ enable_map_count += group_enable_map_count;
+ dump_buf_count += group_dump_buffer_count;
+ avail_mask_bits += group_avail_mask_bits;
+ }
+
+ /* Fill in the top level metadata's information */
+ metadata->grp_cnt = desc->grp_cnt;
+ metadata->grp_metadata = grp_mds;
+ metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
+ metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
+ metadata->avail_mask = desc->avail_mask;
+ metadata->clk_cnt = desc->clk_cnt;
+
+ WARN_ON(size != offset);
+ /* Due to the block alignment, there should be exactly one enable map
+ * bit per 4 bytes in the dump buffer.
+ */
+ WARN_ON(metadata->dump_buf_bytes !=
+ (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+
+ *out_metadata = metadata;
+ return 0;
+}
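+
+/*
+ * Example (illustrative sketch only, not part of the driver): a minimal
+ * description with one group containing one block type of 4 instances, used
+ * to create metadata. All counts and type identifiers here are arbitrary;
+ * the avail_mask of 0xF marks all 4 block instances as available.
+ *
+ *	const struct kbase_hwcnt_block_description blk = {
+ *		.type = 0x1, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60,
+ *	};
+ *	const struct kbase_hwcnt_group_description grp = {
+ *		.type = 0x1, .blk_cnt = 1, .blks = &blk,
+ *	};
+ *	const struct kbase_hwcnt_description desc = {
+ *		.grp_cnt = 1, .grps = &grp, .avail_mask = 0xF, .clk_cnt = 1,
+ *	};
+ *	const struct kbase_hwcnt_metadata *md = NULL;
+ *	int err = kbase_hwcnt_metadata_create(&desc, &md);
+ */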
+
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+ kfree(metadata);
+}
+
+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map)
+{
+ u64 *enable_map_buf;
+
+ if (!metadata || !enable_map)
+ return -EINVAL;
+
+ if (metadata->enable_map_bytes > 0) {
+ enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+ if (!enable_map_buf)
+ return -ENOMEM;
+ } else {
+ enable_map_buf = NULL;
+ }
+
+ enable_map->metadata = metadata;
+ enable_map->hwcnt_enable_map = enable_map_buf;
+ return 0;
+}
+
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
+{
+ if (!enable_map)
+ return;
+
+ kfree(enable_map->hwcnt_enable_map);
+ enable_map->hwcnt_enable_map = NULL;
+ enable_map->metadata = NULL;
+}
+
+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ u8 *buf;
+
+ if (!metadata || !dump_buf)
+ return -EINVAL;
+
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
+
+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */
+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ dump_buf->metadata = metadata;
+ dump_buf->dump_buf = (u64 *)buf;
+ dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes);
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ if (!dump_buf)
+ return;
+
+ kfree(dump_buf->dump_buf);
+ memset(dump_buf, 0, sizeof(*dump_buf));
+}
+
+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+ struct kbase_hwcnt_dump_buffer *buffers;
+ size_t buf_idx;
+ unsigned int order;
+ unsigned long addr;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+
+ if (!metadata || !dump_bufs)
+ return -EINVAL;
+
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
+
+ /* Allocate memory for the dump buffer struct array */
+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+ if (!buffers)
+ return -ENOMEM;
+
+ /* Allocate pages for the actual dump buffers, as they tend to be fairly
+ * large.
+ */
+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+
+ if (!addr) {
+ kfree(buffers);
+ return -ENOMEM;
+ }
+
+ dump_bufs->page_addr = addr;
+ dump_bufs->page_order = order;
+ dump_bufs->buf_cnt = n;
+ dump_bufs->bufs = buffers;
+
+ /* Set the buffer of each dump buf */
+ for (buf_idx = 0; buf_idx < n; buf_idx++) {
+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
+ const size_t clk_cnt_buf_offset =
+ (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx);
+
+ buffers[buf_idx].metadata = metadata;
+ buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset);
+ buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset);
+ }
+
+ return 0;
+}
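+
+/*
+ * Example (illustrative sketch only, not part of the driver): allocating an
+ * array of two dump buffers and freeing it again. metadata is assumed to be
+ * an existing metadata object.
+ *
+ *	struct kbase_hwcnt_dump_buffer_array bufs = { 0 };
+ *
+ *	if (!kbase_hwcnt_dump_buffer_array_alloc(metadata, 2, &bufs)) {
+ *		... use bufs.bufs[0] and bufs.bufs[1] ...
+ *		kbase_hwcnt_dump_buffer_array_free(&bufs);
+ *	}
+ */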
+
+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+ if (!dump_bufs)
+ return;
+
+ kfree(dump_bufs->bufs);
+ free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+ memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+
+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk;
+ size_t val_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+ }
+
+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
+}
+
+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst)
+{
+ if (WARN_ON(!dst))
+ return;
+
+ memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
+
+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
+}
+
+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
+
+ if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) {
+ /* Block available, so only zero non-enabled values */
+ kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt);
+ } else {
+ /* Block not available, so zero the entire thing */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+ }
+ }
+}
+
+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk;
+ const u64 *src_blk;
+ size_t val_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
+ }
+}
+
+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk =
+ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
+
+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ bool clk_enabled =
+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
+
+ dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ }
+}
+
+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk;
+ const u64 *src_blk;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+ }
+}
+
+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk =
+ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ /* Align upwards to include padding bytes */
+ ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ hdr_cnt + ctr_cnt,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
+
+ kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt,
+ ctr_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+ else
+ dst->clk_cnt_buf[clk] = 0;
+ }
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
new file mode 100644
index 0000000..5c5ada4
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
@@ -0,0 +1,1231 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter types.
+ * Contains structures for describing the physical layout of hardware counter
+ * dump buffers and enable maps within a system.
+ *
+ * Also contains helper functions for manipulation of these dump buffers and
+ * enable maps.
+ *
+ * Through use of these structures and functions, hardware counters can be
+ * enabled, copied, accumulated, and generally manipulated in a generic way,
+ * regardless of the physical counter dump layout.
+ *
+ * Terminology:
+ *
+ * Hardware Counter System:
+ * A collection of hardware counter groups, making a full hardware counter
+ * system.
+ * Hardware Counter Group:
+ * A group of Hardware Counter Blocks (e.g. a t62x might have more than one
+ * core group, so has one counter group per core group, where each group
+ * may have a different number and layout of counter blocks).
+ * Hardware Counter Block:
+ * A block of hardware counters (e.g. shader block, tiler block).
+ * Hardware Counter Block Instance:
+ * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have
+ * 4 shader block instances).
+ *
+ * Block Header:
+ * A header value inside a counter block. Headers don't count anything,
+ * so it is only valid to copy or zero them. Headers are always the first
+ * values in the block.
+ * Block Counter:
+ * A counter value inside a counter block. Counters can be zeroed, copied,
+ * or accumulated. Counters are always immediately after the headers in the
+ * block.
+ * Block Value:
+ * A catch-all term for block headers and block counters.
+ *
+ * Enable Map:
+ * An array of u64 bitfields, where each bit either enables exactly one
+ * block value, or is unused (padding).
+ * Dump Buffer:
+ * An array of u64 values, where each u64 corresponds either to one block
+ * value, or is unused (padding).
+ * Availability Mask:
+ * A bitfield, where each bit corresponds to whether a block instance is
+ * physically available (e.g. an MP3 GPU may have a sparse core mask of
+ * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the
+ * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this
+ * case, the availability mask might be 0b1011111 (the exact layout will
+ * depend on the specific hardware architecture), with the 3 extra early bits
+ * corresponding to other block instances in the hardware counter system).
+ * Metadata:
+ * Structure describing the physical layout of the enable map and dump buffers
+ * for a specific hardware counter system.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_TYPES_H_
+#define _KBASE_HWCNT_TYPES_H_
+
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+/* Number of bytes in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64))
+
+/* Number of bits in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE)
+
+/* Number of bytes for each counter value.
+ * The driver uses 64 bits per counter so that the 32-bit hardware register
+ * values do not overflow when accumulated over long periods.
+ */
+#define KBASE_HWCNT_VALUE_BYTES (sizeof(u64))
+
+/* Number of bits in an availability mask (i.e. max total number of block
+ * instances supported in a Hardware Counter System)
+ */
+#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE)
+
+/* Minimum alignment of each block of hardware counters */
+#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
+
+/**
+ * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value.
+ * @value: The value to align upwards.
+ * @alignment: The alignment boundary.
+ *
+ * Return: Input value if already aligned to the specified boundary, or next
+ * (incrementing upwards) aligned value.
+ */
+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
+ (value + ((alignment - (value % alignment)) % alignment))
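+
+/*
+ * For example, with the definitions above, KBASE_HWCNT_ALIGN_UPWARDS(70, 64)
+ * evaluates to 128, while KBASE_HWCNT_ALIGN_UPWARDS(64, 64) stays at 64.
+ */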
+
+/**
+ * struct kbase_hwcnt_block_description - Description of one or more identical,
+ * contiguous, Hardware Counter Blocks.
+ * @type: The arbitrary identifier used to identify the type of the block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt: The number of 64-bit Block Headers in the block.
+ * @ctr_cnt: The number of 64-bit Block Counters in the block.
+ */
+struct kbase_hwcnt_block_description {
+ u64 type;
+ size_t inst_cnt;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_group_description - Description of one or more identical,
+ * contiguous Hardware Counter Groups.
+ * @type: The arbitrary identifier used to identify the type of the group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the group.
+ * @blks: Non-NULL pointer to an array of blk_cnt block descriptions,
+ * describing each type of Hardware Counter Block in the group.
+ */
+struct kbase_hwcnt_group_description {
+ u64 type;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_description *blks;
+};
+
+/**
+ * struct kbase_hwcnt_description - Description of a Hardware Counter System.
+ * @grp_cnt: The number of Hardware Counter Groups.
+ * @grps: Non-NULL pointer to an array of grp_cnt group descriptions,
+ * describing each Hardware Counter Group in the system.
+ * @avail_mask: Flat Availability Mask for all block instances in the system.
+ * @clk_cnt: The number of clock domains in the system. The maximum is 64.
+ */
+struct kbase_hwcnt_description {
+ size_t grp_cnt;
+ const struct kbase_hwcnt_group_description *grps;
+ u64 avail_mask;
+ u8 clk_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout
+ * of a block in a Hardware Counter System's
+ * Dump Buffers and Enable Maps.
+ * @type: The arbitrary identifier used to identify the type of the
+ * block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt: The number of 64-bit Block Headers in the block.
+ * @ctr_cnt: The number of 64-bit Block Counters in the block.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ * Enable Map bitfields of the Block Instances described by
+ * this metadata start.
+ * @enable_map_stride: Stride in u64s between the Enable Maps of each of the
+ * Block Instances described by this metadata.
+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
+ * Dump Buffers of the Block Instances described by this
+ * metadata start.
+ * @dump_buf_stride: Stride in u64s between the Dump Buffers of each of the
+ * Block Instances described by this metadata.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ * the Availability Masks of the Block Instances described
+ * by this metadata start.
+ */
+struct kbase_hwcnt_block_metadata {
+ u64 type;
+ size_t inst_cnt;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+ size_t enable_map_index;
+ size_t enable_map_stride;
+ size_t dump_buf_index;
+ size_t dump_buf_stride;
+ size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
+ * of a group of blocks in a Hardware
+ * Counter System's Dump Buffers and Enable
+ * Maps.
+ * @type: The arbitrary identifier used to identify the type of the
+ * group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the
+ * group.
+ * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata,
+ * describing the physical layout of each type of Hardware
+ * Counter Block in the group.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ * Enable Maps of the blocks within the group described by
+ * this metadata start.
+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
+ * Dump Buffers of the blocks within the group described by
+ * metadata start.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ * the Availability Masks of the blocks within the group
+ * described by this metadata start.
+ */
+struct kbase_hwcnt_group_metadata {
+ u64 type;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_metadata *blk_metadata;
+ size_t enable_map_index;
+ size_t dump_buf_index;
+ size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_metadata - Metadata describing the memory layout
+ * of Dump Buffers and Enable Maps within a
+ * Hardware Counter System.
+ * @grp_cnt: The number of Hardware Counter Groups.
+ * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata,
+ * describing the physical layout of each Hardware Counter
+ * Group in the system.
+ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
+ * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system.
+ * @avail_mask: The Availability Mask for the system.
+ * @clk_cnt: The number of clock domains in the system.
+ */
+struct kbase_hwcnt_metadata {
+ size_t grp_cnt;
+ const struct kbase_hwcnt_group_metadata *grp_metadata;
+ size_t enable_map_bytes;
+ size_t dump_buf_bytes;
+ u64 avail_mask;
+ u8 clk_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64
+ * bitfields.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ * the layout of the enable map.
+ * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an
+ * array of u64 bitfields, each bit of which enables one hardware
+ * counter.
+ * @clk_enable_map: A u64 bitfield, each bit of which enables the cycle
+ * counter for the corresponding clock domain.
+ */
+struct kbase_hwcnt_enable_map {
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 *hwcnt_enable_map;
+ u64 clk_enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ * the layout of the Dump Buffer.
+ * @dump_buf: Non-NULL pointer to an array of u64 values, the array size is
+ * metadata->dump_buf_bytes.
+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
+ * for each clock domain.
+ */
+struct kbase_hwcnt_dump_buffer {
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 *dump_buf;
+ u64 *clk_cnt_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
+ * @page_addr: Address of allocated pages. A single allocation is used for all
+ * Dump Buffers in the array.
+ * @page_order: The allocation order of the pages, i.e. log2 of the number of
+ * pages allocated.
+ * @buf_cnt: The number of allocated Dump Buffers.
+ * @bufs: Non-NULL pointer to the array of Dump Buffers.
+ */
+struct kbase_hwcnt_dump_buffer_array {
+ unsigned long page_addr;
+ unsigned int page_order;
+ size_t buf_cnt;
+ struct kbase_hwcnt_dump_buffer *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object
+ * from a description.
+ * @desc: Non-NULL pointer to a hardware counter description.
+ * @metadata: Non-NULL pointer to where created metadata will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **metadata);
+
+/**
+ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object.
+ * @metadata: Pointer to hardware counter metadata
+ */
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_metadata_group_count() - Get the number of groups.
+ * @metadata: Non-NULL pointer to metadata.
+ *
+ * Return: Number of hardware counter groups described by metadata.
+ */
+static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
+{
+ if (WARN_ON(!metadata))
+ return 0;
+
+ return metadata->grp_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ *
+ * Return: Type of the group grp.
+ */
+static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].type;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Type of the block blk in group grp.
+ */
+static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].type;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
+ * a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter
+ * headers.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counter headers in each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counters in each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Enable map stride for each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_values_count() - Get the number of values.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of headers plus counters in each instance of block blk
+ * in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) +
+ kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in
+ * the metadata.
+ * @md: Non-NULL pointer to metadata.
+ * @grp: size_t variable used as group iterator.
+ * @blk: size_t variable used as block iterator.
+ * @blk_inst: size_t variable used as block instance iterator.
+ *
+ * Iteration order is group, then block, then block instance (i.e. linearly
+ * through memory).
+ */
+#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
+ for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
+ for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
+ for ((blk_inst) = 0; \
+ (blk_inst) < \
+ kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \
+ (blk_inst)++)
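+
+/*
+ * Example (illustrative sketch): counting the enabled block instances in an
+ * enable map with the iterator above. map is assumed to be a valid
+ * struct kbase_hwcnt_enable_map pointer.
+ *
+ *	size_t grp, blk, blk_inst, enabled = 0;
+ *
+ *	kbase_hwcnt_metadata_for_each_block(map->metadata, grp, blk, blk_inst)
+ *	{
+ *		if (kbase_hwcnt_enable_map_block_enabled(map, grp, blk, blk_inst))
+ *			enabled++;
+ *	}
+ */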
+
+/**
+ * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail
+ * mask corresponding to the block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: The bit index into the avail mask for the block.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].avail_mask_index +
+ metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is
+ * available.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if the block instance is available, else false.
+ */
+static inline bool
+kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk, size_t blk_inst)
+{
+ size_t bit;
+ u64 mask;
+
+ if (WARN_ON(!metadata))
+ return false;
+
+ bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst;
+ mask = 1ull << bit;
+
+ return (metadata->avail_mask & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_alloc() - Allocate an enable map.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @enable_map: Non-NULL pointer to enable map to be initialised. Will be
+ * initialised to all zeroes (i.e. all counters disabled).
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_free() - Free an enable map.
+ * @enable_map: Enable map to be freed.
+ *
+ * Can be safely called on an all-zeroed enable map structure, or on an already
+ * freed enable map.
+ */
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block
+ * instance's enable map.
+ * @map: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: u64* to the bitfield(s) used as the enable map for the
+ * block instance.
+ */
+static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map))
+ return NULL;
+
+ if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) ||
+ WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) ||
+ WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ return map->hwcnt_enable_map;
+
+ return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index +
+ map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index +
+ (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst);
+}
+
+/**
+ * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required
+ * to have at minimum one bit per value.
+ * @val_cnt: Number of values.
+ *
+ * Return: Number of required bitfields.
+ */
+static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
+{
+ return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / KBASE_HWCNT_BITFIELD_BITS;
+}
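+
+/*
+ * For example, 64 values fit in a single u64 bitfield, while 65 values
+ * require two.
+ */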
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block.
+ * @dst: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+
+ if (WARN_ON(!dst))
+ return;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
+ memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES);
+}
+
+/**
+ * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map.
+ * @dst: Non-NULL pointer to enable map to zero.
+ */
+static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_map *dst)
+{
+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
+ return;
+
+ if (dst->hwcnt_enable_map != NULL)
+ memset(dst->hwcnt_enable_map, 0, dst->metadata->enable_map_bytes);
+
+ dst->clk_enable_map = 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block.
+ * @dst: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+ size_t bitfld_idx;
+
+ if (WARN_ON(!dst))
+ return;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ u64 block_enable_map_mask = U64_MAX;
+
+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+ block_enable_map_mask = (1ull << remaining_values) - 1;
+
+ block_enable_map[bitfld_idx] = block_enable_map_mask;
+ }
+}
+
+/**
+ * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable
+ * map.
+ * @dst: Non-NULL pointer to enable map.
+ */
+static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst)
+{
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
+ return;
+
+ kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
+ kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst);
+
+ dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1;
+}
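+
+/*
+ * Example (illustrative sketch): a typical setup that allocates an enable map
+ * for existing metadata, enables everything, and frees it again. metadata is
+ * assumed to be a valid const struct kbase_hwcnt_metadata pointer.
+ *
+ *	struct kbase_hwcnt_enable_map map = { 0 };
+ *
+ *	if (!kbase_hwcnt_enable_map_alloc(metadata, &map)) {
+ *		kbase_hwcnt_enable_map_enable_all(&map);
+ *		... use the map ...
+ *		kbase_hwcnt_enable_map_free(&map);
+ *	}
+ */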
+
+/**
+ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_copy(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
+ WARN_ON(dst->metadata != src->metadata))
+ return;
+
+ if (dst->hwcnt_enable_map != NULL) {
+ if (WARN_ON(!src->hwcnt_enable_map))
+ return;
+
+ memcpy(dst->hwcnt_enable_map, src->hwcnt_enable_map,
+ dst->metadata->enable_map_bytes);
+ }
+
+ dst->clk_enable_map = src->clk_enable_map;
+}
+
+/**
+ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
+ WARN_ON(dst->metadata != src->metadata))
+ return;
+
+ if (dst->hwcnt_enable_map != NULL) {
+ size_t i;
+ size_t const bitfld_count =
+ dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
+
+ if (WARN_ON(!src->hwcnt_enable_map))
+ return;
+
+ for (i = 0; i < bitfld_count; i++)
+ dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i];
+ }
+
+ dst->clk_enable_map |= src->clk_enable_map;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block
+ * instance are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if any values in the block are enabled, else false.
+ */
+static inline bool
+kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp,
+ size_t blk, size_t blk_inst)
+{
+ bool any_enabled = false;
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ const u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
+ size_t bitfld_idx;
+
+ if (WARN_ON(!enable_map))
+ return false;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ u64 block_enable_map_mask = U64_MAX;
+
+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+ block_enable_map_mask = (1ull << remaining_values) - 1;
+
+ any_enabled = any_enabled || (block_enable_map[bitfld_idx] & block_enable_map_mask);
+ }
+
+ return any_enabled;
+}
+
+/**
+ * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * Return: true if any values are enabled, else false.
+ */
+static inline bool
+kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map)
+{
+ size_t grp, blk, blk_inst;
+ u64 clk_enable_map_mask;
+
+ if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata))
+ return false;
+
+ clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1;
+
+ if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask))
+ return true;
+
+ kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst)
+ {
+ if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block
+ * instance is enabled.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to check in the block instance.
+ *
+ * Return: true if the value was enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_value_enabled(const u64 *bitfld, size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ return (bitfld[idx] & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block
+ * instance.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to enable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_value(u64 *bitfld, size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ bitfld[idx] |= mask;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block
+ * instance.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to disable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ bitfld[idx] &= ~mask;
+}
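+
+/*
+ * Example (illustrative sketch): enabling only the first counter (the value
+ * immediately after the headers) of one block instance. map, grp, blk,
+ * blk_inst and hdr_cnt are assumed to come from the caller, with hdr_cnt
+ * obtained via kbase_hwcnt_metadata_block_headers_count().
+ *
+ *	u64 *blk_em = kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst);
+ *
+ *	kbase_hwcnt_enable_map_block_disable_all(map, grp, blk, blk_inst);
+ *	kbase_hwcnt_enable_map_block_enable_value(blk_em, hdr_cnt);
+ */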
+
+/**
+ * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be
+ * initialised to undefined values, so must be used as a copy destination,
+ * or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_free() - Free a dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer structure, or on an already
+ * freed dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @n: Number of dump buffers to allocate.
+ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised.
+ *
+ * A single zeroed contiguous page allocation will be used for all of the
+ * buffers inside the array, where:
+ * dump_bufs->bufs[n].dump_buf == page_addr + n * metadata->dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array.
+ * @dump_bufs: Dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer array structure, or on an
+ * already freed dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
+ * instance's dump buffer.
+ * @buf: Non-NULL pointer to dump buffer.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: u64* to the dump buffer for the block instance.
+ */
+static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf))
+ return NULL;
+
+ if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) ||
+ WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) ||
+ WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ return buf->dump_buf;
+
+ return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index +
+ buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
+ (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst);
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst.
+ * After the operation, all non-enabled values
+ * will be undefined.
+ * @dst: Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, size_t val_cnt)
+{
+ if (WARN_ON(!dst_blk))
+ return;
+
+ memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst.
+ * After the operation, all values
+ * (including padding bytes) will be
+ * zero.
+ * Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst);
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in
+ * dst (including padding bytes and
+ * unavailable blocks).
+ * After the operation, all enabled
+ * values will be unchanged.
+ * @dst: Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled
+ * values in a block.
+ * After the operation, all
+ * enabled values will be
+ * unchanged.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em,
+ size_t val_cnt)
+{
+ size_t val;
+
+ if (WARN_ON(!dst_blk))
+ return;
+
+ for (val = 0; val < val_cnt; val++) {
+ if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val))
+ dst_blk[val] = 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst.
+ * After the operation, all non-enabled values
+ * will be undefined.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, const u64 *src_blk,
+ size_t val_cnt)
+{
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ /* Copy all the counters in the block instance.
+ * Values of non-enabled counters are undefined.
+ */
+ memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to
+ * dst.
+ * After the operation, all non-enabled
+ * values (including padding bytes) will
+ * be zero.
+ * Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values
+ * from src to dst.
+ * After the operation, all
+ * non-enabled values will be
+ * zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ *
+ * After the copy, any disabled values in dst will be zero.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt)
+{
+ size_t val;
+
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ for (val = 0; val < val_cnt; val++) {
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
+
+ dst_blk[val] = val_enabled ? src_blk[val] : 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and
+ * accumulate all enabled counters from
+ * src to dst.
+ * After the operation, all non-enabled
+ * values will be undefined.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and
+ * accumulate all block counters
+ * from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, const u64 *src_blk,
+ size_t hdr_cnt, size_t ctr_cnt)
+{
+ size_t ctr;
+
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ /* Copy all the headers in the block instance.
+ * Values of non-enabled headers are undefined.
+ */
+ memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES);
+
+ /* Accumulate all the counters in the block instance.
+ * Values of non-enabled counters are undefined.
+ */
+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++)
+ dst_blk[ctr] += src_blk[ctr];
+}
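+
+/*
+ * Worked example (illustrative): with hdr_cnt == 4 and ctr_cnt == 60,
+ * dst_blk[0..3] are overwritten with the src headers, and each of
+ * dst_blk[4..63] has the corresponding src counter added to it.
+ */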
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and
+ * accumulate all enabled counters
+ * from src to dst.
+ * After the operation, all
+ * non-enabled values (including
+ * padding bytes) will be zero.
+ * Slower than the non-strict
+ * variant.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block
+ * headers and accumulate
+ * all block counters from
+ * src to dst.
+ * After the operation, all
+ * non-enabled values will
+ * be zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(u64 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em,
+ size_t hdr_cnt, size_t ctr_cnt)
+{
+ size_t ctr;
+
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, hdr_cnt);
+
+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+ bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, ctr);
+
+ if (ctr_enabled)
+ dst_blk[ctr] += src_blk[ctr];
+ else
+ dst_blk[ctr] = 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in the
+ * metadata.
+ * @md: Non-NULL pointer to metadata.
+ * @clk: size_t variable used as clock iterator.
+ */
+#define kbase_hwcnt_metadata_for_each_clock(md, clk) for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++)
+
+/**
+ * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled
+ * in clk_enable_map.
+ * @clk_enable_map: An enable map for clock domains.
+ * @index: Index of the enable map for clock domain.
+ *
+ * Return: true if the index of the clock domain is enabled, else false.
+ */
+static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, const size_t index)
+{
+ if (WARN_ON(index >= 64))
+ return false;
+ if (clk_enable_map & (1ull << index))
+ return true;
+ return false;
+}
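+
+/*
+ * For example (illustrative): with clk_enable_map == 0x5, indices 0 and 2
+ * report as enabled, while index 1 reports as disabled.
+ */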
+
+#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
new file mode 100644
index 0000000..d618764
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure.
+ * @hctx: Hardware counter context being virtualized.
+ * @dump_threshold_ns: Minimum period between dumps performed on behalf of
+ * different clients. If a client requests a dump within
+ * this period of another client's dump, no new accumulator
+ * dump is performed and the values already accumulated for
+ * the requesting client are returned instead. If 0, rate
+ * limiting is disabled.
+ * @metadata: Hardware counter metadata.
+ * @lock: Lock acquired at all entrypoints, to protect mutable
+ * state.
+ * @client_count: Current number of virtualizer clients.
+ * @clients: List of virtualizer clients.
+ * @accum: Hardware counter accumulator. NULL if no clients.
+ * @scratch_map: Enable map used as scratch space during counter changes.
+ * @scratch_buf: Dump buffer used as scratch space during dumps.
+ * @ts_last_dump_ns: End time of most recent dump across all clients.
+ */
+struct kbase_hwcnt_virtualizer {
+ struct kbase_hwcnt_context *hctx;
+ u64 dump_threshold_ns;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct mutex lock;
+ size_t client_count;
+ struct list_head clients;
+ struct kbase_hwcnt_accumulator *accum;
+ struct kbase_hwcnt_enable_map scratch_map;
+ struct kbase_hwcnt_dump_buffer scratch_buf;
+ u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure.
+ * @node: List node used for virtualizer client list.
+ * @hvirt: Hardware counter virtualizer.
+ * @enable_map: Enable map with client's current enabled counters.
+ * @accum_buf: Dump buffer with client's current accumulated counters.
+ * @has_accum: True if accum_buf contains any accumulated counters.
+ * @ts_start_ns: Counter collection start time of current dump.
+ */
+struct kbase_hwcnt_virtualizer_client {
+ struct list_head node;
+ struct kbase_hwcnt_virtualizer *hvirt;
+ struct kbase_hwcnt_enable_map enable_map;
+ struct kbase_hwcnt_dump_buffer accum_buf;
+ bool has_accum;
+ u64 ts_start_ns;
+};
+
+const struct kbase_hwcnt_metadata *
+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ if (!hvirt)
+ return NULL;
+
+ return hvirt->metadata;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory.
+ * @hvcli: Pointer to virtualizer client.
+ *
+ * Will safely free a client in any partial state of construction.
+ */
+static void kbasep_hwcnt_virtualizer_client_free(struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ if (!hvcli)
+ return;
+
+ kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf);
+ kbase_hwcnt_enable_map_free(&hvcli->enable_map);
+ kfree(hvcli);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer
+ * client.
+ * @metadata: Non-NULL pointer to counter metadata.
+ * @out_hvcli: Non-NULL pointer to where created client will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *hvcli = NULL;
+
+ WARN_ON(!metadata);
+ WARN_ON(!out_hvcli);
+
+ hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL);
+ if (!hvcli)
+ return -ENOMEM;
+
+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf);
+ if (errcode)
+ goto error;
+
+ *out_hvcli = hvcli;
+ return 0;
+error:
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a
+ * client's accumulation buffer.
+ * @hvcli: Non-NULL pointer to virtualizer client.
+ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from.
+ */
+static void
+kbasep_hwcnt_virtualizer_client_accumulate(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ WARN_ON(!hvcli);
+ WARN_ON(!dump_buf);
+ lockdep_assert_held(&hvcli->hvirt->lock);
+
+ if (hvcli->has_accum) {
+ /* If already some accumulation, accumulate */
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ } else {
+ /* If no accumulation, copy */
+ kbase_hwcnt_dump_buffer_copy(&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ }
+ hvcli->has_accum = true;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter
+ * accumulator after final client
+ * removal.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Will safely terminate the accumulator in any partial state of initialisation.
+ */
+static void kbasep_hwcnt_virtualizer_accumulator_term(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ WARN_ON(!hvirt);
+ lockdep_assert_held(&hvirt->lock);
+ WARN_ON(hvirt->client_count);
+
+ kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf);
+ kbase_hwcnt_enable_map_free(&hvirt->scratch_map);
+ kbase_hwcnt_accumulator_release(hvirt->accum);
+ hvirt->accum = NULL;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter
+ * accumulator before first client
+ * addition.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_accumulator_init(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ int errcode;
+
+ WARN_ON(!hvirt);
+ lockdep_assert_held(&hvirt->lock);
+ WARN_ON(hvirt->client_count);
+ WARN_ON(hvirt->accum);
+
+ errcode = kbase_hwcnt_accumulator_acquire(hvirt->hctx, &hvirt->accum);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_enable_map_alloc(hvirt->metadata, &hvirt->scratch_map);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(hvirt->metadata, &hvirt->scratch_buf);
+ if (errcode)
+ goto error;
+
+ return 0;
+error:
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the
+ * virtualizer.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client to add.
+ * @enable_map: Non-NULL pointer to client's initial enable map.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode = 0;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!enable_map);
+ lockdep_assert_held(&hvirt->lock);
+
+ if (hvirt->client_count == 0)
+ /* First client added, so initialise the accumulator */
+ errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt);
+ if (errcode)
+ return errcode;
+
+ hvirt->client_count += 1;
+
+ if (hvirt->client_count == 1) {
+ /* First client, so just pass the enable map onwards as is */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map,
+ &ts_start_ns, &ts_end_ns, NULL);
+ } else {
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
+
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
+ /* Accumulate into only existing clients' accumulation bufs */
+ if (!errcode)
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos,
+ &hvirt->scratch_buf);
+ }
+ if (errcode)
+ goto error;
+
+ list_add(&hvcli->node, &hvirt->clients);
+ hvcli->hvirt = hvirt;
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+ hvcli->has_accum = false;
+ hvcli->ts_start_ns = ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = ts_end_ns;
+
+ return 0;
+error:
+ hvirt->client_count -= 1;
+ if (hvirt->client_count == 0)
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the
+ * virtualizer.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client to remove.
+ */
+static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ int errcode = 0;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ lockdep_assert_held(&hvirt->lock);
+
+ list_del(&hvcli->node);
+ hvirt->client_count -= 1;
+
+ if (hvirt->client_count == 0) {
+ /* Last client removed, so terminate the accumulator */
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ } else {
+ struct kbase_hwcnt_virtualizer_client *pos;
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
+ /* Accumulate into remaining clients' accumulation bufs */
+ if (!errcode) {
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos,
+ &hvirt->scratch_buf);
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = ts_end_ns;
+ }
+ }
+ WARN_ON(errcode);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ * currently enabled counters,
+ * and enable a new set of
+ * counters that will be used for
+ * subsequent dumps.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @enable_map: Non-NULL pointer to the new counter enable map for the client.
+ * Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_set_counters(
+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map, u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!enable_map);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(enable_map->metadata != hvirt->metadata);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ /* Ignore the enable map of the selected client */
+ if (pos != hvcli)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
+
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
+ if (errcode)
+ return errcode;
+
+ /* Accumulate into all accumulation bufs except the selected client's */
+ list_for_each_entry (pos, &hvirt->clients, node)
+ if (pos != hvcli)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
+
+ /* Finally, write into the dump buf */
+ if (dump_buf) {
+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+ if (hvcli->has_accum) {
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src,
+ &hvcli->enable_map);
+ src = &hvcli->accum_buf;
+ }
+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+ }
+ hvcli->has_accum = false;
+
+ /* Update the selected client's enable map */
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+
+ return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer *hvirt;
+
+ if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hvirt = hvcli->hvirt;
+
+ if ((enable_map->metadata != hvirt->metadata) ||
+ (dump_buf && (dump_buf->metadata != hvirt->metadata)))
+ return -EINVAL;
+
+ mutex_lock(&hvirt->lock);
+
+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+ /*
+ * If there's only one client with no prior accumulation, we can
+ * completely skip the virtualize and just pass through the call
+ * to the accumulator, saving a fair few copies and
+ * accumulations.
+ */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map,
+ ts_start_ns, ts_end_ns, dump_buf);
+
+ if (!errcode) {
+ /* Update the selected client's enable map */
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+ }
+ } else {
+ /* Otherwise, do the full virtualize */
+ errcode = kbasep_hwcnt_virtualizer_client_set_counters(
+ hvirt, hvcli, enable_map, ts_start_ns, ts_end_ns, dump_buf);
+ }
+
+ mutex_unlock(&hvirt->lock);
+
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ * currently enabled counters.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ /* Perform the dump */
+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns,
+ &hvirt->scratch_buf);
+ if (errcode)
+ return errcode;
+
+ /* Accumulate into all accumulation bufs except the selected client's */
+ list_for_each_entry (pos, &hvirt->clients, node)
+ if (pos != hvcli)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
+
+ /* Finally, write into the dump buf */
+ if (dump_buf) {
+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+ if (hvcli->has_accum) {
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src,
+ &hvcli->enable_map);
+ src = &hvcli->accum_buf;
+ }
+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+ }
+ hvcli->has_accum = false;
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the
+ * client's currently enabled counters
+ * if it hasn't been rate limited,
+ * otherwise return the client's most
+ * recent accumulation.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ bool rate_limited = true;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ if (hvirt->dump_threshold_ns == 0) {
+ /* Threshold == 0, so rate limiting disabled */
+ rate_limited = false;
+ } else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) {
+ /* Last dump was performed by this client, and dumps from an
+ * individual client are never rate limited
+ */
+ rate_limited = false;
+ } else {
+ const u64 ts_ns = kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum);
+ const u64 time_since_last_dump_ns = ts_ns - hvirt->ts_last_dump_ns;
+
+ /* Dump period equals or exceeds the threshold */
+ if (time_since_last_dump_ns >= hvirt->dump_threshold_ns)
+ rate_limited = false;
+ }
+
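+ /* Worked example (illustrative): with dump_threshold_ns == 1000000 (1 ms),
+ * if client A dumped at time t and client B requests a dump at t + 0.4 ms,
+ * B is rate limited: it is given the values already accumulated for it
+ * during A's dump, and *ts_end_ns is reported as A's dump time.
+ */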
+ if (!rate_limited)
+ return kbasep_hwcnt_virtualizer_client_dump(hvirt, hvcli, ts_start_ns, ts_end_ns,
+ dump_buf);
+
+ /* If we've gotten this far, the client must have something accumulated,
+ * otherwise it is a logic error.
+ */
+ WARN_ON(!hvcli->has_accum);
+
+ if (dump_buf)
+ kbase_hwcnt_dump_buffer_copy(dump_buf, &hvcli->accum_buf, &hvcli->enable_map);
+ hvcli->has_accum = false;
+
+ *ts_start_ns = hvcli->ts_start_ns;
+ *ts_end_ns = hvirt->ts_last_dump_ns;
+ hvcli->ts_start_ns = hvirt->ts_last_dump_ns;
+
+ return 0;
+}
+
+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer *hvirt;
+
+ if (!hvcli || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hvirt = hvcli->hvirt;
+
+ if (dump_buf && (dump_buf->metadata != hvirt->metadata))
+ return -EINVAL;
+
+ mutex_lock(&hvirt->lock);
+
+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+ /*
+ * If there's only one client with no prior accumulation, we can
+ * completely skip the virtualize and just pass through the call
+ * to the accumulator, saving a fair few copies and
+ * accumulations.
+ */
+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns,
+ dump_buf);
+
+ if (!errcode) {
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+ }
+ } else {
+ /* Otherwise, do the full virtualize */
+ errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+ hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+ }
+
+ mutex_unlock(&hvirt->lock);
+
+ return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *hvcli;
+
+ if (!hvirt || !enable_map || !out_hvcli || (enable_map->metadata != hvirt->metadata))
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_virtualizer_client_alloc(hvirt->metadata, &hvcli);
+ if (errcode)
+ return errcode;
+
+ mutex_lock(&hvirt->lock);
+
+ errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map);
+
+ mutex_unlock(&hvirt->lock);
+
+ if (errcode) {
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+ return errcode;
+ }
+
+ *out_hvcli = hvcli;
+ return 0;
+}
+
+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ if (!hvcli)
+ return;
+
+ mutex_lock(&hvcli->hvirt->lock);
+
+ kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli);
+
+ mutex_unlock(&hvcli->hvirt->lock);
+
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+}
+
+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns,
+ struct kbase_hwcnt_virtualizer **out_hvirt)
+{
+ struct kbase_hwcnt_virtualizer *virt;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if (!hctx || !out_hvirt)
+ return -EINVAL;
+
+ metadata = kbase_hwcnt_context_metadata(hctx);
+ if (!metadata)
+ return -EINVAL;
+
+ virt = kzalloc(sizeof(*virt), GFP_KERNEL);
+ if (!virt)
+ return -ENOMEM;
+
+ virt->hctx = hctx;
+ virt->dump_threshold_ns = dump_threshold_ns;
+ virt->metadata = metadata;
+
+ mutex_init(&virt->lock);
+ INIT_LIST_HEAD(&virt->clients);
+
+ *out_hvirt = virt;
+ return 0;
+}
+
+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ if (!hvirt)
+ return;
+
+ /* Non-zero client count implies client leak */
+ if (WARN_ON(hvirt->client_count != 0)) {
+ struct kbase_hwcnt_virtualizer_client *pos, *n;
+
+ list_for_each_entry_safe (pos, n, &hvirt->clients, node)
+ kbase_hwcnt_virtualizer_client_destroy(pos);
+ }
+
+ WARN_ON(hvirt->client_count != 0);
+ WARN_ON(hvirt->accum);
+
+ kfree(hvirt);
+}
+
+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt,
+ struct work_struct *work)
+{
+ if (WARN_ON(!hvirt) || WARN_ON(!work))
+ return false;
+
+ return kbase_hwcnt_context_queue_work(hvirt->hctx, work);
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h
new file mode 100644
index 0000000..485ba74
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter virtualizer API.
+ *
+ * Virtualizes a hardware counter context, so multiple clients can access
+ * a single hardware counter resource as though each was the exclusive user.
+ */
+
+#ifndef _KBASE_HWCNT_VIRTUALIZER_H_
+#define _KBASE_HWCNT_VIRTUALIZER_H_
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_hwcnt_virtualizer_client;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer.
+ * @hctx: Non-NULL pointer to the hardware counter context to
+ * virtualize.
+ * @dump_threshold_ns: Minimum period between dumps performed on behalf of
+ * different clients. If a client requests a dump within
+ * this period of another client's dump, no new accumulator
+ * dump is performed and the values already accumulated for
+ * the requesting client are returned instead. If 0, rate
+ * limiting is disabled.
+ * @out_hvirt: Non-NULL pointer to where the pointer to the created
+ * virtualizer will be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns,
+ struct kbase_hwcnt_virtualizer **out_hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer.
+ * @hvirt: Pointer to virtualizer to be terminated.
+ */
+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by
+ * the virtualizer, so related counter data
+ * structures can be created.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *
+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @enable_map: Non-NULL pointer to the enable map for the client. Must have the
+ * same metadata as the virtualizer.
+ * @out_hvcli: Non-NULL pointer to where the pointer to the created client will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client.
+ * @hvcli: Pointer to the hardware counter client.
+ */
+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ * currently enabled counters, and
+ * enable a new set of counters
+ * that will be used for
+ * subsequent dumps.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @enable_map: Non-NULL pointer to the new counter enable map for the client.
+ * Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ * currently enabled counters.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
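+/*
+ * Illustrative usage sketch (not part of the API contract): "hvirt" is assumed
+ * to come from kbase_hwcnt_virtualizer_init(), and "map" and "buf" are assumed
+ * to be an enable map and dump buffer created from the virtualizer's metadata;
+ * error handling is trimmed.
+ *
+ *     struct kbase_hwcnt_virtualizer_client *cli;
+ *     u64 ts_start_ns, ts_end_ns;
+ *
+ *     kbase_hwcnt_virtualizer_client_create(hvirt, &map, &cli);
+ *     kbase_hwcnt_virtualizer_client_dump(cli, &ts_start_ns, &ts_end_ns, &buf);
+ *     kbase_hwcnt_virtualizer_client_destroy(cli);
+ */
+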
+/**
+ * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async
+ * work on a workqueue specialized for
+ * hardware counters.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @work: Non-NULL pointer to work to queue.
+ *
+ * Return: false if work was already on a queue, true otherwise.
+ *
+ * This is a convenience function that directly calls the underlying
+ * kbase_hwcnt_context's kbase_hwcnt_context_queue_work.
+ */
+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt,
+ struct work_struct *work);
+
+#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h
new file mode 100644
index 0000000..501c008
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter watchdog.
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_H_
+
+#include <linux/types.h>
+
+/*
+ * Opaque structure of information used to create a watchdog timer interface.
+ */
+struct kbase_hwcnt_watchdog_info;
+
+/**
+ * typedef kbase_hwcnt_watchdog_callback_fn - Callback function called when the watchdog timer expires
+ *
+ * @user_data: Pointer to the callback user data.
+ */
+typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_enable_fn - Enable watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ * @period_ms: Period in milliseconds of the watchdog timer
+ * @callback: Non-NULL pointer to a watchdog callback function
+ * @user_data: Pointer to the user data, used when watchdog timer callback is called
+ *
+ * Return: 0 if the watchdog timer was enabled successfully, error code otherwise.
+ */
+typedef int kbase_hwcnt_watchdog_enable_fn(const struct kbase_hwcnt_watchdog_info *timer,
+ u32 period_ms,
+ kbase_hwcnt_watchdog_callback_fn *callback,
+ void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ */
+typedef void kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
+
+/**
+ * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ * @delay_ms: New delay in milliseconds after which the watchdog timer will expire
+ */
+typedef void kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer,
+ u32 delay_ms);
+
+/**
+ * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface.
+ *
+ * @timer: Immutable watchdog timer info
+ * @enable: Function ptr to enable watchdog
+ * @disable: Function ptr to disable watchdog
+ * @modify: Function ptr to modify watchdog
+ */
+struct kbase_hwcnt_watchdog_interface {
+ const struct kbase_hwcnt_watchdog_info *timer;
+ kbase_hwcnt_watchdog_enable_fn *enable;
+ kbase_hwcnt_watchdog_disable_fn *disable;
+ kbase_hwcnt_watchdog_modify_fn *modify;
+};
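+
+/*
+ * Illustrative usage sketch (hypothetical backend code; "wd_if" is a populated
+ * struct kbase_hwcnt_watchdog_interface, and "backend_timeout"/"backend" are
+ * made-up names; error handling is trimmed):
+ *
+ *     static void backend_timeout(void *user_data)
+ *     {
+ *             ... e.g. trigger a periodic dump so counters cannot overflow ...
+ *     }
+ *
+ *     wd_if.enable(wd_if.timer, 1000, backend_timeout, backend);
+ *     wd_if.modify(wd_if.timer, 500);
+ *     wd_if.disable(wd_if.timer);
+ */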
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
new file mode 100644
index 0000000..4caa832
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h"
+
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog
+ * interface.
+ *
+ * @workq: Single threaded work queue in which to execute callbacks.
+ * @dwork: Worker to execute callback function.
+ * @timer_enabled: True if watchdog timer enabled, otherwise false
+ * @callback: Watchdog callback function
+ * @user_data: Pointer to user data passed as argument to the callback
+ * function
+ */
+struct kbase_hwcnt_watchdog_if_timer_info {
+ struct workqueue_struct *workq;
+ struct delayed_work dwork;
+ bool timer_enabled;
+ kbase_hwcnt_watchdog_callback_fn *callback;
+ void *user_data;
+};
+
+/**
+ * kbasep_hwcnt_watchdog_callback() - Watchdog callback
+ *
+ * @work: Work structure
+ *
+ * Function to be called in a work queue after watchdog timer has expired.
+ */
+static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const info =
+ container_of(work, struct kbase_hwcnt_watchdog_if_timer_info, dwork.work);
+
+ if (info->callback)
+ info->callback(info->user_data);
+}
+
+static int kbasep_hwcnt_watchdog_if_timer_enable(
+ const struct kbase_hwcnt_watchdog_info *const timer, u32 const period_ms,
+ kbase_hwcnt_watchdog_callback_fn *const callback, void *const user_data)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
+
+ if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled))
+ return -EINVAL;
+
+ timer_info->callback = callback;
+ timer_info->user_data = user_data;
+
+ queue_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(period_ms));
+ timer_info->timer_enabled = true;
+
+ return 0;
+}
+
+static void
+kbasep_hwcnt_watchdog_if_timer_disable(const struct kbase_hwcnt_watchdog_info *const timer)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
+
+ if (WARN_ON(!timer))
+ return;
+
+ if (!timer_info->timer_enabled)
+ return;
+
+ cancel_delayed_work_sync(&timer_info->dwork);
+ timer_info->timer_enabled = false;
+}
+
+static void
+kbasep_hwcnt_watchdog_if_timer_modify(const struct kbase_hwcnt_watchdog_info *const timer,
+ u32 const delay_ms)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
+
+ if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled))
+ return;
+
+ mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms));
+}
+
+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
+
+ if (WARN_ON(!watchdog_if))
+ return;
+
+ timer_info = (void *)watchdog_if->timer;
+
+ if (WARN_ON(!timer_info))
+ return;
+
+ destroy_workqueue(timer_info->workq);
+ kfree(timer_info);
+
+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+ .timer = NULL, .enable = NULL, .disable = NULL, .modify = NULL
+ };
+}
+
+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
+
+ if (WARN_ON(!watchdog_if))
+ return -EINVAL;
+
+ timer_info = kmalloc(sizeof(*timer_info), GFP_KERNEL);
+ if (!timer_info)
+ return -ENOMEM;
+
+ *timer_info = (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = false };
+
+ INIT_DELAYED_WORK(&timer_info->dwork, kbasep_hwcnt_watchdog_callback);
+
+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+ .timer = (void *)timer_info,
+ .enable = kbasep_hwcnt_watchdog_if_timer_enable,
+ .disable = kbasep_hwcnt_watchdog_if_timer_disable,
+ .modify = kbasep_hwcnt_watchdog_if_timer_modify,
+ };
+
+ timer_info->workq = alloc_workqueue("mali_hwc_watchdog_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (timer_info->workq)
+ return 0;
+
+ kfree(timer_info);
+ return -ENOMEM;
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
new file mode 100644
index 0000000..a545ad3
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_watchdog_interface for HWC backend
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+
+struct kbase_hwcnt_watchdog_interface;
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface for the hardware counter backend.
+ *
+ * @watchdog_if: Non-NULL pointer to watchdog interface that is filled in on creation success
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface for the hardware
+ * counter backend.
+ *
+ * @watchdog_if: Pointer to watchdog interface to destroy
+ */
+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *watchdog_if);
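+
+/*
+ * Illustrative usage sketch (hypothetical caller; error handling trimmed):
+ *
+ *     struct kbase_hwcnt_watchdog_interface wd_if;
+ *
+ *     if (!kbase_hwcnt_watchdog_if_timer_create(&wd_if)) {
+ *             ... use wd_if.enable / wd_if.modify / wd_if.disable ...
+ *             kbase_hwcnt_watchdog_if_timer_destroy(&wd_if);
+ *     }
+ */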
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */