path: root/mali_kbase/hwcnt
author    Jörg Wagner <jorwag@google.com>  2022-12-15 14:01:25 +0000
committer Jörg Wagner <jorwag@google.com>  2022-12-15 16:27:59 +0000
commit    9ff5b6f2510d94765def3cf7c1fda01e387cabab (patch)
tree      d455bcd53cca74df918b3dd0092e806fb29e1461 /mali_kbase/hwcnt
parent    c30533582604fe0365bc3ce4e9e8e19dec3109da (diff)
download  gpu-9ff5b6f2510d94765def3cf7c1fda01e387cabab.tar.gz
Mali Valhall Android DDK r40p0-01eac0 KMD
Provenance: 056ded72d351d1bf6319f7b2b925496dd6ad304f (ipdelivery/EAC/v_r40p0)
VX504X08X-BU-00000-r40p0-01eac0 - Valhall Android DDK
VX504X08X-BU-60000-r40p0-01eac0 - Valhall Android Document Bundle
VX504X08X-DC-11001-r40p0-01eac0 - Valhall Android DDK Software Errata
VX504X08X-SW-99006-r40p0-01eac0 - Valhall Android Renderscript AOSP parts
Change-Id: I6db6b45c73c5447dd246533246e65b5ef2c8872f
Diffstat (limited to 'mali_kbase/hwcnt')
-rw-r--r--  mali_kbase/hwcnt/Kbuild                                            37
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h               225
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c          1892
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h           153
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h        302
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c     784
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h      49
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c            863
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h             58
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c   829
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h    65
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt.c                               775
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h                   139
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_context.h                       148
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c                           738
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h                           407
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c                    298
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h                    330
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.c                         511
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.h                        1231
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c                   744
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h                   151
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h                    89
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c             157
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h              48
25 files changed, 11023 insertions, 0 deletions
diff --git a/mali_kbase/hwcnt/Kbuild b/mali_kbase/hwcnt/Kbuild
new file mode 100644
index 0000000..8c8775f
--- /dev/null
+++ b/mali_kbase/hwcnt/Kbuild
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+#
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+#
+
+mali_kbase-y += \
+ hwcnt/mali_kbase_hwcnt.o \
+ hwcnt/mali_kbase_hwcnt_gpu.o \
+ hwcnt/mali_kbase_hwcnt_gpu_narrow.o \
+ hwcnt/mali_kbase_hwcnt_types.o \
+ hwcnt/mali_kbase_hwcnt_virtualizer.o \
+ hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o
+
+ifeq ($(CONFIG_MALI_CSF_SUPPORT),y)
+ mali_kbase-y += \
+ hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \
+ hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o
+else
+ mali_kbase-y += \
+ hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \
+ hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o
+endif
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
new file mode 100644
index 0000000..6cfa6f5
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter backends.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_H_
+#define _KBASE_HWCNT_BACKEND_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/*
+ * struct kbase_hwcnt_backend_info - Opaque pointer to information used to
+ * create an instance of a hardware counter
+ * backend.
+ */
+struct kbase_hwcnt_backend_info;
+
+/*
+ * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter
+ * backend, used to perform dumps.
+ */
+struct kbase_hwcnt_backend;
+
+/*
+ * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter
+ * metadata that describes the layout
+ * of the counter data structures.
+ * @info: Non-NULL pointer to backend info.
+ *
+ * Multiple calls to this function with the same info are guaranteed to return
+ * the same metadata object each time.
+ *
+ * Return: Non-NULL pointer to immutable hardware counter metadata.
+ */
+typedef const struct kbase_hwcnt_metadata *
+kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info);
+
+/**
+ * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend.
+ * @info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * All uses of the created hardware counter backend must be externally
+ * synchronised.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend);
+
+/**
+ * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend.
+ * @backend: Pointer to backend to be terminated.
+ */
+typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend
+ * timestamp.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * Return: Backend timestamp in nanoseconds.
+ */
+typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the
+ * backend.
+ * @backend: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * The enable_map must have been created using the interface's metadata.
+ * If the backend has already been enabled, an error is returned.
+ *
+ * May be called in an atomic context.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping
+ * with the backend.
+ * @backend: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ *
+ * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be
+ * called in an atomic context with the spinlock documented by the specific
+ * backend interface held.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int
+kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
+ * the backend.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is already disabled, does nothing.
+ * Any undumped counter values since the last dump_get() will be lost.
+ */
+typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
+ * counters.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter
+ * dump.
+ * @backend: Non-NULL pointer to backend.
+ * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was
+ * requested will be written out to on success.
+ *
+ * If the backend is not enabled or another dump is already in progress,
+ * returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested
+ * counter dump has completed.
+ * @backend: Non-NULL pointer to backend.
+ *
+ * If the backend is not enabled, returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend);
+
+/**
+ * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate the counters
+ * dumped after the last dump request
+ * into the dump buffer.
+ * @backend: Non-NULL pointer to backend.
+ * @dump_buffer: Non-NULL pointer to destination dump buffer.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate: True if counters should be accumulated into dump_buffer, rather
+ * than copied.
+ *
+ * The resultant contents of the dump buffer are only well defined if a prior
+ * call to dump_wait returned successfully, and a new dump has not yet been
+ * requested by a call to dump_request.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ bool accumulate);
+
+/**
+ * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual
+ * interface.
+ * @info: Immutable info used to initialise an instance of the
+ * backend.
+ * @metadata: Function ptr to get the immutable hardware counter
+ * metadata.
+ * @init: Function ptr to initialise an instance of the backend.
+ * @term: Function ptr to terminate an instance of the backend.
+ * @timestamp_ns: Function ptr to get the current backend timestamp.
+ * @dump_enable: Function ptr to enable dumping.
+ * @dump_enable_nolock: Function ptr to enable dumping while the
+ * backend-specific spinlock is already held.
+ * @dump_disable: Function ptr to disable dumping.
+ * @dump_clear: Function ptr to clear counters.
+ * @dump_request: Function ptr to request a dump.
+ * @dump_wait: Function ptr to wait until a dump has completed.
+ * @dump_get: Function ptr to copy or accumulate dump into a dump
+ * buffer.
+ */
+struct kbase_hwcnt_backend_interface {
+ const struct kbase_hwcnt_backend_info *info;
+ kbase_hwcnt_backend_metadata_fn *metadata;
+ kbase_hwcnt_backend_init_fn *init;
+ kbase_hwcnt_backend_term_fn *term;
+ kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns;
+ kbase_hwcnt_backend_dump_enable_fn *dump_enable;
+ kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock;
+ kbase_hwcnt_backend_dump_disable_fn *dump_disable;
+ kbase_hwcnt_backend_dump_clear_fn *dump_clear;
+ kbase_hwcnt_backend_dump_request_fn *dump_request;
+ kbase_hwcnt_backend_dump_wait_fn *dump_wait;
+ kbase_hwcnt_backend_dump_get_fn *dump_get;
+};
+
+#endif /* _KBASE_HWCNT_BACKEND_H_ */
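To make the call order through this virtual interface concrete, the sketch below walks one backend instance through a single dump. It is illustrative only and not part of this patch: it assumes an already-populated struct kbase_hwcnt_backend_interface (called iface here) together with a pre-created enable map and dump buffer, and it omits the locking and error recovery that real callers such as the virtualizer perform.

static int example_single_dump(const struct kbase_hwcnt_backend_interface *iface,
			       const struct kbase_hwcnt_enable_map *enable_map,
			       struct kbase_hwcnt_dump_buffer *dump_buf)
{
	struct kbase_hwcnt_backend *backend;
	u64 dump_time_ns;
	int err;

	/* Create a backend instance from the immutable backend info. */
	err = iface->init(iface->info, &backend);
	if (err)
		return err;

	/* Start counter collection with the requested counters enabled. */
	err = iface->dump_enable(backend, enable_map);
	if (err)
		goto out_term;

	/* Request a dump, wait for it to complete, then copy (not accumulate)
	 * the result into the destination buffer.
	 */
	err = iface->dump_request(backend, &dump_time_ns);
	if (!err)
		err = iface->dump_wait(backend);
	if (!err)
		err = iface->dump_get(backend, dump_buf, enable_map, false);

	iface->dump_disable(backend);
out_term:
	iface->term(backend);
	return err;
}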
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
new file mode 100644
index 0000000..424a360
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -0,0 +1,1892 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/log2.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#ifndef BASE_MAX_NR_CLOCKS_REGULATORS
+#define BASE_MAX_NR_CLOCKS_REGULATORS 2
+#endif
+
+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/* Backend watch dog timer interval in milliseconds: 18 seconds. */
+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)18000)
+#else
+/* Backend watch dog timer interval in milliseconds: 1 second. */
+#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000)
+#endif /* IS_FPGA && !NO_MALI */
+
+/**
+ * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is
+ * an error.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and
+ * we are waiting for an ACK; the ACK could come from either PRFCNT_ACK or
+ * PROTMODE_ENTER_ACK, or the wait may end early if an error occurs.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been
+ * requested and we are waiting for an ACK; the ACK can only come from
+ * PRFCNT_ACK or an error. PROTMODE_ENTER_ACK does not apply here since a
+ * watchdog request can't be triggered in protected mode.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert
+ * immediately after receiving the ACK, so we know which index corresponds to
+ * the buffer we requested.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and
+ * now we have kicked off the worker.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now
+ * we have kicked off the worker to accumulate up to that insert and then copy
+ * the delta to the user buffer to prepare for dump_get().
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully.
+ *
+ * Valid state transitions:
+ * IDLE -> REQUESTED (on user dump request)
+ * IDLE -> WATCHDOG_REQUESTED (on watchdog request)
+ * IDLE -> QUERYING_INSERT (on user dump request in protected mode)
+ * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware)
+ * WATCHDOG_REQUESTED -> REQUESTED (on user dump request)
+ * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request)
+ * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission)
+ * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating)
+ * ACCUMULATING -> COMPLETED (on accumulation completion)
+ * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode)
+ * COMPLETED -> REQUESTED (on user dump request)
+ * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request)
+ * COMPLETED -> IDLE (on disable)
+ * ANY -> IDLE (on error)
+ */
+enum kbase_hwcnt_backend_csf_dump_state {
+ KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING,
+ KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED,
+};
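As a cross-check of the "Valid state transitions" list above, the same state machine can be written down as a small lookup table. The helper below is illustrative only and is not part of this patch; it assumes BIT() from <linux/bits.h> and relies on the enum ordering declared above.

/* Illustrative only: encode the documented dump-state transitions as bitmasks.
 * A transition back to IDLE is always allowed (the "ANY -> IDLE (on error)"
 * rule, which also covers "COMPLETED -> IDLE (on disable)").
 */
static const u8 example_valid_next_dump_state[] = {
	[KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED),
	[KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED] =
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) |
		BIT(KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT),
};

static bool example_dump_state_transition_is_valid(enum kbase_hwcnt_backend_csf_dump_state from,
						   enum kbase_hwcnt_backend_csf_dump_state to)
{
	/* ANY -> IDLE is always allowed (error and disable paths). */
	if (to == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE)
		return true;
	return example_valid_next_dump_state[from] & BIT(to);
}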
+
+/**
+ * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend
+ * is disabled.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in
+ * progress, waiting for firmware acknowledgment.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged,
+ * enable is done.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in
+ * progress, waiting for firmware acknowledgment.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been
+ * acknowledged, waiting for dump workers to be finished.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An
+ * unrecoverable error happened, waiting for dump workers to be finished.
+ *
+ * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error
+ * happened, and dump workers have finished, waiting for reset.
+ *
+ * Valid state transitions:
+ * DISABLED -> TRANSITIONING_TO_ENABLED (on enable)
+ * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack)
+ * ENABLED -> TRANSITIONING_TO_DISABLED (on disable)
+ * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack)
+ * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed)
+ * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error)
+ * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable
+ * error)
+ * UNRECOVERABLE_ERROR -> DISABLED (on before reset)
+ */
+enum kbase_hwcnt_backend_csf_enable_state {
+ KBASE_HWCNT_BACKEND_CSF_DISABLED,
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED,
+ KBASE_HWCNT_BACKEND_CSF_ENABLED,
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED,
+ KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER,
+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER,
+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR,
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_info - Information used to create an instance
+ * of a CSF hardware counter backend.
+ * @backend: Pointer to access CSF backend.
+ * @fw_in_protected_mode: True if FW is running in protected mode, else
+ * false.
+ * @unrecoverable_error_happened: True if an unrecoverable error happened, else
+ * false.
+ * @csf_if: CSF interface object pointer.
+ * @ring_buf_cnt: Dump buffer count in the ring buffer.
+ * @counter_set: The performance counter set to use.
+ * @metadata: Hardware counter metadata.
+ * @prfcnt_info: Performance counter information.
+ * @watchdog_if: Watchdog interface object pointer.
+ */
+struct kbase_hwcnt_backend_csf_info {
+ struct kbase_hwcnt_backend_csf *backend;
+ bool fw_in_protected_mode;
+ bool unrecoverable_error_happened;
+ struct kbase_hwcnt_backend_csf_if *csf_if;
+ u32 ring_buf_cnt;
+ enum kbase_hwcnt_set counter_set;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info;
+ struct kbase_hwcnt_watchdog_interface *watchdog_if;
+};
+
+/**
+ * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
+ * information.
+ * @hw_block_cnt: Total number of hardware counter blocks. The hardware counter
+ * blocks are sub-categorized into 4 classes: front-end, tiler, memory system,
+ * and shader. hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
+ * @fe_cnt: Front end block count.
+ * @tiler_cnt: Tiler block count.
+ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
+ * @shader_cnt: Shader Core block count.
+ * @fw_block_cnt: Total number of firmware counter blocks.
+ * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt).
+ * @shader_avail_mask: Bitmap of all shader cores in the system.
+ * @enable_mask_offset: Offset in array elements of enable mask in each block
+ * starting from the beginning of block.
+ * @headers_per_block: For any block, the number of counters designated as block's header.
+ * @counters_per_block: For any block, the number of counters designated as block's payload.
+ * @values_per_block: For any block, the number of counters in total (header + payload).
+ */
+struct kbase_hwcnt_csf_physical_layout {
+ u8 hw_block_cnt;
+ u8 fe_cnt;
+ u8 tiler_cnt;
+ u8 mmu_l2_cnt;
+ u8 shader_cnt;
+ u8 fw_block_cnt;
+ u8 block_cnt;
+ u64 shader_avail_mask;
+ size_t enable_mask_offset;
+ size_t headers_per_block;
+ size_t counters_per_block;
+ size_t values_per_block;
+};
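For concreteness, a worked example of how these fields relate. The configuration below is hypothetical and chosen only to illustrate the arithmetic; in particular the 256-byte block size and the 4 header counters per block are assumptions, not values taken from this patch (the 4-byte counter size follows from the u32 sample buffers used here).

/* Hypothetical configuration, for illustration only:
 *   core_mask = 0x3F            -> shader_cnt = fls64(0x3F) = 6
 *   l2_count  = 2               -> mmu_l2_cnt = 2
 *   one firmware block          -> fw_block_cnt = 1
 *   256-byte blocks of u32 counters, 4 headers per block (assumed)
 *
 *   values_per_block   = 256 / KBASE_HWCNT_VALUE_HW_BYTES = 256 / 4 = 64
 *   counters_per_block = values_per_block - headers_per_block = 64 - 4 = 60
 *   hw_block_cnt       = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt
 *                      = 1 + 1 + 2 + 6 = 10
 *   block_cnt          = fw_block_cnt + hw_block_cnt = 1 + 10 = 11
 */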
+
+/**
+ * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend.
+ * @info: CSF Info used to create the backend.
+ * @dump_state: The dumping state of the backend.
+ * @enable_state: The CSF backend internal enabled state.
+ * @insert_index_to_accumulate: The insert index in the ring buffer up to which
+ * samples need to be accumulated.
+ * @enable_state_waitq: Wait queue used to signal that an enable
+ * state change has completed.
+ * @to_user_buf: HWC sample buffer for client user, size
+ * metadata.dump_buf_bytes.
+ * @accum_buf: HWC sample buffer used as an internal
+ * accumulator, size metadata.dump_buf_bytes.
+ * @old_sample_buf: HWC sample buffer to save the previous values
+ * for delta calculation, size
+ * prfcnt_info.dump_bytes.
+ * @watchdog_last_seen_insert_idx: The insert index which watchdog has last
+ * seen, to check any new firmware automatic
+ * samples generated during the watchdog
+ * period.
+ * @ring_buf: Opaque pointer for ring buffer object.
+ * @ring_buf_cpu_base: CPU base address of the allocated ring buffer.
+ * @clk_enable_map: The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed: Cycle count elapsed for a given sample period.
+ * @prev_cycle_count: Previous cycle count to calculate the cycle
+ * count for sample period.
+ * @phys_layout: Physical memory layout information of HWC
+ * sample buffer.
+ * @dump_completed: Completion signaled by the dump worker when
+ * it has finished accumulating up to the
+ * insert_index_to_accumulate.
+ * Should be initialized to the "complete" state.
+ * @user_requested: Flag to indicate a dump_request called from
+ * user.
+ * @hwc_dump_workq: Single threaded work queue for HWC workers
+ * execution.
+ * @hwc_dump_work: Worker to accumulate samples.
+ * @hwc_threshold_work: Worker for consuming available samples when
+ * threshold interrupt raised.
+ */
+struct kbase_hwcnt_backend_csf {
+ struct kbase_hwcnt_backend_csf_info *info;
+ enum kbase_hwcnt_backend_csf_dump_state dump_state;
+ enum kbase_hwcnt_backend_csf_enable_state enable_state;
+ u32 insert_index_to_accumulate;
+ wait_queue_head_t enable_state_waitq;
+ u64 *to_user_buf;
+ u64 *accum_buf;
+ u32 *old_sample_buf;
+ u32 watchdog_last_seen_insert_idx;
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
+ void *ring_buf_cpu_base;
+ u64 clk_enable_map;
+ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+ struct kbase_hwcnt_csf_physical_layout phys_layout;
+ struct completion dump_completed;
+ bool user_requested;
+ struct workqueue_struct *hwc_dump_workq;
+ struct work_struct hwc_dump_work;
+ struct work_struct hwc_threshold_work;
+};
+
+static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
+{
+ WARN_ON(!csf_info);
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+ return (csf_info->backend != NULL);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count
+ * tracking.
+ *
+ * @backend_csf: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ */
+static void
+kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backend_csf,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ u64 clk_enable_map = enable_map->clk_enable_map;
+ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
+ size_t clk;
+
+ /* Read cycle count from CSF interface for both clock domains. */
+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
+ clk_enable_map);
+
+ kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk))
+ backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
+ }
+
+ /* Keep clk_enable_map for dump_request. */
+ backend_csf->clk_enable_map = clk_enable_map;
+}
+
+static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS];
+ size_t clk;
+
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
+ backend_csf->clk_enable_map);
+
+ kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) {
+ backend_csf->cycle_count_elapsed[clk] =
+ cycle_counts[clk] - backend_csf->prev_cycle_count[clk];
+ backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
+ }
+ }
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+
+ if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if)
+ return 0;
+
+ return backend_csf->info->csf_if->timestamp_ns(backend_csf->info->csf_if->ctx);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
+ * guarantee headers are enabled if any counter is required.
+ * @phys_enable_map: HWC physical enable map to be processed.
+ */
+static void
+kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map)
+{
+ WARN_ON(!phys_enable_map);
+
+ /* Enable header if any counter is required from user, the header is
+ * controlled by bit 0 of the enable mask.
+ */
+ if (phys_enable_map->fe_bm)
+ phys_enable_map->fe_bm |= 1;
+
+ if (phys_enable_map->tiler_bm)
+ phys_enable_map->tiler_bm |= 1;
+
+ if (phys_enable_map->mmu_l2_bm)
+ phys_enable_map->mmu_l2_bm |= 1;
+
+ if (phys_enable_map->shader_bm)
+ phys_enable_map->shader_bm |= 1;
+}
+
+static void kbasep_hwcnt_backend_csf_init_layout(
+ const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info,
+ struct kbase_hwcnt_csf_physical_layout *phys_layout)
+{
+ size_t shader_core_cnt;
+ size_t values_per_block;
+ size_t fw_blocks_count;
+ size_t hw_blocks_count;
+
+ WARN_ON(!prfcnt_info);
+ WARN_ON(!phys_layout);
+
+ shader_core_cnt = fls64(prfcnt_info->core_mask);
+ values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+ fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
+ hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
+
+ /* The number of hardware counter blocks reported by the GPU matches the
+ * legacy guess-work we have done in the past.
+ */
+ WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT +
+ KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ prfcnt_info->l2_count + shader_core_cnt);
+
+ *phys_layout = (struct kbase_hwcnt_csf_physical_layout){
+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
+ .mmu_l2_cnt = prfcnt_info->l2_count,
+ .shader_cnt = shader_core_cnt,
+ .fw_block_cnt = fw_blocks_count,
+ .hw_block_cnt = hw_blocks_count,
+ .block_cnt = fw_blocks_count + hw_blocks_count,
+ .shader_avail_mask = prfcnt_info->core_mask,
+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .values_per_block = values_per_block,
+ .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
+ };
+}
+
+static void
+kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+
+ memset(backend_csf->to_user_buf, 0, user_buf_bytes);
+ memset(backend_csf->accum_buf, 0, user_buf_bytes);
+ memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes);
+}
+
+static void
+kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf,
+ u32 *sample)
+{
+ u32 block_idx;
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout;
+ u32 *block_buf;
+
+ phys_layout = &backend_csf->phys_layout;
+
+ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
+ block_buf = sample + block_idx * phys_layout->values_per_block;
+ block_buf[phys_layout->enable_mask_offset] = 0;
+ }
+}
+
+static void
+kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ u32 idx;
+ u32 *sample;
+ char *cpu_dump_base;
+ size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+
+ cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base;
+
+ for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) {
+ sample = (u32 *)&cpu_dump_base[idx * dump_bytes];
+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample);
+ }
+}
+
+static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+
+ /* Copy the data into the sample and wait for the user to get it. */
+ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes);
+
+ /* After copying the data into the user sample, clear the accumulator
+ * values to prepare for the next accumulation, such as the next request
+ * or threshold interrupt.
+ */
+ memset(backend_csf->accum_buf, 0, user_buf_bytes);
+}
+
+static void kbasep_hwcnt_backend_csf_accumulate_sample(
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
+ u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples)
+{
+ size_t block_idx;
+ const u32 *old_block = old_sample_buf;
+ const u32 *new_block = new_sample_buf;
+ u64 *acc_block = accum_buf;
+ const size_t values_per_block = phys_layout->values_per_block;
+
+ /* Performance counter blocks for firmware are stored before blocks for hardware.
+ * We skip over the firmware's performance counter blocks (counters dumping is not
+ * supported for firmware blocks, only hardware ones).
+ */
+ old_block += values_per_block * phys_layout->fw_block_cnt;
+ new_block += values_per_block * phys_layout->fw_block_cnt;
+
+ for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt;
+ block_idx++) {
+ const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
+ const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
+
+ if (new_enable_mask == 0) {
+ /* Hardware block was unavailable or we didn't turn on
+ * any counters. Do nothing.
+ */
+ } else {
+ /* Hardware block was available and it had some counters
+ * enabled. We need to update the accumulation buffer.
+ */
+ size_t ctr_idx;
+
+ /* Unconditionally copy the headers. */
+ for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) {
+ acc_block[ctr_idx] = new_block[ctr_idx];
+ }
+
+ /* Accumulate counter samples
+ *
+ * When accumulating samples we need to take into
+ * account whether the counter sampling method involves
+ * clearing counters back to zero after each sample is
+ * taken.
+ *
+ * The intention for CSF was that all HW should use
+ * counters which wrap to zero when their maximum value
+ * is reached. This, combined with non-clearing
+ * sampling, enables multiple concurrent users to
+ * request samples without interfering with each other.
+ *
+ * However some early HW may not support wrapping
+ * counters, for these GPUs counters must be cleared on
+ * sample to avoid loss of data due to counters
+ * saturating at their maximum value.
+ */
+ if (!clearing_samples) {
+ if (old_enable_mask == 0) {
+ /* Hardware block was previously
+ * unavailable. Accumulate the new
+ * counters only, as we know previous
+ * values are zeroes.
+ */
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
+ acc_block[ctr_idx] += new_block[ctr_idx];
+ }
+ } else {
+ /* Hardware block was previously
+ * available. Accumulate the delta
+ * between old and new counter values.
+ */
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
+ acc_block[ctr_idx] +=
+ new_block[ctr_idx] - old_block[ctr_idx];
+ }
+ }
+ } else {
+ for (ctr_idx = phys_layout->headers_per_block;
+ ctr_idx < values_per_block; ctr_idx++) {
+ acc_block[ctr_idx] += new_block[ctr_idx];
+ }
+ }
+ }
+ old_block += values_per_block;
+ new_block += values_per_block;
+ acc_block += values_per_block;
+ }
+
+ WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) -
+ (values_per_block * phys_layout->fw_block_cnt));
+ (void)dump_bytes;
+}
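One property the non-clearing path above depends on is that the u32 subtraction stays correct across a counter wrap. A standalone sketch of that behaviour (illustrative only, not part of this patch):

/* Illustrative only: with wrapping (non-clearing) u32 counters, the delta
 * new_val - old_val is computed modulo 2^32, so it is still the true number
 * of events even when the counter wrapped between the two samples.
 */
static u64 example_counter_delta(u32 old_val, u32 new_val)
{
	/* e.g. old_val = 0xFFFFFF00, new_val = 0x00000010:
	 * new_val - old_val wraps to 0x110 rather than producing a huge
	 * or negative value, which is what the accumulation loop above
	 * adds into the u64 accumulator.
	 */
	return new_val - old_val;
}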
+
+static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backend_csf *backend_csf,
+ u32 extract_index_to_start,
+ u32 insert_index_to_stop)
+{
+ u32 raw_idx;
+ unsigned long flags;
+ u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
+ const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
+ const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
+ bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
+ u32 *old_sample_buf = backend_csf->old_sample_buf;
+ u32 *new_sample_buf = old_sample_buf;
+
+ if (extract_index_to_start == insert_index_to_stop)
+ /* No samples to accumulate. Early out. */
+ return;
+
+ /* Sync all the buffers to the CPU side before reading the data. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, extract_index_to_start,
+ insert_index_to_stop, true);
+
+ /* Consider the u32 wrap case; '!=' is used here instead of the '<' operator. */
+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) {
+ /* The logical "&" acts as a modulo operation since buf_count
+ * must be a power of two.
+ */
+ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
+
+ new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
+
+ kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout,
+ buf_dump_bytes, backend_csf->accum_buf,
+ old_sample_buf, new_sample_buf,
+ clearing_samples);
+
+ old_sample_buf = new_sample_buf;
+ }
+
+ /* Save the newest buffer as the old buffer for next time. */
+ memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes);
+
+ /* Reset the prfcnt_en header on each sample before releasing them. */
+ for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) {
+ const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
+ u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
+
+ kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample);
+ }
+
+ /* Sync zeroed buffers to avoid coherency issues on future use. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, extract_index_to_start,
+ insert_index_to_stop, false);
+
+ /* After consuming all samples between extract_idx and insert_idx,
+ * set the raw extract index to insert_idx so that the sample buffers
+ * can be released back to the ring buffer pool.
+ */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ backend_csf->info->csf_if->set_extract_index(backend_csf->info->csf_if->ctx,
+ insert_index_to_stop);
+ /* Update the watchdog last seen index to check any new FW auto samples
+ * in next watchdog callback.
+ */
+ backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop;
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+}
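Because ring_buf_cnt must be a power of two, the masking used above keeps working even when the raw u32 extract/insert indexes wrap around. A standalone sketch (illustrative only, not part of this patch):

/* Illustrative only: map a raw, monotonically increasing (and eventually
 * wrapping) u32 index to a slot in the ring buffer. Requires ring_buf_cnt to
 * be a power of two so the mask acts as a modulo.
 */
static u32 example_ring_buf_slot(u32 raw_idx, u32 ring_buf_cnt)
{
	/* e.g. with ring_buf_cnt = 4: raw 0xFFFFFFFE -> slot 2,
	 * 0xFFFFFFFF -> slot 3, 0x00000000 -> slot 0, so the
	 * "raw_idx != insert_index_to_stop" loop walks slots 2, 3, 0
	 * across the u32 wrap without special-casing it.
	 */
	return raw_idx & (ring_buf_cnt - 1);
}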
+
+static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ struct kbase_hwcnt_backend_csf *backend_csf,
+ enum kbase_hwcnt_backend_csf_enable_state new_state)
+{
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ if (backend_csf->enable_state != new_state) {
+ backend_csf->enable_state = new_state;
+
+ wake_up(&backend_csf->enable_state_waitq);
+ }
+}
+
+static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info = info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+ unsigned long flags;
+
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+
+ if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) {
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+ return;
+ }
+
+ backend_csf = csf_info->backend;
+
+ /* Only do watchdog request when all conditions are met: */
+ if (/* 1. Backend is enabled. */
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
+ /* 2. FW is not in protected mode. */
+ (!csf_info->fw_in_protected_mode) &&
+ /* 3. dump state indicates no other dumping is in progress. */
+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) ||
+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) {
+ u32 extract_index;
+ u32 insert_index;
+
+ /* Read the raw extract and insert indexes from the CSF interface. */
+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index);
+
+ /* Do watchdog request if no new FW auto samples. */
+ if (insert_index == backend_csf->watchdog_last_seen_insert_idx) {
+ /* Trigger the watchdog request. */
+ csf_info->csf_if->dump_request(csf_info->csf_if->ctx);
+
+ /* A watchdog dump is required, change the state to
+ * start the request process.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED;
+ }
+ }
+
+ /* Must schedule another callback when in the transitional state because
+ * this function can be called for the first time before the performance
+ * counter enabled interrupt.
+ */
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) ||
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) {
+ /* Reschedule the timer for next watchdog callback. */
+ csf_info->watchdog_if->modify(csf_info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+ }
+
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker.
+ * @work: Work structure.
+ *
+ * Accumulates all available samples in the ring buffer once a dump request
+ * has been made.
+ *
+ */
+static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+ u32 insert_index_to_acc;
+ u32 extract_index;
+ u32 insert_index;
+
+ WARN_ON(!work);
+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ /* Assert the backend is not destroyed. */
+ WARN_ON(backend_csf != backend_csf->info->backend);
+
+ /* The backend was disabled or had an error while the worker was being
+ * launched.
+ */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED);
+
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING;
+ insert_index_to_acc = backend_csf->insert_index_to_accumulate;
+
+ /* Read the raw extract and insert indexes from the CSF interface. */
+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index,
+ &insert_index);
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* Accumulate up to the insert we grabbed at the prfcnt request
+ * interrupt.
+ */
+ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index,
+ insert_index_to_acc);
+
+ /* Copy to the user buffer so if a threshold interrupt fires
+ * between now and get(), the accumulations are untouched.
+ */
+ kbasep_hwcnt_backend_csf_update_user_sample(backend_csf);
+
+ /* Dump done, set state back to COMPLETED for next request. */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ /* Assert the backend is not destroyed. */
+ WARN_ON(backend_csf != backend_csf->info->backend);
+
+ /* The backend was disabled or had an error while we were accumulating.
+ */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING);
+
+ /* Our work here is done - set the wait object and unblock waiters. */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ complete_all(&backend_csf->dump_completed);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker.
+ *
+ * @work: Work structure.
+ *
+ * Called when an HWC threshold interrupt is raised, to consume all available
+ * samples in the ring buffer.
+ */
+static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+ u32 extract_index;
+ u32 insert_index;
+
+ WARN_ON(!work);
+
+ backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work);
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* Assert the backend is not destroyed. */
+ WARN_ON(backend_csf != backend_csf->info->backend);
+
+ /* Read the raw extract and insert indexes from the CSF interface. */
+ backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index,
+ &insert_index);
+
+ /* The backend was disabled or had an error while the worker was being
+ * launched.
+ */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ /* Early out if we are not in the IDLE state or COMPLETED state, as this
+ * means a concurrent dump is in progress and we don't want to
+ * interfere.
+ */
+ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* Accumulate everything we possibly can. We grabbed the insert index
+ * immediately after we acquired the lock but before we checked whether
+ * a concurrent dump was triggered. This ensures that if a concurrent
+ * dump was triggered between releasing the lock and now, we know for a
+ * fact that our insert will not exceed the concurrent dump's
+ * insert_to_accumulate, so we don't risk accumulating too much data.
+ */
+ kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index);
+
+ /* No need to wake up anything since it is not a user dump request. */
+}
+
+static void
+kbase_hwcnt_backend_csf_submit_dump_worker(struct kbase_hwcnt_backend_csf_info *csf_info)
+{
+ u32 extract_index;
+
+ WARN_ON(!csf_info);
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info));
+ WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED);
+ WARN_ON(csf_info->backend->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT);
+
+ /* Save insert index now so that the dump worker only accumulates the
+ * HWC data associated with this request. Extract index is not stored
+ * as that needs to be checked when accumulating to prevent re-reading
+ * buffers that have already been read and returned to the GPU.
+ */
+ csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index,
+ &csf_info->backend->insert_index_to_accumulate);
+ csf_info->backend->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED;
+
+ /* Submit the accumulator task into the work queue. */
+ queue_work(csf_info->backend->hwc_dump_workq, &csf_info->backend->hwc_dump_work);
+}
+
+static void
+kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *backend_csf,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_backend_csf_if_enable *enable)
+{
+ enum kbase_hwcnt_physical_set phys_counter_set;
+ struct kbase_hwcnt_physical_enable_map phys_enable_map;
+
+ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
+
+ /* Process the enable_map to guarantee the block header is enabled, which
+ * is needed for delta calculation.
+ */
+ kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map);
+
+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_csf->info->counter_set);
+
+ /* Use processed enable_map to enable HWC in HW level. */
+ enable->fe_bm = phys_enable_map.fe_bm;
+ enable->shader_bm = phys_enable_map.shader_bm;
+ enable->tiler_bm = phys_enable_map.tiler_bm;
+ enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+ enable->counter_set = phys_counter_set;
+ enable->clk_enable_map = enable_map->clk_enable_map;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int
+kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ struct kbase_hwcnt_backend_csf_if_enable enable;
+ int err;
+
+ if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata))
+ return -EINVAL;
+
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable);
+
+ /* enable_state should be DISABLED before we transition it to enabled. */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)
+ return -EIO;
+
+ err = backend_csf->info->watchdog_if->enable(backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS,
+ kbasep_hwcnt_backend_watchdog_timer_cb,
+ backend_csf->info);
+ if (err)
+ return err;
+
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED);
+
+ backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, &enable);
+
+ kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map);
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, enable_map);
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return errcode;
+}
+
+static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
+ struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags)
+{
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) ||
+ (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, *lock_flags);
+
+ wait_event(backend_csf->enable_state_waitq,
+ (backend_csf->enable_state !=
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) &&
+ (backend_csf->enable_state !=
+ KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED));
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, lock_flags);
+ }
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ bool do_disable = false;
+
+ WARN_ON(!backend_csf);
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* Make sure we wait until any previous enable or disable have completed
+ * before doing anything.
+ */
+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags);
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED ||
+ backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+ /* If we are already disabled or in an unrecoverable error
+ * state, there is nothing for us to do.
+ */
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return;
+ }
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+ /* Only disable if we were previously enabled - in all other
+ * cases the call to disable will have already been made.
+ */
+ do_disable = true;
+ }
+
+ WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE);
+ WARN_ON(!completion_done(&backend_csf->dump_completed));
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* Deregister the timer and block until any timer callback has completed.
+ * We've transitioned out of the ENABLED state so we can guarantee it
+ * won't reschedule itself.
+ */
+ backend_csf->info->watchdog_if->disable(backend_csf->info->watchdog_if->timer);
+
+ /* Block until any async work has completed. We have transitioned out of
+ * the ENABLED state so we can guarantee no new work will concurrently
+ * be submitted.
+ */
+ flush_workqueue(backend_csf->hwc_dump_workq);
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ if (do_disable)
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
+
+ kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags);
+
+ switch (backend_csf->enable_state) {
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
+ break;
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+ break;
+ default:
+ WARN_ON(true);
+ break;
+ }
+
+ backend_csf->user_requested = false;
+ backend_csf->watchdog_last_seen_insert_idx = 0;
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* After disable, zero the header of all buffers in the ring buffer back
+ * to 0 to prepare for the next enable.
+ */
+ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
+
+ /* Sync zeroed buffers to avoid coherency issues on future use. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, 0,
+ backend_csf->info->ring_buf_cnt, false);
+
+ /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare
+ * for next enable.
+ */
+ kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ bool do_request = false;
+ bool watchdog_dumping = false;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+
+ /* If we're transitioning to enabled there's nothing to accumulate, and
+ * the user dump buffer is already zeroed. We can just short circuit to
+ * the DUMP_COMPLETED state.
+ */
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
+ kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+ backend_csf->user_requested = true;
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return 0;
+ }
+
+ /* Otherwise, make sure we're already enabled. */
+ if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ return -EIO;
+ }
+
+ /* Make sure that this is either the first request since enable, or that the
+ * previous user dump has completed, or that a watchdog dump is in progress,
+ * so we never cut in midway through a user dump.
+ * If a user request comes while a watchdog dump is in progress, the user
+ * request takes ownership of the sample the watchdog requested by changing
+ * the dump_state, so the interrupt for the watchdog request is processed
+ * instead of ignored.
+ */
+ if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) &&
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) &&
+ (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) {
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ /* HWC is disabled or another dump is ongoing, or we are on
+ * fault.
+ */
+ return -EIO;
+ }
+
+ /* Reset the completion so dump_wait() has something to wait on. */
+ reinit_completion(&backend_csf->dump_completed);
+
+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)
+ watchdog_dumping = true;
+
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) &&
+ !backend_csf->info->fw_in_protected_mode) {
+ /* Only do the request if we are fully enabled and not in
+ * protected mode.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED;
+ do_request = true;
+ } else {
+ /* Skip the request and the wait for the ACK, and go straight to
+ * checking the insert and kicking off the worker to do the dump.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
+ }
+
+ /* CSF firmware might enter protected mode now, but still call request.
+ * That is fine, as we changed state while holding the lock, so the
+ * protected mode enter function will query the insert and launch the
+ * dumping worker.
+ * At some point we will get the dump request ACK saying a dump is done,
+ * but we can ignore it if we are not in the REQUESTED state and process
+ * the sample in the next dumping-worker round.
+ */
+
+ *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend);
+ kbasep_hwcnt_backend_csf_cc_update(backend_csf);
+ backend_csf->user_requested = true;
+
+ if (do_request) {
+ /* If a watchdog dumping is in progress, don't need to do
+ * another request, just update the dump_state and take the
+ * ownership of the sample which watchdog requested.
+ */
+ if (!watchdog_dumping)
+ backend_csf->info->csf_if->dump_request(backend_csf->info->csf_if->ctx);
+ } else
+ kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info);
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+	/* Modify the watchdog timer to push back the next regular check,
+	 * since a dump has just been requested.
+	 */
+ backend_csf->info->watchdog_if->modify(backend_csf->info->watchdog_if->timer,
+ HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS);
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ int errcode;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ wait_for_completion(&backend_csf->dump_completed);
+
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+	/* When user_requested is set, make sure the last dump actually
+	 * succeeded.
+	 */
+ if (backend_csf->user_requested &&
+ ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) ||
+ (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)))
+ errcode = 0;
+ else
+ errcode = -EIO;
+
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ int errcode;
+ u64 ts;
+
+ if (!backend_csf)
+ return -EINVAL;
+
+ /* Request a dump so we can clear all current counters. */
+ errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts);
+ if (!errcode)
+ /* Wait for the manual dump or auto dump to be done and
+ * accumulator to be updated.
+ */
+ errcode = kbasep_hwcnt_backend_csf_dump_wait(backend);
+
+ return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+ int ret;
+ size_t clk;
+
+ if (!backend_csf || !dst || !dst_enable_map ||
+ (backend_csf->info->metadata != dst->metadata) ||
+ (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ /* Extract elapsed cycle count for each clock domain if enabled. */
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ continue;
+
+ /* Reset the counter to zero if accumulation is off. */
+ if (!accumulate)
+ dst->clk_cnt_buf[clk] = 0;
+ dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk];
+ }
+
+	/* We just return the user buffer without checking the current state,
+	 * as it is undefined to call this function without a prior successful
+	 * call to dump_wait().
+	 */
+ ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate);
+
+ return ret;
+}
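
For context, a minimal caller-side sketch (not part of this patch) of how the operations above are intended to be sequenced through the backend interface vtable; the iface, backend, dst and enable_map objects are assumed to have been set up by the caller, and error handling is reduced to early returns.

static int example_take_one_sample(struct kbase_hwcnt_backend_interface *iface,
				   struct kbase_hwcnt_backend *backend,
				   struct kbase_hwcnt_dump_buffer *dst,
				   const struct kbase_hwcnt_enable_map *enable_map)
{
	u64 dump_time_ns;
	int errcode;

	/* Ask the backend to start a dump; fails with -EIO if counters are
	 * not enabled.
	 */
	errcode = iface->dump_request(backend, &dump_time_ns);
	if (errcode)
		return errcode;

	/* Block until the requested (or watchdog-owned) dump has completed. */
	errcode = iface->dump_wait(backend);
	if (errcode)
		return errcode;

	/* Copy the sample into the caller's buffer without accumulation. */
	return iface->dump_get(backend, dst, enable_map, false);
}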
+
+/**
+ * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend.
+ * @backend_csf: Pointer to CSF backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ if (!backend_csf)
+ return;
+
+ destroy_workqueue(backend_csf->hwc_dump_workq);
+
+ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf);
+
+ kfree(backend_csf->accum_buf);
+ backend_csf->accum_buf = NULL;
+
+ kfree(backend_csf->old_sample_buf);
+ backend_csf->old_sample_buf = NULL;
+
+ kfree(backend_csf->to_user_buf);
+ backend_csf->to_user_buf = NULL;
+
+ kfree(backend_csf);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance.
+ *
+ * @csf_info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info,
+ struct kbase_hwcnt_backend_csf **out_backend)
+{
+ struct kbase_hwcnt_backend_csf *backend_csf = NULL;
+ int errcode = -ENOMEM;
+
+ WARN_ON(!csf_info);
+ WARN_ON(!out_backend);
+
+ backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL);
+ if (!backend_csf)
+ goto alloc_error;
+
+ backend_csf->info = csf_info;
+ kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout);
+
+ backend_csf->accum_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!backend_csf->accum_buf)
+ goto err_alloc_acc_buf;
+
+ backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL);
+ if (!backend_csf->old_sample_buf)
+ goto err_alloc_pre_sample_buf;
+
+ backend_csf->to_user_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!backend_csf->to_user_buf)
+ goto err_alloc_user_sample_buf;
+
+ errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
+ &backend_csf->ring_buf_cpu_base,
+ &backend_csf->ring_buf);
+ if (errcode)
+ goto err_ring_buf_alloc;
+ errcode = -ENOMEM;
+
+	/* Zero all performance enable headers to prepare for the first enable. */
+ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf);
+
+ /* Sync zeroed buffers to avoid coherency issues on use. */
+ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf, 0,
+ backend_csf->info->ring_buf_cnt, false);
+
+ init_completion(&backend_csf->dump_completed);
+
+ init_waitqueue_head(&backend_csf->enable_state_waitq);
+
+	/* Allocate a single-threaded workqueue for the dump worker and the
+	 * threshold worker.
+	 */
+ backend_csf->hwc_dump_workq =
+ alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!backend_csf->hwc_dump_workq)
+ goto err_alloc_workqueue;
+
+ INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker);
+ INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker);
+
+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED;
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+ backend_csf->user_requested = false;
+ backend_csf->watchdog_last_seen_insert_idx = 0;
+
+ *out_backend = backend_csf;
+ return 0;
+
+err_alloc_workqueue:
+ backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
+ backend_csf->ring_buf);
+err_ring_buf_alloc:
+ kfree(backend_csf->to_user_buf);
+ backend_csf->to_user_buf = NULL;
+err_alloc_user_sample_buf:
+ kfree(backend_csf->old_sample_buf);
+ backend_csf->old_sample_buf = NULL;
+err_alloc_pre_sample_buf:
+ kfree(backend_csf->accum_buf);
+ backend_csf->accum_buf = NULL;
+err_alloc_acc_buf:
+ kfree(backend_csf);
+alloc_error:
+ return errcode;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = NULL;
+ struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info;
+ int errcode;
+ bool success = false;
+
+ if (!info || !out_backend)
+ return -EINVAL;
+
+ /* Create the backend. */
+ errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf);
+ if (errcode)
+ return errcode;
+
+	/* If no backend was created before, attach this one to csf_info.
+	 * Use the spinlock to avoid concurrent initialization.
+	 */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ if (csf_info->backend == NULL) {
+ csf_info->backend = backend_csf;
+ *out_backend = (struct kbase_hwcnt_backend *)backend_csf;
+ success = true;
+ if (csf_info->unrecoverable_error_happened)
+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR;
+ }
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+	/* Destroy the newly created backend if one had already been created.
+	 * In the normal case this won't happen, provided the client calls
+	 * init() properly.
+	 */
+ if (!success) {
+ kbasep_hwcnt_backend_csf_destroy(backend_csf);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_csf_dump_disable(backend);
+
+ /* Set the backend in csf_info to NULL so we won't handle any external
+ * notification anymore since we are terminating.
+ */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ backend_csf->info->backend = NULL;
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ kbasep_hwcnt_backend_csf_destroy(backend_csf);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_csf_info_destroy(const struct kbase_hwcnt_backend_csf_info *info)
+{
+ if (!info)
+ return;
+
+	/* The backend should be destroyed before the info object is destroyed. */
+ WARN_ON(info->backend != NULL);
+
+	/* The metadata should be destroyed before the info object is destroyed. */
+ WARN_ON(info->metadata != NULL);
+
+ kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info.
+ *
+ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure
+ * used to create backend interface.
+ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer.
+ *                MUST be a power of 2.
+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create
+ * backend interface.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int
+kbasep_hwcnt_backend_csf_info_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ const struct kbase_hwcnt_backend_csf_info **out_info)
+{
+ struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+ if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) ||
+ WARN_ON(!is_power_of_2(ring_buf_cnt)))
+ return -EINVAL;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ *info = (struct kbase_hwcnt_backend_csf_info)
+ {
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ .counter_set = KBASE_HWCNT_SET_SECONDARY,
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ .counter_set = KBASE_HWCNT_SET_TERTIARY,
+#else
+ /* Default to primary */
+ .counter_set = KBASE_HWCNT_SET_PRIMARY,
+#endif
+ .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt,
+ .fw_in_protected_mode = false, .unrecoverable_error_happened = false,
+ .watchdog_if = watchdog_if,
+ };
+ *out_info = info;
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ if (!info)
+ return NULL;
+
+ WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata);
+
+ return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata;
+}
+
+static void
+kbasep_hwcnt_backend_csf_handle_unrecoverable_error(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ bool do_disable = false;
+
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ /* We are already in or transitioning to the unrecoverable error state.
+ * Early out.
+ */
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) ||
+ (backend_csf->enable_state ==
+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER))
+ return;
+
+ /* If we are disabled, we know we have no pending workers, so skip the
+ * waiting state.
+ */
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+ return;
+ }
+
+	/* Trigger a disable only if we are not already transitioning to
+	 * disabled; we don't want to disable twice if an unrecoverable error
+	 * happens while we are disabling.
+	 */
+ do_disable =
+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER);
+
+ /* Transition the dump to the IDLE state and unblock any waiters. The
+ * IDLE state signifies an error.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+
+	/* Only trigger the disable if we were not already transitioning to
+	 * disabled (see do_disable above); we don't want to disable twice if
+	 * an unrecoverable error happens while we are disabling.
+	 */
+ if (do_disable)
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
+}
+
+static void
+kbasep_hwcnt_backend_csf_handle_recoverable_error(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ switch (backend_csf->enable_state) {
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED:
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
+ /* Already disabled or disabling, or in an unrecoverable error.
+ * Nothing to be done to handle the error.
+ */
+ return;
+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
+ /* A seemingly recoverable error that occurs while we are
+ * transitioning to enabled is probably unrecoverable.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf);
+ return;
+ case KBASE_HWCNT_BACKEND_CSF_ENABLED:
+ /* Start transitioning to the disabled state. We can't wait for
+ * it as this recoverable error might be triggered from an
+ * interrupt. The wait will be done in the eventual call to
+ * disable().
+ */
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+ /* Transition the dump to the IDLE state and unblock any
+ * waiters. The IDLE state signifies an error.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+
+ backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx);
+ return;
+ }
+}
+
+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info =
+ (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+ csf_info->fw_in_protected_mode = true;
+
+ /* Call on_prfcnt_sample() to trigger collection of the protected mode
+ * entry auto-sample if there is currently a pending dump request.
+ */
+ kbase_hwcnt_backend_csf_on_prfcnt_sample(iface);
+}
+
+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+ csf_info->fw_in_protected_mode = false;
+}
+
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+ csf_info->unrecoverable_error_happened = true;
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+ return;
+ }
+
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags);
+ csf_info->unrecoverable_error_happened = false;
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) {
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+ return;
+ }
+ backend_csf = csf_info->backend;
+
+ if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) &&
+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) {
+ /* Before a reset occurs, we must either have been disabled
+ * (else we lose data) or we should have encountered an
+ * unrecoverable error. Either way, we will have disabled the
+ * interface and waited for any workers that might have still
+ * been in flight.
+ * If not in these states, fire off one more disable to make
+ * sure everything is turned off before the power is pulled.
+ * We can't wait for this disable to complete, but it doesn't
+ * really matter, the power is being pulled.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+ }
+
+ /* A reset is the only way to exit the unrecoverable error state */
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED);
+ }
+
+ csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ /* Skip the dump_work if it's a watchdog request. */
+ if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) {
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED;
+ return;
+ }
+
+	/* If the current state is not REQUESTED, this HWC sample is skipped
+	 * here and will be processed on the next dump_request.
+	 */
+ if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED)
+ return;
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT;
+
+ kbase_hwcnt_backend_csf_submit_dump_worker(csf_info);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED)
+ /* Submit the threshold work into the work queue to consume the
+ * available samples.
+ */
+ queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+
+ /* Called when an overflow occurs. We treat this as a recoverable error,
+ * so we start transitioning to the disabled state.
+ * We could try and handle it while enabled, but in a real system we
+ * never expect an overflow to occur so there is no point implementing
+ * complex recovery code when we can just turn ourselves off instead for
+ * a while.
+ */
+ kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend);
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED);
+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) {
+ /* Unexpected, but we are already in the right state so just
+ * ignore it.
+ */
+ } else {
+ /* Unexpected state change, assume everything is broken until
+ * we reset.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+ }
+}
+
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_backend_csf *backend_csf;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
+
+ /* Early out if the backend does not exist. */
+ if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info))
+ return;
+ backend_csf = csf_info->backend;
+
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER);
+ } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
+ /* Unexpected, but we are already in the right state so just
+ * ignore it.
+ */
+ } else {
+ /* Unexpected state change, assume everything is broken until
+ * we reset.
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+ }
+}
+
+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_gpu_info gpu_info;
+
+ if (!iface)
+ return -EINVAL;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ WARN_ON(!csf_info->csf_if->get_prfcnt_info);
+
+ csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, &csf_info->prfcnt_info);
+
+	/* The clock domain count should not exceed the maximum number of
+	 * clock regulators.
+	 */
+ if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
+ return -EIO;
+
+ gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
+ gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+ gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
+ gpu_info.prfcnt_values_per_block =
+ csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+ return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set,
+ &csf_info->metadata);
+}
+
+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ if (!iface)
+ return;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ if (csf_info->metadata) {
+ kbase_hwcnt_csf_metadata_destroy(csf_info->metadata);
+ csf_info->metadata = NULL;
+ }
+}
+
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ int errcode;
+ const struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+ if (!iface || !csf_if || !watchdog_if)
+ return -EINVAL;
+
+	/* The buffer count must be a power of 2. */
+ if (!is_power_of_2(ring_buf_cnt))
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, watchdog_if, &info);
+ if (errcode)
+ return errcode;
+
+ iface->info = (struct kbase_hwcnt_backend_info *)info;
+ iface->metadata = kbasep_hwcnt_backend_csf_metadata;
+ iface->init = kbasep_hwcnt_backend_csf_init;
+ iface->term = kbasep_hwcnt_backend_csf_term;
+ iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns;
+ iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable;
+ iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock;
+ iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable;
+ iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear;
+ iface->dump_request = kbasep_hwcnt_backend_csf_dump_request;
+ iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait;
+ iface->dump_get = kbasep_hwcnt_backend_csf_dump_get;
+
+ return 0;
+}
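
As a usage note, a minimal sketch (assuming the csf_if and watchdog_if objects are created elsewhere by the integration) of the intended setup order for the interface created above: the metadata must be initialized before any backend instance is created via iface->init(), and teardown mirrors this with metadata_term() before destroy().

static int example_setup_csf_backend(struct kbase_hwcnt_backend_csf_if *csf_if,
				     struct kbase_hwcnt_watchdog_interface *watchdog_if,
				     struct kbase_hwcnt_backend_interface *iface)
{
	/* 128 is only an example; the ring buffer count MUST be a power of 2. */
	int errcode = kbase_hwcnt_backend_csf_create(csf_if, 128, watchdog_if, iface);

	if (errcode)
		return errcode;

	/* The metadata must exist before iface->init() can be called. */
	errcode = kbase_hwcnt_backend_csf_metadata_init(iface);
	if (errcode)
		kbase_hwcnt_backend_csf_destroy(iface);

	return errcode;
}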
+
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface)
+ return;
+
+ kbasep_hwcnt_backend_csf_info_destroy(
+ (const struct kbase_hwcnt_backend_csf_info *)iface->info);
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
new file mode 100644
index 0000000..9c5a5c9
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_H_
+#define _KBASE_HWCNT_BACKEND_CSF_H_
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+
+/**
+ * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
+ * interface.
+ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure
+ * used to create backend interface.
+ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer
+ *                allocation; MUST be a power of 2.
+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used
+ * to create backend interface.
+ * @iface: Non-NULL pointer to backend interface structure that is filled
+ * in on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF
+ * hardware counter backend.
+ * @iface: Non-NULL pointer to backend interface structure.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF
+ * hardware counter backend.
+ * @iface: Non-NULL pointer to backend interface structure.
+ */
+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend
+ * interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive
+ * notification that protected mode
+ * has been entered.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive
+ * notification that protected mode has
+ * been exited.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function
+ * called when unrecoverable
+ * errors are detected.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ *
+ * This should be called on encountering errors that can only be recovered from
+ * with a reset, or that may put the HWC logic in a state that could result in
+ * a hang, for example on a bus error or when the FW becomes unresponsive.
+ */
+void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be
+ * called immediately before a
+ * reset. Takes us out of the
+ * unrecoverable error state, if we
+ * were in it.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample
+ * complete interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter
+ *                                                 buffer threshold reached
+ *                                                 interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer
+ * overflow interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled
+ * interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter
+ * disabled interrupt handler.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ */
+void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
new file mode 100644
index 0000000..382a3ad
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for CSF hardware counter backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_
+#define _KBASE_HWCNT_BACKEND_CSF_IF_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_backend_csf_if_ctx;
+
+struct kbase_hwcnt_backend_csf_if_ring_buf;
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection
+ * structure.
+ * @fe_bm: Front End counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm: Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ * @counter_set: The performance counter set to enable.
+ * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle
+ * counter for a given clock domain.
+ */
+struct kbase_hwcnt_backend_csf_if_enable {
+ u32 fe_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 mmu_l2_bm;
+ u8 counter_set;
+ u64 clk_enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter
+ * information.
+ * @prfcnt_hw_size: Total length in bytes of all the hardware counters data. The hardware
+ * counters are sub-divided into 4 classes: front-end, shader, tiler, and
+ * memory system (l2 cache + MMU).
+ * @prfcnt_fw_size: Total length in bytes of all the firmware counters data.
+ * @dump_bytes: Bytes of GPU memory required to perform a performance
+ * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size.
+ * @prfcnt_block_size: Bytes of each performance counter block.
+ * @l2_count: The MMU L2 cache count.
+ * @core_mask: Shader core mask.
+ * @clk_cnt: Clock domain count in the system.
+ * @clearing_samples: Indicates whether counters are cleared after each sample
+ * is taken.
+ */
+struct kbase_hwcnt_backend_csf_if_prfcnt_info {
+ size_t prfcnt_hw_size;
+ size_t prfcnt_fw_size;
+ size_t dump_bytes;
+ size_t prfcnt_block_size;
+ size_t l2_count;
+ u64 core_mask;
+ u8 clk_cnt;
+ bool clearing_samples;
+};
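
As a sanity-check sketch only, restating the size relationships documented in the struct above (KBASE_HWCNT_VALUE_HW_BYTES is the per-counter size used elsewhere in this patch), the fields are expected to satisfy:

static bool example_prfcnt_info_is_consistent(
	const struct kbase_hwcnt_backend_csf_if_prfcnt_info *info)
{
	/* A dump is the concatenation of the HW and FW counter data. */
	if (info->dump_bytes != info->prfcnt_hw_size + info->prfcnt_fw_size)
		return false;

	/* Each block holds a whole number of counters, and a dump holds a
	 * whole number of blocks.
	 */
	return ((info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) == 0) &&
	       ((info->dump_bytes % info->prfcnt_block_size) == 0);
}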
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the
+ * backend spinlock is
+ * held.
+ * @ctx: Non-NULL pointer to a CSF context.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
+ *
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @flags: Pointer to the memory location that would store the previous
+ * interrupt state.
+ */
+typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
+ *
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @flags: Previously stored interrupt state when Scheduler interrupt
+ * spinlock was acquired.
+ */
+typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
+ * counter information.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @prfcnt_info: Non-NULL pointer to struct where performance counter
+ * information should be stored.
+ */
+typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer
+ * for CSF interface.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @buf_count:     The buffer count of the ring buffer to be allocated;
+ *                 MUST be a power of 2.
+ * @cpu_dump_base: Non-NULL pointer to where the ring buffer CPU base address
+ *                 is stored on success.
+ * @ring_buf:      Non-NULL pointer to where the ring buffer is stored on
+ *                 success.
+ *
+ * A ring buffer is needed by the CSF interface to store both manual and
+ * automatic HWC samples; the buffer count in the ring buffer MUST be a power
+ * of 2 to meet the hardware requirement.
+ *
+ * Return: 0 on success, else error code.
+ */
+typedef int
+kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 buf_count, void **cpu_dump_base,
+ struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers
+ * memory.
+ * @ctx: Non-NULL pointer to a CSF context.
+ * @ring_buf: Non-NULL pointer to the ring buffer.
+ * @buf_index_first: The first buffer index in the ring buffer to be synced,
+ * inclusive.
+ * @buf_index_last: The last buffer index in the ring buffer to be synced,
+ * exclusive.
+ * @for_cpu:         The direction of the sync: set to true when the CPU cache
+ *                   needs invalidating before the CPU reads the buffer, and
+ *                   set to false after CPU writes, to flush them out before
+ *                   the memory is overwritten by the GPU.
+ *
+ * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
+ * are correctly observed.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ u32 buf_index_first, u32 buf_index_last, bool for_cpu);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for
+ * the CSF interface.
+ *
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @ring_buf: Non-NULL pointer to the ring buffer to be freed.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current
+ * timestamp of the CSF
+ * interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Return: CSF interface timestamp in nanoseconds.
+ */
+typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware
+ * counter in CSF interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @ring_buf: Non-NULL pointer to the ring buffer used to set up the HWC.
+ * @enable: Non-NULL pointer to the enable map of HWC.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ struct kbase_hwcnt_backend_csf_if_enable *enable);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
+ * in CSF interface.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
+ *
+ * @ctx: Non-NULL pointer to the interface context.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
+ * insert indexes of the
+ * ring buffer.
+ *
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @extract_index: Non-NULL pointer where the current extract index is saved.
+ * @insert_index:  Non-NULL pointer where the current insert index is saved.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 *extract_index, u32 *insert_index);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
+ * index of the ring
+ * buffer.
+ *
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @extract_index: New extract index to be set.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 extract_index);
+
+/**
+ * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current
+ * GPU cycle count.
+ * @ctx: Non-NULL pointer to a CSF interface context.
+ * @cycle_counts: Non-NULL pointer to an array where the cycle counts are
+ *                saved; the array size should be at least as big as the
+ *                number of clock domains returned by the get_prfcnt_info
+ *                interface.
+ * @clk_enable_map: An array of bitfields, each bit specifies an enabled clock
+ * domain.
+ *
+ * Requires lock to be taken before calling.
+ */
+typedef void
+kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u64 *cycle_counts, u64 clk_enable_map);
+
+/**
+ * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual
+ * interface.
+ * @ctx: CSF interface context.
+ * @assert_lock_held: Function ptr to assert backend spinlock is held.
+ * @lock: Function ptr to acquire backend spinlock.
+ * @unlock: Function ptr to release backend spinlock.
+ * @get_prfcnt_info: Function ptr to get performance counter related
+ * information.
+ * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC.
+ * @ring_buf_sync: Function ptr to sync ring buffer to CPU.
+ * @ring_buf_free: Function ptr to free ring buffer for CSF HWC.
+ * @timestamp_ns: Function ptr to get the current CSF interface
+ * timestamp.
+ * @dump_enable: Function ptr to enable dumping.
+ * @dump_disable: Function ptr to disable dumping.
+ * @dump_request: Function ptr to request a dump.
+ * @get_indexes: Function ptr to get extract and insert indexes of the
+ * ring buffer.
+ * @set_extract_index: Function ptr to set extract index of ring buffer.
+ * @get_gpu_cycle_count: Function ptr to get the GPU cycle count.
+ */
+struct kbase_hwcnt_backend_csf_if {
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx;
+ kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held;
+ kbase_hwcnt_backend_csf_if_lock_fn *lock;
+ kbase_hwcnt_backend_csf_if_unlock_fn *unlock;
+ kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info;
+ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc;
+ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync;
+ kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free;
+ kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns;
+ kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable;
+ kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable;
+ kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request;
+ kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes;
+ kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index;
+ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count;
+};
+
+#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */
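
For illustration, a minimal wiring sketch of how an integration might populate this vtable; the my_* callbacks are hypothetical stand-ins whose definitions are omitted here, with the in-tree firmware-backed implementation living in mali_kbase_hwcnt_backend_csf_if_fw.c below.

static struct kbase_hwcnt_backend_csf_if example_csf_if = {
	.ctx = NULL, /* set to the integration's context at init time */
	.assert_lock_held = my_assert_lock_held,
	.lock = my_lock,
	.unlock = my_unlock,
	.get_prfcnt_info = my_get_prfcnt_info,
	.ring_buf_alloc = my_ring_buf_alloc,
	.ring_buf_sync = my_ring_buf_sync,
	.ring_buf_free = my_ring_buf_free,
	.timestamp_ns = my_timestamp_ns,
	.dump_enable = my_dump_enable,
	.dump_disable = my_dump_disable,
	.dump_request = my_dump_request,
	.get_indexes = my_get_indexes,
	.set_extract_index = my_set_extract_index,
	.get_gpu_cycle_count = my_get_gpu_cycle_count,
};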
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
new file mode 100644
index 0000000..a3a0e02
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -0,0 +1,784 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * CSF GPU HWC backend firmware interface APIs.
+ */
+
+#include <mali_kbase.h>
+#include <gpu/mali_kbase_gpu_regmap.h>
+#include <device/mali_kbase_device.h>
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+#include <csf/mali_kbase_csf_registers.h>
+
+#include "csf/mali_kbase_csf_firmware.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include <linux/log2.h>
+#include "mali_kbase_ccswe.h"
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include <backend/gpu/mali_kbase_model_dummy.h>
+#endif /* CONFIG_MALI_NO_MALI */
+
+/* The ring buffer virtual address starts at 4GB */
+#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface
+ * used to save the manual and
+ * auto HWC samples from
+ * firmware.
+ * @gpu_dump_base: Starting GPU base address of the ring buffer.
+ * @cpu_dump_base: Starting CPU address for the mapping.
+ * @buf_count: Buffer count in the ring buffer, MUST be power of 2.
+ * @as_nr: Address space number for the memory mapping.
+ * @phys: Physical memory allocation used by the mapping.
+ * @num_pages: Size of the mapping, in memory pages.
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
+ u64 gpu_dump_base;
+ void *cpu_dump_base;
+ size_t buf_count;
+ u32 as_nr;
+ struct tagged_addr *phys;
+ size_t num_pages;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
+ * interface, used to communicate
+ * with firmware.
+ * @kbdev: KBase device.
+ * @buf_bytes: The size in bytes for each buffer in the ring buffer.
+ * @clk_cnt: The number of clock domains in the system.
+ * The maximum is 64.
+ * @clk_enable_map:     Bitmask of enabled clocks.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ctx {
+ struct kbase_device *kbdev;
+ size_t buf_bytes;
+ u8 clk_cnt;
+ u64 clk_enable_map;
+ struct kbase_clk_rate_listener rate_listener;
+ struct kbase_ccswe ccswe_shader_cores;
+};
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock(kbdev, flags);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - Frequency change callback.
+ *
+ * @rate_listener:    Callback state.
+ * @clk_index:        Clock index.
+ * @clk_rate_hz:      Clock frequency (Hz).
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index, u32 clk_rate_hz)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
+ rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
+ u64 timestamp_ns;
+
+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+ return;
+
+ timestamp_ns = ktime_get_raw_ns();
+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
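
Conceptually (a sketch only, not the kbase_ccswe implementation), the software estimator accumulates elapsed_time * current_frequency, closing a segment every time the rate listener above reports a frequency change; overflow handling over very long segments is ignored here for clarity.

struct example_ccswe {
	u64 cycles;  /* cycles accumulated up to last_ns */
	u64 last_ns; /* timestamp of the last update */
	u32 freq_hz; /* frequency in effect since last_ns */
};

static void example_ccswe_freq_change(struct example_ccswe *e, u64 now_ns, u32 new_freq_hz)
{
	/* Close the segment at the old frequency before switching over. */
	e->cycles += div_u64((now_ns - e->last_ns) * e->freq_hz, NSEC_PER_SEC);
	e->last_ns = now_ns;
	e->freq_hz = new_freq_hz;
}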
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
+ *
+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
+ * @clk_enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx,
+ u64 clk_enable_map)
+{
+ struct kbase_device *kbdev = fw_ctx->kbdev;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ /* software estimation for non-top clock domains */
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+ u32 cur_freq;
+ unsigned long flags;
+ u64 timestamp_ns;
+
+ timestamp_ns = ktime_get_raw_ns();
+
+ spin_lock_irqsave(&rtm->lock, flags);
+
+ cur_freq = (u32)clk_data->clock_val;
+ kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores);
+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq);
+
+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener);
+
+ spin_unlock_irqrestore(&rtm->lock, flags);
+ }
+
+ fw_ctx->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking
+ *
+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+{
+ struct kbase_device *kbdev = fw_ctx->kbdev;
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ u64 clk_enable_map = fw_ctx->clk_enable_map;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES))
+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
+{
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
+ .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
+ .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
+ .prfcnt_hw_size =
+ KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .prfcnt_fw_size =
+ KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE,
+ .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE,
+ .clk_cnt = 1,
+ .clearing_samples = true,
+ };
+
+ fw_ctx->buf_bytes = prfcnt_info->dump_bytes;
+#else
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+ u32 prfcnt_size;
+ u32 prfcnt_hw_size;
+ u32 prfcnt_fw_size;
+ u32 prfcnt_block_size =
+ KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;
+
+ WARN_ON(!ctx);
+ WARN_ON(!prfcnt_info);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+ prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
+ prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size);
+ prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size);
+ fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
+
+ /* Read the block size if the GPU has the register PRFCNT_FEATURES
+ * which was introduced in architecture version 11.x.7.
+ */
+ if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
+ GPU_ID2_PRODUCT_TTUX) {
+ prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
+ kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
+ << 8;
+ }
+
+ *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
+ .prfcnt_hw_size = prfcnt_hw_size,
+ .prfcnt_fw_size = prfcnt_fw_size,
+ .dump_bytes = fw_ctx->buf_bytes,
+ .prfcnt_block_size = prfcnt_block_size,
+ .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices,
+ .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask,
+ .clk_cnt = fw_ctx->clk_cnt,
+ .clearing_samples = true,
+ };
+
+ /* Block size must be multiple of counter size. */
+ WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
+ /* Total size must be multiple of block size. */
+ WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0);
+#endif
+}
+
+static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
+ struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf)
+{
+ struct kbase_device *kbdev;
+ struct tagged_addr *phys;
+ struct page **page_list;
+ void *cpu_addr;
+ int ret;
+ int i;
+ size_t num_pages;
+ u64 flags;
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;
+
+ pgprot_t cpu_map_prot = PAGE_KERNEL;
+ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
+
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ /* Calls to this function are inherently asynchronous, with respect to
+ * MMU operations.
+ */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ WARN_ON(!ctx);
+ WARN_ON(!cpu_dump_base);
+ WARN_ON(!out_ring_buf);
+
+ kbdev = fw_ctx->kbdev;
+
+	/* The buffer count must be a power of 2. */
+ if (!is_power_of_2(buf_count))
+ return -EINVAL;
+
+	/* Alignment failure: the GPU VA base must be 2KB aligned. */
+ if (gpu_va_base & (2048 - 1))
+ return -EINVAL;
+
+ fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL);
+ if (!fw_ring_buf)
+ return -ENOMEM;
+
+ num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count);
+ phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL);
+ if (!phys)
+ goto phys_alloc_error;
+
+ page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL);
+ if (!page_list)
+ goto page_list_alloc_error;
+
+ /* Get physical page for the buffer */
+ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
+ phys, false);
+ if (ret != num_pages)
+ goto phys_mem_pool_alloc_error;
+
+ /* Get the CPU virtual address */
+ for (i = 0; i < num_pages; i++)
+ page_list[i] = as_page(phys[i]);
+
+ cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot);
+ if (!cpu_addr)
+ goto vmap_error;
+
+ flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+
+ /* Update MMU table */
+ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
+ num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
+ mmu_sync_info);
+ if (ret)
+ goto mmu_insert_failed;
+
+ kfree(page_list);
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr;
+#else
+ fw_ring_buf->gpu_dump_base = gpu_va_base;
+#endif /* CONFIG_MALI_NO_MALI */
+ fw_ring_buf->cpu_dump_base = cpu_addr;
+ fw_ring_buf->phys = phys;
+ fw_ring_buf->num_pages = num_pages;
+ fw_ring_buf->buf_count = buf_count;
+ fw_ring_buf->as_nr = MCU_AS_NR;
+
+ *cpu_dump_base = fw_ring_buf->cpu_dump_base;
+ *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf;
+
+ return 0;
+
+mmu_insert_failed:
+ vunmap(cpu_addr);
+vmap_error:
+ kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys,
+ false, false);
+phys_mem_pool_alloc_error:
+ kfree(page_list);
+page_list_alloc_error:
+ kfree(phys);
+phys_alloc_error:
+ kfree(fw_ring_buf);
+ return -ENOMEM;
+}
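
As a worked example of the sizing above (assuming 4 KiB pages and illustrative values), a per-sample size of 8192 bytes with a 128-entry ring buffer needs PFN_UP(8192 * 128) = 256 backing pages:

static size_t example_ring_buf_pages(size_t buf_bytes, u32 buf_count)
{
	/* e.g. buf_bytes = 8192, buf_count = 128 -> 1 MiB -> 256 pages. */
	return PFN_UP(buf_bytes * buf_count);
}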
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ u32 buf_index_first, u32 buf_index_last, bool for_cpu)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ size_t i;
+ size_t pg_first;
+ size_t pg_last;
+ u64 start_address;
+ u64 stop_address;
+ u32 ring_buf_index_first;
+ u32 ring_buf_index_last;
+
+ WARN_ON(!ctx);
+ WARN_ON(!ring_buf);
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+	/* When using the dummy backend, syncing the ring buffer is unnecessary
+	 * as it is only accessed by the CPU. Syncing may also cause data loss
+	 * due to cache invalidation, so return early.
+	 */
+ return;
+#endif /* CONFIG_MALI_NO_MALI */
+
+	/* The index arguments for this function form a half-open range:
+	 * [buf_index_first, buf_index_last).
+	 * However, when masking back to the available buffers we make the range
+	 * inclusive at both ends, so that full flushes are not 0 -> 0.
+	 */
+ ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1);
+ ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1);
+
+ /* The start address is the offset of the first buffer. */
+ start_address = fw_ctx->buf_bytes * ring_buf_index_first;
+ pg_first = start_address >> PAGE_SHIFT;
+
+ /* The stop address is the last byte in the final buffer. */
+ stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1;
+ pg_last = stop_address >> PAGE_SHIFT;
+
+ /* Check whether the buffer range wraps. */
+ if (start_address > stop_address) {
+ /* sync the first part to the end of ring buffer. */
+ for (i = pg_first; i < fw_ring_buf->num_pages; i++) {
+ struct page *pg = as_page(fw_ring_buf->phys[i]);
+
+ if (for_cpu) {
+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg),
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ } else {
+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg),
+ PAGE_SIZE, DMA_BIDIRECTIONAL);
+ }
+ }
+
+ /* second part starts from page 0. */
+ pg_first = 0;
+ }
+
+ for (i = pg_first; i <= pg_last; i++) {
+ struct page *pg = as_page(fw_ring_buf->phys[i]);
+
+ if (for_cpu) {
+ kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ } else {
+ kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+ }
+}
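
The index masking and wraparound handling above can be illustrated with a small standalone sketch; the buffer count, per-sample size and indices below are hypothetical, not driver values.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	uint32_t buf_count = 8;        /* power of 2, as required above */
	uint64_t buf_bytes = 6144;     /* hypothetical per-sample size */
	uint32_t first = 6, last = 10; /* half-open range [6, 10) */

	/* Mask back into the ring and make the range inclusive at both ends. */
	uint32_t idx_first = first & (buf_count - 1);     /* 6 */
	uint32_t idx_last = (last - 1) & (buf_count - 1); /* 1: the range wraps */

	uint64_t start = buf_bytes * idx_first;
	uint64_t stop = buf_bytes * (idx_last + 1) - 1;

	printf("first page %llu, last page %llu%s\n",
	       (unsigned long long)(start >> PAGE_SHIFT),
	       (unsigned long long)(stop >> PAGE_SHIFT),
	       start > stop ? " (wrapped: sync tail pages first, then from page 0)" : "");
	return 0;
}
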
+
+static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ CSTD_UNUSED(ctx);
+ return ktime_get_raw_ns();
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ if (!fw_ring_buf)
+ return;
+
+ if (fw_ring_buf->phys) {
+ u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
+
+ WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
+ gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
+ fw_ring_buf->num_pages, MCU_AS_NR));
+
+ vunmap(fw_ring_buf->cpu_dump_base);
+
+ kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+ fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
+
+ kfree(fw_ring_buf->phys);
+
+ kfree(fw_ring_buf);
+ }
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ struct kbase_hwcnt_backend_csf_if_enable *enable)
+{
+ u32 prfcnt_config;
+ struct kbase_device *kbdev;
+ struct kbase_csf_global_iface *global_iface;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
+ (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+
+ WARN_ON(!ctx);
+ WARN_ON(!ring_buf);
+ WARN_ON(!enable);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ kbdev = fw_ctx->kbdev;
+ global_iface = &kbdev->csf.global_iface;
+
+ /* Configure */
+ prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count);
+ prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set);
+
+ /* Configure the ring buffer base address */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO,
+ fw_ring_buf->gpu_dump_base & U32_MAX);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI,
+ fw_ring_buf->gpu_dump_base >> 32);
+
+ /* Set extract position to 0 */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0);
+
+ /* Configure the enable bitmap */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);
+
+ /* Configure the HWC set and buffer size */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);
+
+ kbdev->csf.hwcnt.enable_pending = true;
+
+ /* Unmask the interrupts */
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK,
+ GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK);
+
+ /* Enable the HWC */
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ,
+ (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT),
+ GLB_REQ_PRFCNT_ENABLE_MASK);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+ prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG);
+
+ kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map);
+}
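
A minimal sketch of how the 64-bit ring buffer base address splits into the LO/HI register writes above; the address used here is made up for illustration.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical ring buffer GPU VA; not a real driver value. */
	uint64_t gpu_dump_base = 0x0000007FC0000000ull;

	/* Split into the two 32-bit values programmed as BASE_LO/BASE_HI. */
	uint32_t lo = (uint32_t)(gpu_dump_base & 0xFFFFFFFFu);
	uint32_t hi = (uint32_t)(gpu_dump_base >> 32);

	printf("BASE_LO=0x%08x BASE_HI=0x%08x\n", lo, hi);
	return 0;
}
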
+
+static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ struct kbase_device *kbdev;
+ struct kbase_csf_global_iface *global_iface;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ kbdev = fw_ctx->kbdev;
+ global_iface = &kbdev->csf.global_iface;
+
+ /* Disable the HWC */
+ kbdev->csf.hwcnt.enable_pending = true;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+
+	/* Mask the interrupts */
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK);
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0,
+ GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK);
+
+ /* In case we have a previous request in flight when the disable
+ * happens.
+ */
+ kbdev->csf.hwcnt.request_pending = false;
+
+ kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ u32 glb_req;
+ struct kbase_device *kbdev;
+ struct kbase_csf_global_iface *global_iface;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ kbdev = fw_ctx->kbdev;
+ global_iface = &kbdev->csf.global_iface;
+
+ /* Trigger dumping */
+ kbdev->csf.hwcnt.request_pending = true;
+ glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+ glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
+ GLB_REQ_PRFCNT_SAMPLE_MASK);
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
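
The dump request above works as a toggle: flipping the PRFCNT_SAMPLE bit in GLB_REQ and ringing the doorbell asks the firmware for a sample, and the request is treated as outstanding until the matching ACK bit equals the REQ bit again. A standalone sketch of that convention follows; the bit position is invented for the example.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PRFCNT_SAMPLE_MASK (1u << 8) /* hypothetical bit position */

/* Toggle-style request: the host flips the bit in REQ and the firmware
 * acknowledges by making the corresponding ACK bit equal to REQ again.
 */
static bool sample_pending(uint32_t glb_req, uint32_t glb_ack)
{
	return ((glb_req ^ glb_ack) & PRFCNT_SAMPLE_MASK) != 0;
}

int main(void)
{
	uint32_t req = 0, ack = 0;

	req ^= PRFCNT_SAMPLE_MASK;                        /* host requests a sample */
	printf("pending=%d\n", sample_pending(req, ack)); /* 1 */
	ack = req;                                        /* firmware acknowledges  */
	printf("pending=%d\n", sample_pending(req, ack)); /* 0 */
	return 0;
}
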
+
+static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 *extract_index, u32 *insert_index)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ WARN_ON(!extract_index);
+ WARN_ON(!insert_index);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ *extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
+ GLB_PRFCNT_EXTRACT);
+ *insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
+ GLB_PRFCNT_INSERT);
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u32 extract_idx)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+ WARN_ON(!ctx);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ /* Set the raw extract index to release the buffer back to the ring
+ * buffer.
+ */
+ kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
+ extract_idx);
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ u64 *cycle_counts, u64 clk_enable_map)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ u8 clk;
+ u64 timestamp_ns = ktime_get_raw_ns();
+
+ WARN_ON(!ctx);
+ WARN_ON(!cycle_counts);
+ kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+ for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
+ if (!(clk_enable_map & (1ull << clk)))
+ continue;
+
+ if (clk == KBASE_CLOCK_DOMAIN_TOP) {
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
+ NULL, NULL);
+ } else {
+ /* Estimate cycle count for non-top clock domain. */
+ cycle_counts[clk] =
+ kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
+ }
+ }
+}
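
The per-domain loop above only touches clock domains whose bit is set in clk_enable_map. A standalone sketch of that selection, with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t clk_cnt = 2;           /* e.g. top domain plus shader cores */
	uint64_t clk_enable_map = 0x3; /* hypothetical: both domains enabled */
	uint8_t clk;

	for (clk = 0; clk < clk_cnt; clk++) {
		if (!(clk_enable_map & (1ull << clk)))
			continue;
		printf("would read or estimate cycle count for domain %u\n", clk);
	}
	return 0;
}
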
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
+ *
+ * @fw_ctx: Pointer to context to destroy.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+{
+ if (!fw_ctx)
+ return;
+
+ kfree(fw_ctx);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context.
+ *
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @out_ctx: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int
+kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx)
+{
+ u8 clk;
+ int errcode = -ENOMEM;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
+
+ WARN_ON(!kbdev);
+ WARN_ON(!out_ctx);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ goto error;
+
+ ctx->kbdev = kbdev;
+
+ /* Determine the number of available clock domains. */
+ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
+ if (kbdev->pm.clk_rtm.clks[clk] == NULL)
+ break;
+ }
+ ctx->clk_cnt = clk;
+
+ ctx->clk_enable_map = 0;
+ kbase_ccswe_init(&ctx->ccswe_shader_cores);
+ ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change;
+
+ *out_ctx = ctx;
+
+ return 0;
+error:
+ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx);
+ return errcode;
+}
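
Determining clk_cnt above just counts leading non-NULL entries in the clock descriptor array. A tiny userspace equivalent, with dummy pointers standing in for the descriptors:

#include <stdio.h>

#define MAX_CLOCKS 4 /* stand-in for BASE_MAX_NR_CLOCKS_REGULATORS */

int main(void)
{
	/* Hypothetical clock descriptor array: two populated slots. */
	void *clks[MAX_CLOCKS] = { (void *)1, (void *)1, NULL, NULL };
	unsigned int clk;

	for (clk = 0; clk < MAX_CLOCKS; clk++) {
		if (clks[clk] == NULL)
			break;
	}
	printf("clk_cnt = %u\n", clk); /* 2 */
	return 0;
}
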
+
+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw)
+{
+ if (!if_fw)
+ return;
+
+ kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(
+ (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx);
+ memset(if_fw, 0, sizeof(*if_fw));
+}
+
+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if *if_fw)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL;
+
+ if (!kbdev || !if_fw)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx);
+ if (errcode)
+ return errcode;
+
+ if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx;
+ if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held;
+ if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock;
+ if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock;
+ if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info;
+ if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc;
+ if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync;
+ if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free;
+ if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns;
+ if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable;
+ if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable;
+ if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request;
+ if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count;
+ if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes;
+ if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index;
+
+ return 0;
+}
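
kbase_hwcnt_backend_csf_if_fw_create() binds the concrete firmware implementation into a table of function pointers that the generic hwcnt code then calls through. A minimal standalone sketch of that pattern; the types and names below are invented for the example and are not part of the kbase API.

#include <stdio.h>

struct demo_if {
	void *ctx;
	int (*dump_request)(void *ctx);
};

static int demo_dump_request(void *ctx)
{
	(void)ctx;
	printf("dump requested\n");
	return 0;
}

static void demo_if_create(struct demo_if *iface)
{
	iface->ctx = NULL;
	iface->dump_request = demo_dump_request;
}

int main(void)
{
	struct demo_if iface;

	demo_if_create(&iface);
	return iface.dump_request(iface.ctx);
}
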
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h
new file mode 100644
index 0000000..71d1506
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
+#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
+
+/**
+ * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface
+ *                                          of the hardware counter backend.
+ * @kbdev: Non-NULL pointer to Kbase device.
+ * @if_fw: Non-NULL pointer to backend interface structure that is filled in on
+ *         creation success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_csf_if *if_fw);
+
+/**
+ * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of
+ *                                            the hardware counter backend.
+ * @if_fw: Pointer to a CSF interface to destroy.
+ */
+void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw);
+
+#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
new file mode 100644
index 0000000..6ddd7ba
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -0,0 +1,863 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+#include "mali_kbase.h"
+#include "backend/gpu/mali_kbase_pm_ca.h"
+#include "mali_kbase_hwaccess_instr.h"
+#include "mali_kbase_hwaccess_time.h"
+#include "mali_kbase_ccswe.h"
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#include "backend/gpu/mali_kbase_model_dummy.h"
+#endif /* CONFIG_MALI_NO_MALI */
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+
+#include "backend/gpu/mali_kbase_pm_internal.h"
+
+/**
+ * struct kbase_hwcnt_backend_jm_info - Information used to create an instance
+ * of a JM hardware counter backend.
+ * @kbdev: KBase device.
+ * @counter_set: The performance counter set to use.
+ * @metadata: Hardware counter metadata.
+ * @dump_bytes: Bytes of GPU memory required to perform a
+ * hardware counter dump.
+ * @hwcnt_gpu_info: Hardware counter block information.
+ */
+struct kbase_hwcnt_backend_jm_info {
+ struct kbase_device *kbdev;
+ enum kbase_hwcnt_set counter_set;
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_bytes;
+ struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
+};
+
+/**
+ * struct kbase_hwcnt_jm_physical_layout - HWC sample memory physical layout
+ * information.
+ * @fe_cnt: Front end block count.
+ * @tiler_cnt: Tiler block count.
+ * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
+ * @shader_cnt: Shader Core block count.
+ * @block_cnt: Total block count (sum of all other block counts).
+ * @shader_avail_mask: Bitmap of all shader cores in the system.
+ * @enable_mask_offset: Offset in array elements of enable mask in each block
+ * starting from the beginning of block.
+ * @headers_per_block: Number of header values per block.
+ * @counters_per_block: Number of counter values per block.
+ * @values_per_block: Total number of values per block.
+ */
+struct kbase_hwcnt_jm_physical_layout {
+ u8 fe_cnt;
+ u8 tiler_cnt;
+ u8 mmu_l2_cnt;
+ u8 shader_cnt;
+ u8 block_cnt;
+ u64 shader_avail_mask;
+ size_t enable_mask_offset;
+ size_t headers_per_block;
+ size_t counters_per_block;
+ size_t values_per_block;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
+ * @info: Info used to create the backend.
+ * @kctx: KBase context used for GPU memory allocation and
+ * counter dumping.
+ * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va: CPU mapping of gpu_dump_va.
+ * @vmap: Dump buffer vmap.
+ * @to_user_buf: HWC sample buffer for client user, size
+ * metadata.dump_buf_bytes.
+ * @enabled: True if dumping has been enabled, else false.
+ * @pm_core_mask: PM state sync-ed shaders core mask for the enabled
+ * dumping.
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * source raw dump buffer to the destination dump buffer.
+ * @clk_enable_map: The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed:
+ * Cycle count elapsed for a given sample period.
+ *			The cycle count for the top clock domain (index 0) is
+ *			read directly from hardware, but the other clock
+ *			domains are estimated in software.
+ * @prev_cycle_count: Previous cycle count to calculate the cycle count for
+ * sample period.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ * @phys_layout: Physical memory layout information of HWC sample buffer.
+ */
+struct kbase_hwcnt_backend_jm {
+ const struct kbase_hwcnt_backend_jm_info *info;
+ struct kbase_context *kctx;
+ u64 gpu_dump_va;
+ void *cpu_dump_va;
+ struct kbase_vmap_struct *vmap;
+ u64 *to_user_buf;
+ bool enabled;
+ u64 pm_core_mask;
+ struct kbase_hwcnt_curr_config curr_config;
+ u64 clk_enable_map;
+ u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+ u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+ struct kbase_clk_rate_listener rate_listener;
+ struct kbase_ccswe ccswe_shader_cores;
+ struct kbase_hwcnt_jm_physical_layout phys_layout;
+};
+
+/**
+ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used
+ * to create the hwcnt metadata.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @info: Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
+ struct kbase_hwcnt_gpu_info *info)
+{
+ size_t clk;
+
+ if (!kbdev || !info)
+ return -EINVAL;
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
+#else /* CONFIG_MALI_NO_MALI */
+ {
+ const struct base_gpu_props *props = &kbdev->gpu_props.props;
+ const size_t l2_count = props->l2_props.num_l2_slices;
+ const size_t core_mask = props->coherency_info.group[0].core_mask;
+
+ info->l2_count = l2_count;
+ info->core_mask = core_mask;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
+ }
+#endif /* CONFIG_MALI_NO_MALI */
+
+ /* Determine the number of available clock domains. */
+ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
+ if (kbdev->pm.clk_rtm.clks[clk] == NULL)
+ break;
+ }
+ info->clk_cnt = clk;
+
+ return 0;
+}
+
+static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info,
+ struct kbase_hwcnt_jm_physical_layout *phys_layout)
+{
+ u8 shader_core_cnt;
+
+ WARN_ON(!gpu_info);
+ WARN_ON(!phys_layout);
+
+ shader_core_cnt = fls64(gpu_info->core_mask);
+
+ *phys_layout = (struct kbase_hwcnt_jm_physical_layout){
+ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
+ .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
+ .mmu_l2_cnt = gpu_info->l2_count,
+ .shader_cnt = shader_core_cnt,
+ .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ gpu_info->l2_count + shader_core_cnt,
+ .shader_avail_mask = gpu_info->core_mask,
+ .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .values_per_block = gpu_info->prfcnt_values_per_block,
+ .counters_per_block =
+ gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
+ };
+}
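
The layout above derives the shader block count from fls64(core_mask), i.e. the index of the highest set bit plus one, so a mask whose top bit is at position N-1 yields N shader blocks. A standalone sketch with example block counts (the core mask and counts are hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Software stand-in for the kernel's fls64(): index of the highest set bit
 * plus one, or 0 for an empty mask.
 */
static unsigned int fls64_sw(uint64_t x)
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	uint64_t core_mask = 0x3F; /* hypothetical: 6 contiguous shader cores */
	unsigned int fe_cnt = 1, tiler_cnt = 1, l2_cnt = 2; /* example counts */
	unsigned int shader_cnt = fls64_sw(core_mask);

	printf("block_cnt = %u\n", fe_cnt + tiler_cnt + l2_cnt + shader_cnt); /* 10 */
	return 0;
}
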
+
+static void
+kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm)
+{
+ size_t block_idx;
+ const u32 *new_sample_buf = backend_jm->cpu_dump_va;
+ const u32 *new_block = new_sample_buf;
+ u64 *dst_buf = backend_jm->to_user_buf;
+ u64 *dst_block = dst_buf;
+ const size_t values_per_block = backend_jm->phys_layout.values_per_block;
+ const size_t dump_bytes = backend_jm->info->dump_bytes;
+
+ for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) {
+ size_t ctr_idx;
+
+ for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++)
+ dst_block[ctr_idx] = new_block[ctr_idx];
+
+ new_block += values_per_block;
+ dst_block += values_per_block;
+ }
+
+ WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+ WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
+}
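
The sample copy above widens each 32-bit hardware counter value into the 64-bit buffer handed to clients. A minimal standalone equivalent with fake data:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Fake 32-bit hardware sample; real samples are read from the dump VA. */
	const uint32_t hw[4] = { 1, 2, 3, 0xFFFFFFFFu };
	uint64_t user[4];
	size_t i;

	for (i = 0; i < 4; i++)
		user[i] = hw[i]; /* zero-extends each counter to 64 bits */

	printf("last value: %llu\n", (unsigned long long)user[3]);
	return 0;
}
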
+
+/**
+ * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
+ *
+ * @rate_listener: Callback state
+ * @clk_index: Clock index
+ * @clk_rate_hz: Clock frequency (Hz)
+ */
+static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index, u32 clk_rate_hz)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm =
+ container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
+ u64 timestamp_ns;
+
+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+ return;
+
+ timestamp_ns = ktime_get_raw_ns();
+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to backend.
+ * @enable_map: Non-NULL pointer to enable map specifying enabled counters.
+ * @timestamp_ns: Timestamp(ns) when HWCNT were enabled.
+ */
+static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 timestamp_ns)
+{
+ struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+ u64 clk_enable_map = enable_map->clk_enable_map;
+ u64 cycle_count;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ /* turn on the cycle counter */
+ kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL);
+
+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count;
+ }
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ /* software estimation for non-top clock domains */
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
+ u32 cur_freq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtm->lock, flags);
+
+ cur_freq = (u32)clk_data->clock_val;
+ kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
+ kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq);
+
+ kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener);
+
+ spin_unlock_irqrestore(&rtm->lock, flags);
+
+ /* ccswe was reset. The estimated cycle is zero. */
+ backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
+ }
+
+ /* Keep clk_enable_map for dump_request. */
+ backend_jm->clk_enable_map = clk_enable_map;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking
+ *
+ * @backend_jm: Non-NULL pointer to backend.
+ */
+static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm)
+{
+ struct kbase_device *kbdev = backend_jm->kctx->kbdev;
+ struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
+ u64 clk_enable_map = backend_jm->clk_enable_map;
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
+ /* turn off the cycle counter */
+ kbase_pm_release_gpu_cycle_counter(kbdev);
+ }
+
+ if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
+ kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener);
+ }
+}
+
+/**
+ * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
+ * current config information.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @curr_config: Non-NULL pointer to return the current configuration of
+ * hardware allocated to the GPU.
+ *
+ * The current configuration information is used for architectures where the
+ * max_config interface is available from the Arbiter. In this case the
+ * currently allocated hardware is not always the same, so the current config
+ * information is used to correctly map the currently allocated resources to
+ * the memory layout that is copied to user space.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev,
+ struct kbase_hwcnt_curr_config *curr_config)
+{
+ if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
+ return -EINVAL;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices;
+ curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present;
+ return 0;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
+static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+ (void)backend;
+ return ktime_get_raw_ns();
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
+static int
+kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_context *kctx;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_physical_enable_map phys_enable_map;
+ enum kbase_hwcnt_physical_set phys_counter_set;
+ struct kbase_instr_hwcnt_enable enable;
+ u64 timestamp_ns;
+
+ if (!backend_jm || !enable_map || backend_jm->enabled ||
+ (enable_map->metadata != backend_jm->info->metadata))
+ return -EINVAL;
+
+ kctx = backend_jm->kctx;
+ kbdev = backend_jm->kctx->kbdev;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);
+
+ kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);
+
+ enable.fe_bm = phys_enable_map.fe_bm;
+ enable.shader_bm = phys_enable_map.shader_bm;
+ enable.tiler_bm = phys_enable_map.tiler_bm;
+ enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+ enable.counter_set = phys_counter_set;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ /* The dummy model needs the CPU mapping. */
+ enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va;
+#else
+ enable.dump_buffer = backend_jm->gpu_dump_va;
+#endif /* CONFIG_MALI_NO_MALI */
+ enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
+
+ timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
+
+ /* Update the current configuration information. */
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
+ if (errcode)
+ goto error;
+
+ backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
+
+ backend_jm->enabled = true;
+
+ kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);
+
+ return 0;
+error:
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */
+static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ unsigned long flags;
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_device *kbdev;
+
+ if (!backend_jm)
+ return -EINVAL;
+
+ kbdev = backend_jm->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
+static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (WARN_ON(!backend_jm) || !backend_jm->enabled)
+ return;
+
+ kbasep_hwcnt_backend_jm_cc_disable(backend_jm);
+
+ errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx);
+ WARN_ON(errcode);
+
+ backend_jm->enabled = false;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */
+static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (!backend_jm || !backend_jm->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_clear(backend_jm->kctx);
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */
+static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ struct kbase_device *kbdev;
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 current_cycle_count;
+ size_t clk;
+ int ret;
+
+ if (!backend_jm || !backend_jm->enabled || !dump_time_ns)
+ return -EINVAL;
+
+ kbdev = backend_jm->kctx->kbdev;
+ metadata = backend_jm->info->metadata;
+
+	/* Disable pre-emption to make the timestamp as accurate as possible */
+ preempt_disable();
+ {
+ *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
+ ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
+ continue;
+
+ if (clk == KBASE_CLOCK_DOMAIN_TOP) {
+ /* Read cycle count for top clock domain. */
+ kbase_backend_get_gpu_time_norequest(kbdev, &current_cycle_count,
+ NULL, NULL);
+ } else {
+ /*
+ * Estimate cycle count for non-top clock
+ * domain.
+ */
+ current_cycle_count = kbase_ccswe_cycle_at(
+ &backend_jm->ccswe_shader_cores, *dump_time_ns);
+ }
+ backend_jm->cycle_count_elapsed[clk] =
+ current_cycle_count - backend_jm->prev_cycle_count[clk];
+
+ /*
+ * Keep the current cycle count for later calculation.
+ */
+ backend_jm->prev_cycle_count[clk] = current_cycle_count;
+ }
+ }
+ preempt_enable();
+
+ return ret;
+}
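
Each dump request records, per enabled clock domain, the delta between the current and previous raw cycle counts, then keeps the current value for the next sample. A standalone sketch of that bookkeeping with invented numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Invented counter readings for one clock domain across two samples. */
	uint64_t prev_cycle_count = 1000000;
	uint64_t current_cycle_count = 1750000;

	uint64_t cycle_count_elapsed = current_cycle_count - prev_cycle_count;
	prev_cycle_count = current_cycle_count; /* kept for the next sample */

	printf("elapsed=%llu prev=%llu\n",
	       (unsigned long long)cycle_count_elapsed,
	       (unsigned long long)prev_cycle_count);
	return 0;
}
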
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
+static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+
+ if (!backend_jm || !backend_jm->enabled)
+ return -EINVAL;
+
+ return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
+static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ bool accumulate)
+{
+ struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
+ size_t clk;
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ struct kbase_device *kbdev;
+ unsigned long flags;
+ int errcode;
+#endif /* CONFIG_MALI_NO_MALI */
+
+ if (!backend_jm || !dst || !dst_enable_map ||
+ (backend_jm->info->metadata != dst->metadata) ||
+ (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ /* Invalidate the kernel buffer before reading from it. */
+ kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);
+
+ /* Dump sample to the internal 64-bit user buffer. */
+ kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
+
+ /* Extract elapsed cycle count for each clock domain if enabled. */
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
+ {
+ if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ continue;
+
+ /* Reset the counter to zero if accumulation is off. */
+ if (!accumulate)
+ dst->clk_cnt_buf[clk] = 0;
+ dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk];
+ }
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ kbdev = backend_jm->kctx->kbdev;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ /* Update the current configuration information. */
+ errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
+
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (errcode)
+ return errcode;
+#endif /* CONFIG_MALI_NO_MALI */
+ return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
+ backend_jm->pm_core_mask, &backend_jm->curr_config,
+ accumulate);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer.
+ * @info: Non-NULL pointer to JM backend info.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
+ * is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_context *kctx, u64 *gpu_dump_va)
+{
+ struct kbase_va_region *reg;
+ u64 flags;
+ u64 nr_pages;
+
+	/* Calls to this function are inherently asynchronous with respect to
+	 * MMU operations.
+	 */
+ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+
+ WARN_ON(!info);
+ WARN_ON(!kctx);
+ WARN_ON(!gpu_dump_va);
+
+ flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING |
+ BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU;
+
+ nr_pages = PFN_UP(info->dump_bytes);
+
+ reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info);
+
+ if (!reg)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer.
+ * @kctx: Non-NULL pointer to kbase context.
+ * @gpu_dump_va: GPU dump buffer virtual address.
+ */
+static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va)
+{
+ WARN_ON(!kctx);
+ if (gpu_dump_va)
+ kbase_mem_free(kctx, gpu_dump_va);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend.
+ * @backend: Pointer to JM backend to destroy.
+ *
+ * Can be safely called on a backend in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend)
+{
+ if (!backend)
+ return;
+
+ if (backend->kctx) {
+ struct kbase_context *kctx = backend->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+ if (backend->cpu_dump_va)
+ kbase_phy_alloc_mapping_put(kctx, backend->vmap);
+
+ if (backend->gpu_dump_va)
+ kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va);
+
+ kbasep_js_release_privileged_ctx(kbdev, kctx);
+ kbase_destroy_context(kctx);
+ }
+
+ kfree(backend->to_user_buf);
+
+ kfree(backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_create() - Create a JM backend.
+ * @info: Non-NULL pointer to backend info.
+ * @out_backend: Non-NULL pointer to where backend is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info,
+ struct kbase_hwcnt_backend_jm **out_backend)
+{
+ int errcode;
+ struct kbase_device *kbdev;
+ struct kbase_hwcnt_backend_jm *backend = NULL;
+
+ WARN_ON(!info);
+ WARN_ON(!out_backend);
+
+ kbdev = info->kbdev;
+
+ backend = kzalloc(sizeof(*backend), GFP_KERNEL);
+ if (!backend)
+ goto alloc_error;
+
+ backend->info = info;
+ kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout);
+
+ backend->kctx = kbase_create_context(kbdev, true,
+ BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
+ if (!backend->kctx)
+ goto alloc_error;
+
+ kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);
+
+ errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va);
+ if (errcode)
+ goto error;
+
+ backend->cpu_dump_va =
+ kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap);
+ if (!backend->cpu_dump_va || !backend->vmap)
+ goto alloc_error;
+
+ backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
+ if (!backend->to_user_buf)
+ goto alloc_error;
+
+ kbase_ccswe_init(&backend->ccswe_shader_cores);
+ backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;
+
+ *out_backend = backend;
+ return 0;
+
+alloc_error:
+ errcode = -ENOMEM;
+error:
+ kbasep_hwcnt_backend_jm_destroy(backend);
+ return errcode;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ if (!info)
+ return NULL;
+
+ return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ int errcode;
+ struct kbase_hwcnt_backend_jm *backend = NULL;
+
+ if (!info || !out_backend)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info,
+ &backend);
+ if (errcode)
+ return errcode;
+
+ *out_backend = (struct kbase_hwcnt_backend *)backend;
+
+ return 0;
+}
+
+/* JM backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
+{
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_jm_dump_disable(backend);
+ kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info)
+{
+ if (!info)
+ return;
+
+ kbase_hwcnt_jm_metadata_destroy(info->metadata);
+ kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev,
+ const struct kbase_hwcnt_backend_jm_info **out_info)
+{
+ int errcode = -ENOMEM;
+ struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+ WARN_ON(!kbdev);
+ WARN_ON(!out_info);
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return errcode;
+
+ info->kbdev = kbdev;
+
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ info->counter_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+ /* Default to primary */
+ info->counter_set = KBASE_HWCNT_SET_PRIMARY;
+#endif
+
+ errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set,
+ &info->metadata, &info->dump_bytes);
+ if (errcode)
+ goto error;
+
+ *out_info = info;
+
+ return 0;
+error:
+ kbasep_hwcnt_backend_jm_info_destroy(info);
+ return errcode;
+}
+
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ int errcode;
+ const struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+ if (!kbdev || !iface)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info);
+
+ if (errcode)
+ return errcode;
+
+ iface->info = (struct kbase_hwcnt_backend_info *)info;
+ iface->metadata = kbasep_hwcnt_backend_jm_metadata;
+ iface->init = kbasep_hwcnt_backend_jm_init;
+ iface->term = kbasep_hwcnt_backend_jm_term;
+ iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns;
+ iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable;
+ iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock;
+ iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable;
+ iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear;
+ iface->dump_request = kbasep_hwcnt_backend_jm_dump_request;
+ iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait;
+ iface->dump_get = kbasep_hwcnt_backend_jm_dump_get;
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface)
+ return;
+
+ kbasep_hwcnt_backend_jm_info_destroy(
+ (const struct kbase_hwcnt_backend_jm_info *)iface->info);
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
new file mode 100644
index 0000000..4a6293c
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for JM
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_JM_H_
+#define _KBASE_HWCNT_BACKEND_JM_H_
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+
+struct kbase_device;
+
+/**
+ * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend
+ * interface.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @iface: Non-NULL pointer to backend interface structure that is filled in
+ * on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+ struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend
+ * interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
new file mode 100644
index 0000000..a8654ea
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+
+#include <hwcnt/mali_kbase_hwcnt_gpu.h>
+#include <hwcnt/mali_kbase_hwcnt_types.h>
+
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
+
+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/* Backend watchdog timer interval in milliseconds: 18 seconds. */
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 18000;
+#else
+/* Backend watchdog timer interval in milliseconds: 1 second. */
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000;
+#endif /* IS_FPGA && !NO_MALI */
+
+/*
+ * IDLE_BUFFER_EMPTY -> USER_DUMPING_BUFFER_EMPTY on dump_request.
+ * IDLE_BUFFER_EMPTY -> TIMER_DUMPING after
+ * hwcnt_backend_watchdog_timer_interval_ms
+ * milliseconds, if no dump_request has been
+ * called in the meantime.
+ * IDLE_BUFFER_FULL -> USER_DUMPING_BUFFER_FULL on dump_request.
+ * IDLE_BUFFER_FULL -> TIMER_DUMPING after
+ * hwcnt_backend_watchdog_timer_interval_ms
+ * milliseconds, if no dump_request has been
+ * called in the meantime.
+ * IDLE_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_disable, upon discarding undumped
+ * counter values since the last dump_get.
+ * IDLE_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * IDLE_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * USER_DUMPING_BUFFER_EMPTY -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * USER_DUMPING_BUFFER_FULL -> BUFFER_CLEARING on dump_clear, before calling job manager
+ * backend dump_clear.
+ * BUFFER_CLEARING -> IDLE_BUFFER_EMPTY on dump_clear, upon job manager backend
+ * dump_clear completion.
+ * TIMER_DUMPING -> IDLE_BUFFER_FULL on timer's callback completion.
+ * TIMER_DUMPING -> TIMER_DUMPING_USER_CLEAR on dump_clear, notifies the callback thread
+ * that there is no need for dumping the buffer
+ * anymore, and that the client will proceed
+ * clearing the buffer.
+ * TIMER_DUMPING_USER_CLEAR -> IDLE_BUFFER_EMPTY on timer's callback completion, when a user
+ * requested a dump_clear.
+ * TIMER_DUMPING -> TIMER_DUMPING_USER_REQUESTED on dump_request, when a client performs a
+ * dump request while the timer is dumping (the
+ * timer will perform the dump and (once
+ * completed) the client will retrieve the value
+ * from the buffer).
+ * TIMER_DUMPING_USER_REQUESTED -> IDLE_BUFFER_EMPTY on dump_get, when a timer completed and the
+ * user reads the periodic dump buffer.
+ * Any -> ERROR if the job manager backend returns an error
+ * (of any kind).
+ * USER_DUMPING_BUFFER_EMPTY -> IDLE_BUFFER_EMPTY on dump_get (performs get, ignores the
+ * periodic dump buffer and returns).
+ * USER_DUMPING_BUFFER_FULL -> IDLE_BUFFER_EMPTY on dump_get (performs get, accumulates with
+ * periodic dump buffer and returns).
+ */
+
+/** enum backend_watchdog_state - State used to synchronize timer callbacks with the main thread.
+ * @HWCNT_JM_WD_ERROR: Received an error from the job manager backend calls.
+ * @HWCNT_JM_WD_IDLE_BUFFER_EMPTY: Initial state. Watchdog timer enabled, periodic dump buffer is
+ * empty.
+ * @HWCNT_JM_WD_IDLE_BUFFER_FULL: Watchdog timer enabled, periodic dump buffer is full.
+ * @HWCNT_JM_WD_BUFFER_CLEARING: The client is performing a dump clear. A concurrent timer callback
+ * thread should just ignore and reschedule another callback in
+ * hwcnt_backend_watchdog_timer_interval_ms milliseconds.
+ * @HWCNT_JM_WD_TIMER_DUMPING: The timer ran out. The callback is performing a periodic dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: While the timer is performing a periodic dump, user
+ * requested a dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR: While the timer is performing a dump, user requested a
+ * dump_clear. The timer has to complete the periodic dump
+ * and clear buffer (internal and job manager backend).
+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: From IDLE state, user requested a dump. The periodic
+ * dump buffer is empty.
+ * @HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: From IDLE state, user requested a dump. The periodic dump
+ * buffer is full.
+ *
+ * While the state machine is in HWCNT_JM_WD_TIMER_DUMPING*, only the timer callback thread is
+ * allowed to call the job manager backend layer.
+ */
+enum backend_watchdog_state {
+ HWCNT_JM_WD_ERROR,
+ HWCNT_JM_WD_IDLE_BUFFER_EMPTY,
+ HWCNT_JM_WD_IDLE_BUFFER_FULL,
+ HWCNT_JM_WD_BUFFER_CLEARING,
+ HWCNT_JM_WD_TIMER_DUMPING,
+ HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED,
+ HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR,
+ HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY,
+ HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL,
+};
+
+/** enum wd_init_state - State machine for initialization / termination of the backend resources
+ */
+enum wd_init_state {
+ HWCNT_JM_WD_INIT_START,
+ HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START,
+ HWCNT_JM_WD_INIT_ENABLE_MAP,
+ HWCNT_JM_WD_INIT_DUMP_BUFFER,
+ HWCNT_JM_WD_INIT_END
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm_watchdog_info - Immutable information used to initialize an
+ * instance of the job manager watchdog backend.
+ * @jm_backend_iface: Hardware counter backend interface. This module extends
+ * this interface with a watchdog that performs regular
+ * dumps. The new interface this module provides complies
+ * with the old backend interface.
+ * @dump_watchdog_iface: Dump watchdog interface, used to periodically dump the
+ * hardware counter in case no reads are requested within
+ * a certain time, used to avoid hardware counter's buffer
+ * saturation.
+ */
+struct kbase_hwcnt_backend_jm_watchdog_info {
+ struct kbase_hwcnt_backend_interface *jm_backend_iface;
+ struct kbase_hwcnt_watchdog_interface *dump_watchdog_iface;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm_watchdog - An instance of the job manager watchdog backend.
+ * @info: Immutable information used to create the job manager watchdog backend.
+ * @jm_backend: Job manager's backend internal state. To be passed as argument during parent calls.
+ * @timeout_ms: Time period in milliseconds for hardware counters dumping.
+ * @wd_dump_buffer: Used to store periodic dumps done by a timer callback function. Contents are
+ * valid in state %HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED,
+ * %HWCNT_JM_WD_IDLE_BUFFER_FULL or %HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL.
+ * @wd_enable_map: Watchdog backend internal buffer mask, initialized during
+ *                 dump_enable by copying the enable_map passed as argument.
+ * @wd_dump_timestamp: Holds the dumping timestamp for potential future client dump_request, filled
+ * during watchdog timer dumps.
+ * @watchdog_complete: Used for synchronization between watchdog dumper thread and client calls.
+ * @locked: Members protected from concurrent access by different threads.
+ * @locked.watchdog_lock: Lock used to access fields within this struct (that require mutual
+ * exclusion).
+ * @locked.is_enabled: If true then the wrapped job manager hardware counter backend and the
+ * watchdog timer are both enabled. If false then both are disabled (or soon
+ * will be). Races between enable and disable have undefined behavior.
+ * @locked.state: State used to synchronize timer callbacks with the main thread.
+ */
+struct kbase_hwcnt_backend_jm_watchdog {
+ const struct kbase_hwcnt_backend_jm_watchdog_info *info;
+ struct kbase_hwcnt_backend *jm_backend;
+ u32 timeout_ms;
+ struct kbase_hwcnt_dump_buffer wd_dump_buffer;
+ struct kbase_hwcnt_enable_map wd_enable_map;
+ u64 wd_dump_timestamp;
+ struct completion watchdog_complete;
+ struct {
+ spinlock_t watchdog_lock;
+ bool is_enabled;
+ enum backend_watchdog_state state;
+ } locked;
+};
+
+/* timer's callback function */
+static void kbasep_hwcnt_backend_jm_watchdog_timer_callback(void *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = backend;
+ unsigned long flags;
+ bool wd_accumulate;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!wd_backend->locked.is_enabled || wd_backend->locked.state == HWCNT_JM_WD_ERROR) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+
+ if (!(wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_EMPTY ||
+ wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)) {
+		/* Resetting the timer. Calling modify on a disabled timer enables it. */
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms);
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+	/* Start performing the dump */
+
+ /* if there has been a previous timeout use accumulating dump_get()
+ * otherwise use non-accumulating to overwrite buffer
+ */
+ wd_accumulate = (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL);
+
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend,
+ &wd_backend->wd_dump_timestamp) ||
+ wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend) ||
+ wd_backend->info->jm_backend_iface->dump_get(
+ wd_backend->jm_backend, &wd_backend->wd_dump_buffer, &wd_backend->wd_enable_map,
+ wd_accumulate)) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ /* Unblock user if it's waiting. */
+ complete_all(&wd_backend->watchdog_complete);
+ return;
+ }
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+ if (wd_backend->locked.state == HWCNT_JM_WD_TIMER_DUMPING) {
+		/* If there is no user request/clear, transition to
+		 * HWCNT_JM_WD_IDLE_BUFFER_FULL to indicate the timer dump is done and
+		 * the buffer is full. If the state changed to
+		 * HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED or
+		 * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR, the user will transition the
+		 * state machine to the next state.
+		 */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_FULL;
+ }
+ if (wd_backend->locked.state != HWCNT_JM_WD_ERROR && wd_backend->locked.is_enabled) {
+ /* reset the timer to schedule another callback. Calling modify on a
+ * disabled timer enables it.
+ */
+ /*The spin lock needs to be held in case the client calls dump_enable*/
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms);
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ /* Unblock user if it's waiting. */
+ complete_all(&wd_backend->watchdog_complete);
+}
+
+/* Helper methods: info structure creation and destruction. */
+
+static struct kbase_hwcnt_backend_jm_watchdog_info *
+kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface)
+{
+ struct kbase_hwcnt_backend_jm_watchdog_info *const info =
+ kmalloc(sizeof(*info), GFP_KERNEL);
+
+ if (!info)
+ return NULL;
+
+ *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface,
+ .dump_watchdog_iface =
+ watchdog_iface };
+
+ return info;
+}
+
+/****** kbase_hwcnt_backend_interface implementation *******/
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_jm_watchdog_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ const struct kbase_hwcnt_backend_jm_watchdog_info *wd_info = (void *)info;
+
+ if (WARN_ON(!info))
+ return NULL;
+
+ return wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
+}
+
+static void
+kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watchdog *wd_backend,
+ enum wd_init_state state)
+{
+ if (!wd_backend)
+ return;
+
+ WARN_ON(state > HWCNT_JM_WD_INIT_END);
+
+ while (state-- > HWCNT_JM_WD_INIT_START) {
+ switch (state) {
+ case HWCNT_JM_WD_INIT_BACKEND:
+ wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend);
+ break;
+ case HWCNT_JM_WD_INIT_ENABLE_MAP:
+ kbase_hwcnt_enable_map_free(&wd_backend->wd_enable_map);
+ break;
+ case HWCNT_JM_WD_INIT_DUMP_BUFFER:
+ kbase_hwcnt_dump_buffer_free(&wd_backend->wd_dump_buffer);
+ break;
+ case HWCNT_JM_WD_INIT_END:
+ break;
+ }
+ }
+
+ kfree(wd_backend);
+}
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn
+ * Calling term does *not* destroy the interface
+ */
+static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend =
+ (struct kbase_hwcnt_backend_jm_watchdog *)backend;
+
+ if (!backend)
+ return;
+
+ /* disable timer thread to avoid concurrent access to shared resources */
+ wd_backend->info->dump_watchdog_iface->disable(
+ wd_backend->info->dump_watchdog_iface->timer);
+
+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END);
+}
+
+/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */
+static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backend_info *info,
+ struct kbase_hwcnt_backend **out_backend)
+{
+ int errcode = 0;
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend = NULL;
+ struct kbase_hwcnt_backend_jm_watchdog_info *const wd_info = (void *)info;
+ const struct kbase_hwcnt_backend_info *jm_info;
+ const struct kbase_hwcnt_metadata *metadata;
+ enum wd_init_state state = HWCNT_JM_WD_INIT_START;
+
+ if (WARN_ON(!info) || WARN_ON(!out_backend))
+ return -EINVAL;
+
+ jm_info = wd_info->jm_backend_iface->info;
+ metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info);
+
+ wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL);
+ if (!wd_backend) {
+ *out_backend = NULL;
+ return -ENOMEM;
+ }
+
+ *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){
+ .info = wd_info,
+ .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms,
+ .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false }
+ };
+
+ while (state < HWCNT_JM_WD_INIT_END && !errcode) {
+ switch (state) {
+ case HWCNT_JM_WD_INIT_BACKEND:
+ errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend);
+ break;
+ case HWCNT_JM_WD_INIT_ENABLE_MAP:
+ errcode =
+ kbase_hwcnt_enable_map_alloc(metadata, &wd_backend->wd_enable_map);
+ break;
+ case HWCNT_JM_WD_INIT_DUMP_BUFFER:
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata,
+ &wd_backend->wd_dump_buffer);
+ break;
+ case HWCNT_JM_WD_INIT_END:
+ break;
+ }
+ if (!errcode)
+ state++;
+ }
+
+ if (errcode) {
+ kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, state);
+ *out_backend = NULL;
+ return errcode;
+ }
+
+ WARN_ON(state != HWCNT_JM_WD_INIT_END);
+
+ spin_lock_init(&wd_backend->locked.watchdog_lock);
+ init_completion(&wd_backend->watchdog_complete);
+
+ *out_backend = (struct kbase_hwcnt_backend *)wd_backend;
+ return 0;
+}
+
+/* Job manager watchdog backend, implementation of timestamp_ns */
+static u64 kbasep_hwcnt_backend_jm_watchdog_timestamp_ns(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return wd_backend->info->jm_backend_iface->timestamp_ns(wd_backend->jm_backend);
+}
+
+static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ struct kbase_hwcnt_backend_jm_watchdog *wd_backend,
+ const struct kbase_hwcnt_enable_map *enable_map, kbase_hwcnt_backend_dump_enable_fn enabler)
+{
+ int errcode = -EPERM;
+ unsigned long flags;
+
+ if (WARN_ON(!wd_backend) || WARN_ON(!enable_map))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ /* If the backend is already enabled return an error */
+ if (wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ /* Copy the enable map into the watchdog backend's own copy, for later use. */
+ kbase_hwcnt_enable_map_copy(&wd_backend->wd_enable_map, enable_map);
+
+ errcode = enabler(wd_backend->jm_backend, enable_map);
+ if (!errcode) {
+ /*Enable dump watchdog*/
+ errcode = wd_backend->info->dump_watchdog_iface->enable(
+ wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms,
+ kbasep_hwcnt_backend_jm_watchdog_timer_callback, wd_backend);
+ if (!errcode) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(wd_backend->locked.is_enabled);
+ wd_backend->locked.is_enabled = true;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ } else
+ /* Revert the job manager backend to disabled. */
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_enable */
+static int
+kbasep_hwcnt_backend_jm_watchdog_dump_enable(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable);
+}
+
+/* Job manager watchdog backend, implementation of dump_enable_nolock */
+static int
+kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
+ wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable_nolock);
+}
+
+/* Job manager watchdog backend, implementation of dump_disable */
+static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend)
+{
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+
+ if (WARN_ON(!backend))
+ return;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return;
+ }
+
+ wd_backend->locked.is_enabled = false;
+
+ /* Discard undumped counter values since the last dump_get. */
+ if (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ wd_backend->info->dump_watchdog_iface->disable(
+ wd_backend->info->dump_watchdog_iface->timer);
+
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+}
+
+/* Job manager watchdog backend, implementation of dump_clear */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_clear(struct kbase_hwcnt_backend *backend)
+{
+ int errcode = -EPERM;
+ bool clear_wd_wait_completion = false;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ if (WARN_ON(!backend))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_IDLE_BUFFER_FULL:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ wd_backend->locked.state = HWCNT_JM_WD_BUFFER_CLEARING;
+ errcode = 0;
+ break;
+ case HWCNT_JM_WD_TIMER_DUMPING:
+ /* The timer has requested a dump; when it completes, the job manager
+ * backend buffer will be zero.
+ */
+ clear_wd_wait_completion = true;
+ /* This thread will have to wait for the callback to terminate and then call a
+ * dump_clear on the job manager backend. We change the state to
+ * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR to notify the callback thread there is
+ * no more need to dump the buffer (since we will clear it right after anyway).
+ * We set up a wait queue to synchronize with the callback.
+ */
+ reinit_completion(&wd_backend->watchdog_complete);
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR;
+ errcode = 0;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!errcode) {
+ if (clear_wd_wait_completion) {
+ /* Waiting for the callback to finish */
+ wait_for_completion(&wd_backend->watchdog_complete);
+ }
+
+ /* Clearing job manager backend buffer */
+ errcode = wd_backend->info->jm_backend_iface->dump_clear(wd_backend->jm_backend);
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+ wd_backend->locked.state != HWCNT_JM_WD_BUFFER_CLEARING &&
+ wd_backend->locked.state != HWCNT_JM_WD_ERROR);
+
+ WARN_ON(!wd_backend->locked.is_enabled);
+
+ if (!errcode && wd_backend->locked.state != HWCNT_JM_WD_ERROR) {
+ /* Setting the internal buffer state to EMPTY */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ /* Resetting the timer. Calling modify on a disabled timer
+ * enables it.
+ */
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer,
+ wd_backend->timeout_ms);
+ } else {
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ errcode = -EPERM;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_request */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_request(struct kbase_hwcnt_backend *backend,
+ u64 *dump_time_ns)
+{
+ bool call_dump_request = false;
+ int errcode = 0;
+ unsigned long flags;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+
+ if (WARN_ON(!backend) || WARN_ON(!dump_time_ns))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_IDLE_BUFFER_EMPTY:
+ /* Advance the state to prevent the timer callback from running while we
+ * call the job manager backend.
+ */
+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY;
+ call_dump_request = true;
+ break;
+ case HWCNT_JM_WD_IDLE_BUFFER_FULL:
+ wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL;
+ call_dump_request = true;
+ break;
+ case HWCNT_JM_WD_TIMER_DUMPING:
+ /* Retrieve timing information from previous dump_request */
+ *dump_time_ns = wd_backend->wd_dump_timestamp;
+ /* On the next client call (dump_wait) the thread will have to wait for the
+ * callback to finish the dumping.
+ * We set up a wait queue to synchronize with the callback.
+ */
+ reinit_completion(&wd_backend->watchdog_complete);
+ wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (call_dump_request) {
+ errcode = wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend,
+ dump_time_ns);
+ if (!errcode) {
+ /* Resetting the timer. Calling modify on a disabled timer enables it. */
+ wd_backend->info->dump_watchdog_iface->modify(
+ wd_backend->info->dump_watchdog_iface->timer,
+ wd_backend->timeout_ms);
+ } else {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(!wd_backend->locked.is_enabled);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_wait */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_wait(struct kbase_hwcnt_backend *backend)
+{
+ int errcode = -EPERM;
+ bool wait_for_auto_dump = false, wait_for_user_dump = false;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+
+ if (WARN_ON(!backend))
+ return -EINVAL;
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ if (!wd_backend->locked.is_enabled) {
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ return -EPERM;
+ }
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED:
+ wait_for_auto_dump = true;
+ errcode = 0;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ wait_for_user_dump = true;
+ errcode = 0;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (wait_for_auto_dump)
+ wait_for_completion(&wd_backend->watchdog_complete);
+ else if (wait_for_user_dump) {
+ errcode = wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend);
+ if (errcode) {
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+ WARN_ON(!wd_backend->locked.is_enabled);
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+ }
+
+ return errcode;
+}
+
+/* Job manager watchdog backend, implementation of dump_get */
+static int kbasep_hwcnt_backend_jm_watchdog_dump_get(
+ struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map, bool accumulate)
+{
+ bool call_dump_get = false;
+ struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
+ unsigned long flags;
+ int errcode = 0;
+
+ if (WARN_ON(!backend) || WARN_ON(!dump_buffer) || WARN_ON(!enable_map))
+ return -EINVAL;
+
+ /* The resultant contents of the dump buffer are only well defined if a prior
+ * call to dump_wait returned successfully, and a new dump has not yet been
+ * requested by a call to dump_request.
+ */
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED:
+ /* We assume dump_wait has been called and completed successfully. */
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+ /* Use state to indicate that the buffer is now empty. */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ /*accumulate or copy watchdog data to user buffer first so that dump_get can set
+ * the header correctly
+ */
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+ /*accumulate backend data into user buffer on top of watchdog data*/
+ accumulate = true;
+ call_dump_get = true;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ call_dump_get = true;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (call_dump_get && !errcode) {
+ /* Dump the job manager backend into the user buffer, honouring the
+ * accumulate flag.
+ */
+ errcode = wd_backend->info->jm_backend_iface->dump_get(
+ wd_backend->jm_backend, dump_buffer, enable_map, accumulate);
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY &&
+ wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+ if (!errcode)
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ else
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+
+ return errcode;
+}
+
+/* exposed methods */
+
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface)
+{
+ struct kbase_hwcnt_backend_jm_watchdog_info *info = NULL;
+
+ if (WARN_ON(!backend_iface) || WARN_ON(!watchdog_iface) || WARN_ON(!out_iface))
+ return -EINVAL;
+
+ info = kbasep_hwcnt_backend_jm_watchdog_info_create(backend_iface, watchdog_iface);
+ if (!info)
+ return -ENOMEM;
+
+ /* Link the info table with the output iface, to allow the callbacks below to
+ * access the info object later on.
+ */
+ *out_iface = (struct kbase_hwcnt_backend_interface){
+ .info = (void *)info,
+ .metadata = kbasep_hwcnt_backend_jm_watchdog_metadata,
+ .init = kbasep_hwcnt_backend_jm_watchdog_init,
+ .term = kbasep_hwcnt_backend_jm_watchdog_term,
+ .timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns,
+ .dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable,
+ .dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock,
+ .dump_disable = kbasep_hwcnt_backend_jm_watchdog_dump_disable,
+ .dump_clear = kbasep_hwcnt_backend_jm_watchdog_dump_clear,
+ .dump_request = kbasep_hwcnt_backend_jm_watchdog_dump_request,
+ .dump_wait = kbasep_hwcnt_backend_jm_watchdog_dump_wait,
+ .dump_get = kbasep_hwcnt_backend_jm_watchdog_dump_get
+ };
+
+ /*registering watchdog backend module methods on the output interface*/
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface || !iface->info)
+ return;
+
+ kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info);
+
+ /*blanking the watchdog backend interface*/
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
new file mode 100644
index 0000000..02a7952
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of the mali_kbase_hwcnt_backend interface for the job
+ * manager backend. This module sits between the hardware counter
+ * (hwcnt_accumulator) module (the interface consumer) and the job manager
+ * backend module (hwcnt_backend_jm), and buffers the dump requests issued by
+ * the hwcnt_accumulator consumer. This module is NOT multi-thread safe: the
+ * caller must ensure the exposed methods are called by at most one thread at
+ * any time.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog
+ * backend interface.
+ * @backend_iface: Non-NULL pointer to the backend interface structure that this module will
+ * extend.
+ * @watchdog_iface: Non-NULL pointer to a hardware counter watchdog interface.
+ * @out_iface: Non-NULL pointer to backend interface structure that is filled in
+ * on creation success.
+ *
+ * Calls to out_iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface);
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_destroy() - Destroy a job manager hardware counter watchdog
+ * backend interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ */
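
A minimal wiring sketch for the create/destroy pair declared above. It assumes the caller already has an initialised job manager backend interface and a watchdog timer interface from the other modules in this series; the names jm_iface, wd_timer_iface and example_wrap_jm_backend are hypothetical, and only the two functions declared in this header are exercised.

/* Hypothetical wiring sketch; jm_iface and wd_timer_iface are assumed to have
 * been created by their respective modules.
 */
static int example_wrap_jm_backend(struct kbase_hwcnt_backend_interface *jm_iface,
                                   struct kbase_hwcnt_watchdog_interface *wd_timer_iface,
                                   struct kbase_hwcnt_backend_interface *wrapped_iface)
{
        /* Interpose the watchdog buffering layer over the plain JM backend. */
        int errcode = kbase_hwcnt_backend_jm_watchdog_create(jm_iface, wd_timer_iface,
                                                             wrapped_iface);
        if (errcode)
                return errcode;

        /* wrapped_iface can now be handed to kbase_hwcnt_context_init(). Once the
         * context has been terminated, release the wrapper with
         * kbase_hwcnt_backend_jm_watchdog_destroy(wrapped_iface).
         */
        return 0;
}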
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
new file mode 100644
index 0000000..e724572
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
@@ -0,0 +1,775 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Implementation of hardware counter context and accumulator APIs.
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+/**
+ * enum kbase_hwcnt_accum_state - Hardware counter accumulator states.
+ * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail.
+ * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled.
+ * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are
+ * any enabled counters.
+ */
+enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED };
+
+/**
+ * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure.
+ * @metadata: Pointer to immutable hwcnt metadata.
+ * @backend: Pointer to created counter backend.
+ * @state: The current state of the accumulator.
+ * - State transition from disabled->enabled or
+ * disabled->error requires state_lock.
+ * - State transition from enabled->disabled or
+ * enabled->error requires both accum_lock and
+ * state_lock.
+ * - Error state persists until next disable.
+ * @enable_map: The current set of enabled counters.
+ * - Must only be modified while holding both
+ * accum_lock and state_lock.
+ * - Can be read while holding either lock.
+ * - Must stay in sync with enable_map_any_enabled.
+ * @enable_map_any_enabled: True if any counters in the map are enabled, else
+ * false. If true, and state is ACCUM_STATE_ENABLED,
+ * then the counter backend will be enabled.
+ * - Must only be modified while holding both
+ * accum_lock and state_lock.
+ * - Can be read while holding either lock.
+ * - Must stay in sync with enable_map.
+ * @scratch_map: Scratch enable map, used as temporary enable map
+ * storage during dumps.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @accum_buf: Accumulation buffer, where dumps will be accumulated
+ * into on transition to a disable state.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @accumulated: True if the accumulation buffer has been accumulated
+ * into and not subsequently read from yet, else false.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent
+ * dump that was requested by the user.
+ * - Must only be read or modified while holding
+ * accum_lock.
+ */
+struct kbase_hwcnt_accumulator {
+ const struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_backend *backend;
+ enum kbase_hwcnt_accum_state state;
+ struct kbase_hwcnt_enable_map enable_map;
+ bool enable_map_any_enabled;
+ struct kbase_hwcnt_enable_map scratch_map;
+ struct kbase_hwcnt_dump_buffer accum_buf;
+ bool accumulated;
+ u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_context - Hardware counter context structure.
+ * @iface: Pointer to hardware counter backend interface.
+ * @state_lock: Spinlock protecting state.
+ * @disable_count: Disable count of the context. Initialised to 1.
+ * Decremented when the accumulator is acquired, and incremented
+ * on release. Incremented on calls to
+ * kbase_hwcnt_context_disable[_atomic], and decremented on
+ * calls to kbase_hwcnt_context_enable.
+ * - Must only be read or modified while holding state_lock.
+ * @accum_lock: Mutex protecting accumulator.
+ * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or
+ * termination. Set to true before accumulator initialisation,
+ * and false after accumulator termination.
+ * - Must only be modified while holding both accum_lock and
+ * state_lock.
+ * - Can be read while holding either lock.
+ * @accum: Hardware counter accumulator structure.
+ * @wq: Centralized workqueue for users of hardware counters to
+ * submit async hardware counter related work. Never directly
+ * called, but it's expected that a lot of the functions in this
+ * API will end up called from the enqueued async work.
+ */
+struct kbase_hwcnt_context {
+ const struct kbase_hwcnt_backend_interface *iface;
+ spinlock_t state_lock;
+ size_t disable_count;
+ struct mutex accum_lock;
+ bool accum_inited;
+ struct kbase_hwcnt_accumulator accum;
+ struct workqueue_struct *wq;
+};
+
+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx)
+{
+ struct kbase_hwcnt_context *hctx = NULL;
+
+ if (!iface || !out_hctx)
+ return -EINVAL;
+
+ hctx = kzalloc(sizeof(*hctx), GFP_KERNEL);
+ if (!hctx)
+ goto err_alloc_hctx;
+
+ hctx->iface = iface;
+ spin_lock_init(&hctx->state_lock);
+ hctx->disable_count = 1;
+ mutex_init(&hctx->accum_lock);
+ hctx->accum_inited = false;
+
+ hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0);
+ if (!hctx->wq)
+ goto err_alloc_workqueue;
+
+ *out_hctx = hctx;
+
+ return 0;
+
+err_alloc_workqueue:
+ kfree(hctx);
+err_alloc_hctx:
+ return -ENOMEM;
+}
+
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx)
+{
+ if (!hctx)
+ return;
+
+ /* Make sure we didn't leak the accumulator */
+ WARN_ON(hctx->accum_inited);
+
+ /* We don't expect any work to be pending on this workqueue.
+ * Regardless, this will safely drain and complete the work.
+ */
+ destroy_workqueue(hctx->wq);
+ kfree(hctx);
+}
+
+/**
+ * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx)
+{
+ WARN_ON(!hctx);
+ WARN_ON(!hctx->accum_inited);
+
+ kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map);
+ kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf);
+ kbase_hwcnt_enable_map_free(&hctx->accum.enable_map);
+ hctx->iface->term(hctx->accum.backend);
+ memset(&hctx->accum, 0, sizeof(hctx->accum));
+}
+
+/**
+ * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx)
+{
+ int errcode;
+
+ WARN_ON(!hctx);
+ WARN_ON(!hctx->accum_inited);
+
+ errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend);
+ if (errcode)
+ goto error;
+
+ hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info);
+ hctx->accum.state = ACCUM_STATE_ERROR;
+
+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map);
+ if (errcode)
+ goto error;
+
+ hctx->accum.enable_map_any_enabled = false;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map);
+ if (errcode)
+ goto error;
+
+ hctx->accum.accumulated = false;
+
+ hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend);
+
+ return 0;
+
+error:
+ kbasep_hwcnt_accumulator_term(hctx);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the
+ * disabled state, from the enabled or
+ * error states.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+ int errcode = 0;
+ bool backend_enabled = false;
+ struct kbase_hwcnt_accumulator *accum;
+ unsigned long flags;
+ u64 dump_time_ns;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->accum_lock);
+ WARN_ON(!hctx->accum_inited);
+
+ accum = &hctx->accum;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ WARN_ON(hctx->disable_count != 0);
+ WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED);
+
+ if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled))
+ backend_enabled = true;
+
+ if (!backend_enabled)
+ hctx->accum.state = ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ /* Early out if the backend is not already enabled */
+ if (!backend_enabled)
+ return;
+
+ if (!accumulate)
+ goto disable;
+
+ /* Try and accumulate before disabling */
+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
+ if (errcode)
+ goto disable;
+
+ errcode = hctx->iface->dump_wait(accum->backend);
+ if (errcode)
+ goto disable;
+
+ errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map,
+ accum->accumulated);
+ if (errcode)
+ goto disable;
+
+ accum->accumulated = true;
+
+disable:
+ hctx->iface->dump_disable(accum->backend);
+
+ /* Regardless of any errors during the accumulate, put the accumulator
+ * in the disabled state.
+ */
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum.state = ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+
+/**
+ * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the
+ * enabled state, from the disabled state.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ */
+static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx)
+{
+ int errcode = 0;
+ struct kbase_hwcnt_accumulator *accum;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->state_lock);
+ WARN_ON(!hctx->accum_inited);
+ WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED);
+
+ accum = &hctx->accum;
+
+ /* The backend only needs enabling if any counters are enabled */
+ if (accum->enable_map_any_enabled)
+ errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map);
+
+ if (!errcode)
+ accum->state = ACCUM_STATE_ENABLED;
+ else
+ accum->state = ACCUM_STATE_ERROR;
+}
+
+/**
+ * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date
+ * values of enabled counters possible, and
+ * optionally update the set of enabled
+ * counters.
+ * @hctx: Non-NULL pointer to the hardware counter context
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ * @new_map: Pointer to the new counter enable map. If non-NULL, must have
+ * the same metadata as the accumulator. If NULL, the set of
+ * enabled counters will be unchanged.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf,
+ const struct kbase_hwcnt_enable_map *new_map)
+{
+ int errcode = 0;
+ unsigned long flags;
+ enum kbase_hwcnt_accum_state state;
+ bool dump_requested = false;
+ bool dump_written = false;
+ bool cur_map_any_enabled;
+ struct kbase_hwcnt_enable_map *cur_map;
+ bool new_map_any_enabled = false;
+ u64 dump_time_ns;
+ struct kbase_hwcnt_accumulator *accum;
+
+ WARN_ON(!hctx);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata));
+ WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata));
+ WARN_ON(!hctx->accum_inited);
+ lockdep_assert_held(&hctx->accum_lock);
+
+ accum = &hctx->accum;
+ cur_map = &accum->scratch_map;
+
+ /* Save out info about the current enable map */
+ cur_map_any_enabled = accum->enable_map_any_enabled;
+ kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map);
+
+ if (new_map)
+ new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map);
+
+ /*
+ * We're holding accum_lock, so the accumulator state might transition
+ * from disabled to enabled during this function (as enabling is lock
+ * free), but it will never disable (as disabling needs to hold the
+ * accum_lock), nor will it ever transition from enabled to error (as
+ * an enable while we're already enabled is impossible).
+ *
+ * If we're already disabled, we'll only look at the accumulation buffer
+ * rather than do a real dump, so a concurrent enable does not affect
+ * us.
+ *
+ * If a concurrent enable fails, we might transition to the error
+ * state, but again, as we're only looking at the accumulation buffer,
+ * it's not an issue.
+ */
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ state = accum->state;
+
+ /*
+ * Update the new map now, such that if an enable occurs during this
+ * dump then that enable will set the new map. If we're already enabled,
+ * then we'll do it ourselves after the dump.
+ */
+ if (new_map) {
+ kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map);
+ accum->enable_map_any_enabled = new_map_any_enabled;
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ /* Error state, so early out. No need to roll back any map updates */
+ if (state == ACCUM_STATE_ERROR)
+ return -EIO;
+
+ /* Initiate the dump if the backend is enabled. */
+ if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) {
+ if (dump_buf) {
+ errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns);
+ dump_requested = true;
+ } else {
+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+ errcode = hctx->iface->dump_clear(accum->backend);
+ }
+
+ if (errcode)
+ goto error;
+ } else {
+ dump_time_ns = hctx->iface->timestamp_ns(accum->backend);
+ }
+
+ /* Copy any accumulation into the dest buffer */
+ if (accum->accumulated && dump_buf) {
+ kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map);
+ dump_written = true;
+ }
+
+ /* Wait for any requested dumps to complete */
+ if (dump_requested) {
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+ errcode = hctx->iface->dump_wait(accum->backend);
+ if (errcode)
+ goto error;
+ }
+
+ /* If we're enabled and there's a new enable map, change the enabled set
+ * as soon after the dump has completed as possible.
+ */
+ if ((state == ACCUM_STATE_ENABLED) && new_map) {
+ /* Backend is only enabled if there were any enabled counters */
+ if (cur_map_any_enabled)
+ hctx->iface->dump_disable(accum->backend);
+
+ /* (Re-)enable the backend if the new map has enabled counters.
+ * No need to acquire the spinlock, as concurrent enable while
+ * we're already enabled and holding accum_lock is impossible.
+ */
+ if (new_map_any_enabled) {
+ errcode = hctx->iface->dump_enable(accum->backend, new_map);
+ if (errcode)
+ goto error;
+ }
+ }
+
+ /* Copy, accumulate, or zero into the dest buffer to finish */
+ if (dump_buf) {
+ /* If we dumped, copy or accumulate it into the destination */
+ if (dump_requested) {
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+ errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map,
+ dump_written);
+ if (errcode)
+ goto error;
+ dump_written = true;
+ }
+
+ /* If we've not written anything into the dump buffer so far, it
+ * means there was nothing to write. Zero any enabled counters.
+ */
+ if (!dump_written)
+ kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+ }
+
+ /* Write out timestamps */
+ *ts_start_ns = accum->ts_last_dump_ns;
+ *ts_end_ns = dump_time_ns;
+
+ accum->accumulated = false;
+ accum->ts_last_dump_ns = dump_time_ns;
+
+ return 0;
+error:
+ /* An error was only physically possible if the backend was enabled */
+ WARN_ON(state != ACCUM_STATE_ENABLED);
+
+ /* Disable the backend, and transition to the error state */
+ hctx->iface->dump_disable(accum->backend);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ accum->state = ACCUM_STATE_ERROR;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+ unsigned long flags;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->accum_lock);
+
+ if (!kbase_hwcnt_context_disable_atomic(hctx)) {
+ kbasep_hwcnt_accumulator_disable(hctx, accumulate);
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ /* Atomic disable failed and we're holding the mutex, so current
+ * disable count must be 0.
+ */
+ WARN_ON(hctx->disable_count != 0);
+ hctx->disable_count++;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ }
+}
+
+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum)
+{
+ int errcode = 0;
+ unsigned long flags;
+
+ if (!hctx || !accum)
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!hctx->accum_inited)
+ /* Set accum initing now to prevent concurrent init */
+ hctx->accum_inited = true;
+ else
+ /* Already have an accum, or already being inited */
+ errcode = -EBUSY;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ if (errcode)
+ return errcode;
+
+ errcode = kbasep_hwcnt_accumulator_init(hctx);
+
+ if (errcode) {
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+ }
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ WARN_ON(hctx->disable_count == 0);
+ WARN_ON(hctx->accum.enable_map_any_enabled);
+
+ /* Decrement the disable count to allow the accumulator to be accessible
+ * now that it's fully constructed.
+ */
+ hctx->disable_count--;
+
+ /*
+ * Make sure the accumulator is initialised to the correct state.
+ * Regardless of initial state, counters don't need to be enabled via
+ * the backend, as the initial enable map has no enabled counters.
+ */
+ hctx->accum.state = (hctx->disable_count == 0) ? ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ *accum = &hctx->accum;
+
+ return 0;
+}
+
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum)
+ return;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ mutex_lock(&hctx->accum_lock);
+
+ /* Double release is a programming error */
+ WARN_ON(!hctx->accum_inited);
+
+ /* Disable the context to ensure the accumulator is inaccessible while
+ * we're destroying it. This performs the corresponding disable count
+ * increment to the decrement done during acquisition.
+ */
+ kbasep_hwcnt_context_disable(hctx, false);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ kbasep_hwcnt_accumulator_term(hctx);
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+}
+
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
+{
+ if (WARN_ON(!hctx))
+ return;
+
+ /* Try and atomically disable first, so we can avoid locking the mutex
+ * if we don't need to.
+ */
+ if (kbase_hwcnt_context_disable_atomic(hctx))
+ return;
+
+ mutex_lock(&hctx->accum_lock);
+
+ kbasep_hwcnt_context_disable(hctx, true);
+
+ mutex_unlock(&hctx->accum_lock);
+}
+
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx)
+{
+ unsigned long flags;
+ bool atomic_disabled = false;
+
+ if (WARN_ON(!hctx))
+ return false;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!WARN_ON(hctx->disable_count == SIZE_MAX)) {
+ /*
+ * If disable count is non-zero, we can just bump the disable
+ * count.
+ *
+ * Otherwise, we can't disable in an atomic context.
+ */
+ if (hctx->disable_count != 0) {
+ hctx->disable_count++;
+ atomic_disabled = true;
+ }
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ return atomic_disabled;
+}
+
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx)
+{
+ unsigned long flags;
+
+ if (WARN_ON(!hctx))
+ return;
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!WARN_ON(hctx->disable_count == 0)) {
+ if (hctx->disable_count == 1)
+ kbasep_hwcnt_accumulator_enable(hctx);
+
+ hctx->disable_count--;
+ }
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+}
+
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx)
+{
+ if (!hctx)
+ return NULL;
+
+ return hctx->iface->metadata(hctx->iface->info);
+}
+
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work)
+{
+ if (WARN_ON(!hctx) || WARN_ON(!work))
+ return false;
+
+ return queue_work(hctx->wq, work);
+}
+
+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum || !new_map || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ if ((new_map->metadata != hctx->accum.metadata) ||
+ (dump_buf && (dump_buf->metadata != hctx->accum.metadata)))
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+
+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+}
+
+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ if (dump_buf && (dump_buf->metadata != hctx->accum.metadata))
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+
+ errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+}
+
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum)
+{
+ struct kbase_hwcnt_context *hctx;
+
+ if (WARN_ON(!accum))
+ return 0;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+ return hctx->iface->timestamp_ns(accum->backend);
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h
new file mode 100644
index 0000000..069e020
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter accumulator API.
+ */
+
+#ifndef _KBASE_HWCNT_ACCUMULATOR_H_
+#define _KBASE_HWCNT_ACCUMULATOR_H_
+
+#include <linux/types.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_accumulator;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator
+ * for a hardware counter context.
+ * @hctx: Non-NULL pointer to a hardware counter context.
+ * @accum: Non-NULL pointer to where the pointer to the created accumulator
+ * will be stored on success.
+ *
+ * There can exist at most one instance of the hardware counter accumulator per
+ * context at a time.
+ *
+ * If multiple clients need access to the hardware counters at the same time,
+ * then an abstraction built on top of the single instance to the hardware
+ * counter accumulator is required.
+ *
+ * No counters will be enabled with the returned accumulator. A subsequent call
+ * to kbase_hwcnt_accumulator_set_counters must be used to turn them on.
+ *
+ * There are four components to a hardware counter dump:
+ * - A set of enabled counters
+ * - A start time
+ * - An end time
+ * - A dump buffer containing the accumulated counter values for all enabled
+ * counters between the start and end times.
+ *
+ * For each dump, it is guaranteed that all enabled counters were active for the
+ * entirety of the period between the start and end times.
+ *
+ * It is also guaranteed that the start time of dump "n" is always equal to the
+ * end time of dump "n - 1".
+ *
+ * For all dumps, the values of any counters that were not enabled are undefined.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum);
+
+/**
+ * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * The accumulator must be released before the context the accumulator was
+ * created from is terminated.
+ */
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum);
+
+/**
+ * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently
+ * enabled counters, and enable a new
+ * set of counters that will be used
+ * for subsequent dumps.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ * @new_map: Non-NULL pointer to the new counter enable map. Must have the
+ * same metadata as the accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
+ * counters.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend
+ * timestamp.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * Return: Accumulator backend timestamp in nanoseconds.
+ */
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum);
+
+#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */
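
A short sketch of the dump cycle described above, under the assumption that the caller also has access to kbase_hwcnt_context_metadata() and the enable map / dump buffer helpers declared elsewhere in this patch. The function name example_dump_once is illustrative only, and the step that actually selects counters in the enable map is elided.

/* Illustrative dump cycle; error handling is intentionally terse. */
static int example_dump_once(struct kbase_hwcnt_context *hctx)
{
        const struct kbase_hwcnt_metadata *metadata = kbase_hwcnt_context_metadata(hctx);
        struct kbase_hwcnt_accumulator *accum;
        struct kbase_hwcnt_enable_map map;
        struct kbase_hwcnt_dump_buffer buf;
        u64 ts_start_ns, ts_end_ns;
        int errcode;

        /* Only one accumulator may exist per context at a time. */
        errcode = kbase_hwcnt_accumulator_acquire(hctx, &accum);
        if (errcode)
                return errcode;

        errcode = kbase_hwcnt_enable_map_alloc(metadata, &map);
        if (errcode)
                goto out_release;
        errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &buf);
        if (errcode)
                goto out_free_map;

        /* ... mark the desired counters as enabled in 'map' here ... */

        /* Enable the chosen counters; the discarded dump starts the interval. */
        errcode = kbase_hwcnt_accumulator_set_counters(accum, &map, &ts_start_ns,
                                                       &ts_end_ns, NULL);
        if (!errcode)
                /* Collect counter values accumulated between ts_start_ns and ts_end_ns. */
                errcode = kbase_hwcnt_accumulator_dump(accum, &ts_start_ns, &ts_end_ns,
                                                       &buf);

        kbase_hwcnt_dump_buffer_free(&buf);
out_free_map:
        kbase_hwcnt_enable_map_free(&map);
out_release:
        kbase_hwcnt_accumulator_release(accum);
        return errcode;
}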
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
new file mode 100644
index 0000000..89732a9
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter context API.
+ */
+
+#ifndef _KBASE_HWCNT_CONTEXT_H_
+#define _KBASE_HWCNT_CONTEXT_H_
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct kbase_hwcnt_backend_interface;
+struct kbase_hwcnt_context;
+
+/**
+ * kbase_hwcnt_context_init() - Initialise a hardware counter context.
+ * @iface: Non-NULL pointer to a hardware counter backend interface.
+ * @out_hctx: Non-NULL pointer to where the pointer to the created context will
+ * be stored on success.
+ *
+ * On creation, the disable count of the context will be 0.
+ * A hardware counter accumulator can be acquired using a created context.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface,
+ struct kbase_hwcnt_context **out_hctx);
+
+/**
+ * kbase_hwcnt_context_term() - Terminate a hardware counter context.
+ * @hctx: Pointer to context to be terminated.
+ */
+void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by
+ * the context, so related counter data
+ * structures can be created.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable() - Increment the disable count of the context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * If a call to this function increments the disable count from 0 to 1, and
+ * an accumulator has been acquired, then a counter dump will be performed
+ * before counters are disabled via the backend interface.
+ *
+ * Subsequent dumps via the accumulator while counters are disabled will first
+ * return the accumulated dump, then will return dumps with zeroed counters.
+ *
+ * After this function call returns, it is guaranteed that counters will not be
+ * enabled via the backend interface.
+ */
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the
+ * context if possible in an atomic
+ * context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * This function will only succeed if hardware counters are effectively already
+ * disabled, i.e. there is no accumulator, the disable count is already
+ * non-zero, or the accumulator has no counters set.
+ *
+ * After this function call returns true, it is guaranteed that counters will
+ * not be enabled via the backend interface.
+ *
+ * Return: True if the disable count was incremented, else False.
+ */
+bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_enable() - Decrement the disable count of the context.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ *
+ * If a call to this function decrements the disable count from 1 to 0, and
+ * an accumulator has been acquired, then counters will be re-enabled via the
+ * backend interface.
+ *
+ * If an accumulator has been acquired and enabling counters fails for some
+ * reason, the accumulator will be placed into an error state.
+ *
+ * It is only valid to call this function one time for each prior returned call
+ * to kbase_hwcnt_context_disable.
+ *
+ * The spinlock documented in the backend interface that was passed in to
+ * kbase_hwcnt_context_init() must be held before calling this function.
+ */
+void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx);
+
+/**
+ * kbase_hwcnt_context_queue_work() - Queue hardware counter related async
+ * work on a workqueue specialized for
+ * hardware counters.
+ * @hctx: Non-NULL pointer to the hardware counter context.
+ * @work: Non-NULL pointer to work to queue.
+ *
+ * Return: false if work was already on a queue, true otherwise.
+ *
+ * Performance counter related work is high priority, short running, and its
+ * CPU locality is generally unimportant. There is no standard workqueue that
+ * can service this flavor of work.
+ *
+ * Rather than have each user of counters define their own workqueue, we have
+ * a centralized one in here that anybody using this hardware counter API
+ * should use.
+ *
+ * All submitted work must have completed before the context is destroyed.
+ * Given that work enqueued via this function is likely to be hardware counter
+ * related, and will therefore use the context object, this behavior will
+ * usually occur naturally.
+ *
+ * Historical note: prior to this centralized workqueue, the system_highpri_wq
+ * was used. This was generally fine, except when a particularly long-running,
+ * higher-priority thread ended up scheduled on the enqueuing CPU core. Given
+ * that hardware counters require tight integration with power management,
+ * this meant progress through the power management states could be stalled
+ * for however long that higher-priority thread ran.
+ */
+bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work);
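+
+/*
+ * Illustrative sketch (editorial addition, not part of the driver sources):
+ * how a hypothetical user of this API might queue deferred counter work on
+ * the centralized workqueue described above. The my_hwcnt_worker() callback,
+ * the my_hwcnt_work variable and the already-initialised hctx are assumptions
+ * made purely for the example.
+ *
+ *   static struct work_struct my_hwcnt_work;
+ *
+ *   static void my_hwcnt_worker(struct work_struct *work)
+ *   {
+ *           // Short-running, high priority counter work goes here.
+ *   }
+ *
+ *   // At initialisation time:
+ *   INIT_WORK(&my_hwcnt_work, my_hwcnt_worker);
+ *
+ *   // When counter work needs to be deferred:
+ *   kbase_hwcnt_context_queue_work(hctx, &my_hwcnt_work);
+ *
+ * All queued work must have completed (e.g. via flush_work()) before
+ * kbase_hwcnt_context_term(hctx) is called.
+ */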
+
+#endif /* _KBASE_HWCNT_CONTEXT_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
new file mode 100644
index 0000000..74916da
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -0,0 +1,738 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/err.h>
+
+/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements
+ */
+enum enable_map_idx {
+ EM_LO,
+ EM_HI,
+ EM_COUNT,
+};
+
+static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ if (is_csf)
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
+ else
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ if (is_csf)
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
+ else
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ if (is_csf)
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
+ else
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata
+ * for the GPU.
+ * @gpu_info: Non-NULL pointer to hwcnt info for current GPU.
+ * @is_csf: true for CSF GPU, otherwise false.
+ * @counter_set: The performance counter set to use.
+ * @metadata: Non-NULL pointer to where created metadata is stored
+ * on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ const bool is_csf,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **metadata)
+{
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description group;
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ size_t non_sc_block_count;
+ size_t sc_block_count;
+
+ WARN_ON(!gpu_info);
+ WARN_ON(!metadata);
+
+ /* Calculate number of block instances that aren't shader cores */
+ non_sc_block_count = 2 + gpu_info->l2_count;
+ /* Calculate number of block instances that are shader cores */
+ sc_block_count = fls64(gpu_info->core_mask);
+
+ /*
+ * A system can have up to 64 shader cores, but the 64-bit
+ * availability mask can't physically represent that many cores as well
+ * as the other hardware blocks.
+ * Error out if there are more blocks than our implementation can
+ * support.
+ */
+ if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
+ return -EINVAL;
+
+ /* One Front End block */
+ kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
+ blks[0].inst_cnt = 1;
+ blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ /* One Tiler block */
+ kbasep_get_tiler_block_type(&blks[1].type, counter_set);
+ blks[1].inst_cnt = 1;
+ blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ /* l2_count memsys blks */
+ kbasep_get_memsys_block_type(&blks[2].type, counter_set);
+ blks[2].inst_cnt = gpu_info->l2_count;
+ blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ /*
+ * There are as many shader cores in the system as there are bits set in
+ * the core mask. However, the dump buffer memory requirements need to
+ * take into account the fact that the core mask may be non-contiguous.
+ *
+ * For example, a system with a core mask of 0b1011 has the same dump
+ * buffer memory requirements as a system with 0b1111, but requires more
+ * memory than a system with 0b0111. However, core 2 of the system with
+ * 0b1011 doesn't physically exist, and the dump buffer memory that
+ * accounts for that core will never be written to when we do a counter
+ * dump.
+ *
+ * We find the core mask's last set bit to determine the memory
+ * requirements, and embed the core mask into the availability mask so
+ * we can determine later which shader cores physically exist.
+ */
+ kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
+ blks[3].inst_cnt = sc_block_count;
+ blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+
+ WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
+
+ group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
+ group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
+ group.blks = blks;
+
+ desc.grp_cnt = 1;
+ desc.grps = &group;
+ desc.clk_cnt = gpu_info->clk_cnt;
+
+ /* The JM, Tiler, and L2s are always available, and are before cores */
+ desc.avail_mask = (1ull << non_sc_block_count) - 1;
+ /* Embed the core mask directly in the availability mask */
+ desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count);
+
+ return kbase_hwcnt_metadata_create(&desc, metadata);
+}
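+
+/*
+ * Worked example (editorial addition): for a hypothetical GPU with
+ * l2_count = 2 and core_mask = 0b1011, non_sc_block_count = 2 + 2 = 4 and
+ * sc_block_count = fls64(0b1011) = 4, so the function builds:
+ *
+ *   desc.avail_mask  = (1ull << 4) - 1;   // FE, Tiler and both MEMSYS blocks
+ *   desc.avail_mask |= 0b1011ull << 4;    // sparse shader core mask
+ *   // i.e. avail_mask == 0b10111111
+ *
+ * Dump buffer space is still reserved for the missing core 2 (bit 6), but
+ * that bit stays clear in the availability mask so the core is reported as
+ * absent, matching the comment above.
+ */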
+
+/**
+ * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the
+ * GPU.
+ * @gpu_info: Non-NULL pointer to hwcnt info for the GPU.
+ *
+ * Return: Size of buffer the GPU needs to perform a counter dump.
+ */
+static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
+{
+ WARN_ON(!gpu_info);
+
+ return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
+ gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
+}
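+
+/*
+ * Worked example (editorial addition): for a hypothetical GPU with
+ * l2_count = 1, core_mask = 0xF and prfcnt_values_per_block = 64 this gives
+ * (2 + 1 + fls64(0xF)) * 64 * KBASE_HWCNT_VALUE_HW_BYTES = 7 * 64 * 4 = 1792
+ * bytes, which is half of the 64-bit-per-value metadata dump_buf_bytes
+ * checked in kbase_hwcnt_jm_metadata_create() below.
+ */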
+
+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes)
+{
+ int errcode;
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_bytes;
+
+ if (!gpu_info || !out_metadata || !out_dump_bytes)
+ return -EINVAL;
+
+	/*
+	 * For architectures where a max_config interface is available
+	 * from the arbiter, the v5 dump bytes and the v5 metadata are
+	 * based on the maximum possible allocation of HW in the GPU,
+	 * because they need to be prepared for the worst case where all
+	 * the available L2 slices and shader cores are allocated.
+	 */
+ dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
+ if (errcode)
+ return errcode;
+
+ /*
+ * The physical dump size should be half of dump abstraction size in
+ * metadata since physical HW uses 32-bit per value but metadata
+ * specifies 64-bit per value.
+ */
+ WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes);
+
+ *out_metadata = metadata;
+ *out_dump_bytes = dump_bytes;
+
+ return 0;
+}
+
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+ if (!metadata)
+ return;
+
+ kbase_hwcnt_metadata_destroy(metadata);
+}
+
+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata)
+{
+ int errcode;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if (!gpu_info || !out_metadata)
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
+ if (errcode)
+ return errcode;
+
+ *out_metadata = metadata;
+
+ return 0;
+}
+
+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+ if (!metadata)
+ return;
+
+ kbase_hwcnt_metadata_destroy(metadata);
+}
+
+static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
+{
+ bool is_shader = false;
+
+ /* Warn on unknown group type */
+ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
+ return false;
+
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED)
+ is_shader = true;
+
+ return is_shader;
+}
+
+static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
+{
+ bool is_l2_cache = false;
+
+ switch (grp_type) {
+ case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
+ is_l2_cache = true;
+ break;
+ default:
+ /* Warn on unknown group type */
+ WARN_ON(true);
+ }
+
+ return is_l2_cache;
+}
+
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ const u64 *dump_src = src;
+ size_t src_offset = 0;
+ u64 core_mask = pm_core_mask;
+
+ /* Variables to deal with the current configuration */
+ int l2_count = 0;
+
+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ const size_t ctr_cnt =
+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const bool is_shader_core = is_block_type_shader(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
+ const bool is_l2_cache = is_block_type_l2_cache(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+ bool hw_res_available = true;
+
+ /*
+		 * If the number of L2 blocks seen so far is greater than the
+		 * number of L2 slices currently allocated, there is no HW
+		 * allocated to that block.
+ */
+ if (is_l2_cache) {
+ l2_count++;
+ if (l2_count > curr_config->num_l2_slices)
+ hw_res_available = false;
+ else
+ hw_res_available = true;
+ }
+ /*
+		 * For the shader cores, the currently allocated shader_mask is
+		 * always a subset of the maximum shader_mask, so after skipping
+		 * any L2 cache that is not available, the available shader
+		 * cores will always have a matching set of block instances to
+		 * accumulate into.
+ */
+ else
+ hw_res_available = true;
+
+ /*
+ * Skip block if no values in the destination block are enabled.
+ */
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
+ u64 *dst_blk =
+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk = dump_src + src_offset;
+ bool blk_powered;
+
+ if (!is_shader_core) {
+ /* Under the current PM system, counters will
+ * only be enabled after all non shader core
+ * blocks are powered up.
+ */
+ blk_powered = true;
+ } else {
+ /* Check the PM core mask to see if the shader
+ * core is powered up.
+ */
+ blk_powered = core_mask & 1;
+ }
+
+ if (blk_powered && !is_undefined && hw_res_available) {
+ /* Only powered and defined blocks have valid data. */
+ if (accumulate) {
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
+ hdr_cnt, ctr_cnt);
+ } else {
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ } else {
+ /* Even though the block might be undefined, the
+ * user has enabled counter collection for it.
+ * We should not propagate garbage data.
+ */
+ if (accumulate) {
+ /* No-op to preserve existing values */
+ } else {
+ /* src is garbage, so zero the dst */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ }
+ }
+
+		/* Only advance src_offset if the HW resource is available */
+ if (hw_res_available)
+ src_offset += (hdr_cnt + ctr_cnt);
+ if (is_shader_core)
+ core_mask = core_mask >> 1;
+ }
+
+ return 0;
+}
+
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ const u64 *dump_src = src;
+ size_t src_offset = 0;
+ size_t grp, blk, blk_inst;
+
+ if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
+ return -EINVAL;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ const size_t ctr_cnt =
+ kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
+ kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+
+ /*
+ * Skip block if no values in the destination block are enabled.
+ */
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
+ u64 *dst_blk =
+ kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk = dump_src + src_offset;
+
+ if (!is_undefined) {
+ if (accumulate) {
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
+ hdr_cnt, ctr_cnt);
+ } else {
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ } else {
+ /* Even though the block might be undefined, the
+ * user has enabled counter collection for it.
+ * We should not propagate garbage data.
+ */
+ if (accumulate) {
+ /* No-op to preserve existing values */
+ } else {
+ /* src is garbage, so zero the dst */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk,
+ (hdr_cnt + ctr_cnt));
+ }
+ }
+ }
+
+ src_offset += (hdr_cnt + ctr_cnt);
+ }
+
+ return 0;
+}
+
+/**
+ * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical
+ * block enable map to a
+ * block enable map
+ * abstraction.
+ * @phys: Physical 32-bit block enable map
+ * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction
+ * will be stored.
+ * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction
+ * will be stored.
+ */
+static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi)
+{
+ u64 dwords[2] = { 0, 0 };
+
+ size_t dword_idx;
+
+ for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+ const u16 packed = phys >> (16 * dword_idx);
+ u64 dword = 0;
+
+ size_t hword_bit;
+
+ for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+ const size_t dword_bit = hword_bit * 4;
+ const u64 mask = (packed >> (hword_bit)) & 0x1;
+
+ dword |= mask << (dword_bit + 0);
+ dword |= mask << (dword_bit + 1);
+ dword |= mask << (dword_bit + 2);
+ dword |= mask << (dword_bit + 3);
+ }
+ dwords[dword_idx] = dword;
+ }
+ *lo = dwords[0];
+ *hi = dwords[1];
+}
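+
+/*
+ * Worked example (editorial addition): each physical bit expands to four
+ * abstraction bits, so a physical map of 0x00000005 (bits 0 and 2 set)
+ * expands to *lo = 0x0000000000000F0F (values 0-3 and 8-11 enabled) and
+ * *hi = 0. This is the (lossy) inverse of
+ * kbase_hwcnt_backend_gpu_block_map_to_physical().
+ */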
+
+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 fe_bm[EM_COUNT] = { 0 };
+ u64 shader_bm[EM_COUNT] = { 0 };
+ u64 tiler_bm[EM_COUNT] = { 0 };
+ u64 mmu_l2_bm[EM_COUNT] = { 0 };
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!src) || WARN_ON(!dst))
+ return;
+
+ metadata = src->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);
+
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ fe_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ tiler_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ shader_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ mmu_l2_bm[map_idx] |= blk_map[map_idx];
+ break;
+ default:
+ WARN_ON(true);
+ }
+ }
+ } else {
+ WARN_ON(true);
+ }
+ }
+
+ dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]);
+ dst->shader_bm =
+ kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]);
+ dst->tiler_bm =
+ kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]);
+ dst->mmu_l2_bm =
+ kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
+}
+
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
+{
+ switch (src) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY;
+ break;
+ case KBASE_HWCNT_SET_SECONDARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY;
+ break;
+ case KBASE_HWCNT_SET_TERTIARY:
+ *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY;
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+
+ u64 fe_bm[EM_COUNT] = { 0 };
+ u64 shader_bm[EM_COUNT] = { 0 };
+ u64 tiler_bm[EM_COUNT] = { 0 };
+ u64 mmu_l2_bm[EM_COUNT] = { 0 };
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!src) || WARN_ON(!dst))
+ return;
+
+ metadata = dst->metadata;
+
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO],
+ &shader_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO],
+ &tiler_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
+ &mmu_l2_bm[EM_HI]);
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+ u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ blk_map[map_idx] = fe_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ blk_map[map_idx] = tiler_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ blk_map[map_idx] = shader_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ blk_map[map_idx] = mmu_l2_bm[map_idx];
+ break;
+ default:
+ WARN_ON(true);
+ }
+ }
+ } else {
+ WARN_ON(true);
+ }
+ }
+}
+
+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
+ return;
+
+ metadata = buf->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
+ u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
+ const u64 *blk_map =
+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
+
+ if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
+ u64 prfcnt_bm[EM_COUNT] = { 0 };
+ u32 prfcnt_en = 0;
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ prfcnt_bm[map_idx] = blk_map[map_idx];
+ }
+
+ prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO],
+ prfcnt_bm[EM_HI]);
+
+ buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
+ } else {
+ WARN_ON(true);
+ }
+ }
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
new file mode 100644
index 0000000..a49c31e
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_H_
+#define _KBASE_HWCNT_GPU_H_
+
+#include <linux/bug.h>
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/* Hardware counter version 5 definitions, V5 is the only supported version. */
+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
+#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
+#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
+#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
+ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
+
+/* FrontEnd block count in V5 GPU hardware counter. */
+#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1
+/* Tiler block count in V5 GPU hardware counter. */
+#define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1
+
+/* Index of the PRFCNT_EN header into a V5 counter block */
+#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2
+
+/* Number of bytes for each counter value in hardware. */
+#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32))
+
+/**
+ * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
+ * identify metadata groups.
+ * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
+ */
+enum kbase_hwcnt_gpu_group_type {
+ KBASE_HWCNT_GPU_GROUP_TYPE_V5,
+};
+
+/**
+ * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
+ * used to identify metadata blocks.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager
+ * or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job
+ * manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job
+ * manager or CSF HW).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block
+ * (e.g. if a counter set that
+ * a block doesn't support is
+ * used).
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block.
+ */
+enum kbase_hwcnt_gpu_v5_block_type {
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED,
+};
+
+/**
+ * enum kbase_hwcnt_set - GPU hardware counter sets
+ * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters
+ * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters
+ * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters
+ * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters
+ */
+enum kbase_hwcnt_set {
+ KBASE_HWCNT_SET_PRIMARY,
+ KBASE_HWCNT_SET_SECONDARY,
+ KBASE_HWCNT_SET_TERTIARY,
+ KBASE_HWCNT_SET_UNDEFINED = 255,
+};
+
+/**
+ * struct kbase_hwcnt_physical_enable_map - Representation of enable map
+ * directly used by GPU.
+ * @fe_bm: Front end (JM/CSHW) counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm: Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ */
+struct kbase_hwcnt_physical_enable_map {
+ u32 fe_bm;
+ u32 shader_bm;
+ u32 tiler_bm;
+ u32 mmu_l2_bm;
+};
+
+/*
+ * Values for Hardware Counter SET_SELECT value.
+ * Directly passed to HW.
+ */
+enum kbase_hwcnt_physical_set {
+ KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
+ KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
+ KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
+};
+
+/**
+ * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
+ * @l2_count: L2 cache count.
+ * @core_mask: Shader core mask. May be sparse.
+ * @clk_cnt: Number of clock domains available.
+ * @prfcnt_values_per_block: Total entries (header + counters) of performance
+ * counter per block.
+ */
+struct kbase_hwcnt_gpu_info {
+ size_t l2_count;
+ u64 core_mask;
+ u8 clk_cnt;
+ size_t prfcnt_values_per_block;
+};
+
+/**
+ * struct kbase_hwcnt_curr_config - Current Configuration of HW allocated to the
+ * GPU.
+ * @num_l2_slices: Current number of L2 slices allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ *
+ * For architectures with the max_config interface available from the Arbiter,
+ * the current resources allocated may change during runtime due to a
+ * re-partitioning (possible with partition manager). Thus, the HWC needs to be
+ * prepared to report any possible set of counters. For this reason the memory
+ * layout in the userspace is based on the maximum possible allocation. On the
+ * other hand, each partition has just the view of its currently allocated
+ * resources. Therefore, it is necessary to correctly map the dumped HWC values
+ * from the registers into this maximum memory layout so that it can be exposed
+ * to the userspace side correctly.
+ *
+ * For the L2 cache just the number is enough, since the allocated slices will
+ * be accumulated into the first L2 slots available in the destination buffer.
+ *
+ * For the correct mapping of the shader cores it is necessary to skip all the
+ * L2 cache slots in the destination buffer that are not allocated. However, no
+ * extra logic is needed to map the shader core bitmap into the memory layout,
+ * because the allocated shader_present will always be a subset of the maximum
+ * shader_present. This is possible because:
+ * 1 - Partitions are made of slices and they are always ordered from the ones
+ * with more shader cores to the ones with less.
+ * 2 - The shader cores in a slice are always contiguous.
+ * 3 - A partition can only have a contiguous set of slices allocated to it.
+ * So, for example, suppose 4 slices are available in total: 1 with 4 cores,
+ * 2 with 3 cores and 1 with 2 cores. The maximum possible shader_present would
+ * be:
+ * 0b0011|0111|0111|1111 -> note the order, and that the shader cores are
+ * contiguous within any slice.
+ * Supposing that a partition takes the two slices in the middle, the current
+ * config shader_present for this partition would be:
+ * 0b0111|0111 -> note that this is a subset of the maximum above, and that the
+ * slices are contiguous.
+ * Therefore, by directly copying any subset of the maximum possible
+ * shader_present the mapping is already achieved.
+ */
+struct kbase_hwcnt_curr_config {
+ size_t num_l2_slices;
+ u64 shader_present;
+};
+
+/**
+ * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined.
+ *
+ * @grp_type: Hardware counter group type.
+ * @blk_type: Hardware counter block type.
+ *
+ * Return: true if the block type is undefined, else false.
+ */
+static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
+ const uint64_t blk_type)
+{
+ /* Warn on unknown group type */
+ if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
+ return false;
+
+ return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED);
+}
+
+/**
+ * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the
+ * JM GPUs.
+ * @info: Non-NULL pointer to info struct.
+ * @counter_set: The performance counter set used.
+ * @out_metadata: Non-NULL pointer to where created metadata is stored on
+ * success.
+ * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump
+ * buffer is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata,
+ size_t *out_dump_bytes);
+
+/**
+ * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata.
+ *
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the
+ * CSF GPUs.
+ * @info: Non-NULL pointer to info struct.
+ * @counter_set: The performance counter set used.
+ * @out_metadata: Non-NULL pointer to where created metadata is stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info,
+ enum kbase_hwcnt_set counter_set,
+ const struct kbase_hwcnt_metadata **out_metadata);
+
+/**
+ * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter
+ * metadata.
+ * @metadata: Pointer to metadata to destroy.
+ */
+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
+ * dump buffer in src into the dump buffer
+ * abstraction in dst.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of destination dump
+ * buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @pm_core_mask: PM state synchronized shaders core mask with the dump.
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * source raw dump buffer to the destination dump buffer.
+ * @accumulate: True if counters in source should be accumulated into
+ * destination, rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ const u64 pm_core_mask,
+ const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
+
+/**
+ * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
+ * dump buffer in src into the dump buffer
+ * abstraction in dst.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of dst dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @accumulate: True if counters in src should be accumulated into
+ * destination, rather than copied.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata as
+ * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
+ * the length of src.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate);
+
+/**
+ * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
+ * enable map abstraction to
+ * a physical block enable
+ * map.
+ * @lo: Low 64 bits of block enable map abstraction.
+ * @hi: High 64 bits of block enable map abstraction.
+ *
+ * The abstraction uses 128 bits to enable 128 block values, whereas the
+ * physical uses just 32 bits, as bit n enables values [n*4, n*4+3].
+ * Therefore, this conversion is lossy.
+ *
+ * Return: 32-bit physical block enable map.
+ */
+static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi)
+{
+ u32 phys = 0;
+ u64 dwords[2] = { lo, hi };
+ size_t dword_idx;
+
+ for (dword_idx = 0; dword_idx < 2; dword_idx++) {
+ const u64 dword = dwords[dword_idx];
+ u16 packed = 0;
+
+ size_t hword_bit;
+
+ for (hword_bit = 0; hword_bit < 16; hword_bit++) {
+ const size_t dword_bit = hword_bit * 4;
+ const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) |
+ ((dword >> (dword_bit + 1)) & 0x1) |
+ ((dword >> (dword_bit + 2)) & 0x1) |
+ ((dword >> (dword_bit + 3)) & 0x1);
+ packed |= (mask << hword_bit);
+ }
+ phys |= ((u32)packed) << (16 * dword_idx);
+ }
+ return phys;
+}
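+
+/*
+ * Worked example (editorial addition): lo = 0x000000000000000F (values 0-3
+ * enabled) and lo = 0x0000000000000001 (only value 0 enabled) both map to
+ * phys = 0x00000001; a single physical bit covers a group of four counter
+ * values, which is what makes the conversion lossy.
+ */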
+
+/**
+ * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction
+ * into a physical enable map.
+ * @dst: Non-NULL pointer to destination physical enable map.
+ * @src: Non-NULL pointer to source enable map abstraction.
+ *
+ * The src must have been created from a metadata returned from a call to
+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
+ *
+ * This is a lossy conversion, as the enable map abstraction has one bit per
+ * individual counter block value, but the physical enable map uses 1 bit for
+ * every 4 counters, shared over all instances of a block.
+ */
+void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical
+ * SET_SELECT value.
+ *
+ * @dst: Non-NULL pointer to destination physical SET_SELECT value.
+ * @src: Non-NULL pointer to source counter set selection.
+ */
+void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src);
+
+/**
+ * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to
+ * an enable map abstraction.
+ * @dst: Non-NULL pointer to destination enable map abstraction.
+ * @src: Non-NULL pointer to source physical enable map.
+ *
+ * The dst must have been created from a metadata returned from a call to
+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
+ *
+ * This is a lossy conversion, as the physical enable map can technically
+ * support counter blocks with 128 counters each, but no hardware actually uses
+ * more than 64, so the enable map abstraction has nowhere to store the enable
+ * information for the 64 non-existent counters.
+ */
+void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_physical_enable_map *src);
+
+/**
+ * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter
+ * enable headers in a dump buffer to
+ * reflect the specified enable map.
+ * @buf: Non-NULL pointer to dump buffer to patch.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * The buf and enable_map must have been created from a metadata returned from
+ * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
+ *
+ * This function should be used before handing off a dump buffer over the
+ * kernel-user boundary, to ensure the header is accurate for the enable map
+ * used by the user.
+ */
+void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
+ const struct kbase_hwcnt_enable_map *enable_map);
+
+#endif /* _KBASE_HWCNT_GPU_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
new file mode 100644
index 0000000..0cf2f94
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_gpu.h"
+#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
+
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md)
+{
+ struct kbase_hwcnt_description desc;
+ struct kbase_hwcnt_group_description group;
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
+ size_t prfcnt_values_per_block;
+ size_t blk;
+ int err;
+ struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+
+ if (!dst_md_narrow || !src_md || !src_md->grp_metadata ||
+ !src_md->grp_metadata[0].blk_metadata)
+ return -EINVAL;
+
+ /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block
+ * count in the metadata.
+ */
+ if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
+ (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
+ return -EINVAL;
+
+ /* Get the values count in the first block. */
+ prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
+
+	/* Check that all blocks have the same number of values. */
+ for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
+ if (val_cnt != prfcnt_values_per_block)
+ return -EINVAL;
+ }
+
+ /* Only support 64 and 128 entries per block. */
+ if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128))
+ return -EINVAL;
+
+ metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL);
+ if (!metadata_narrow)
+ return -ENOMEM;
+
+ /* Narrow to 64 entries per block to keep API backward compatibility. */
+ prfcnt_values_per_block = 64;
+
+ for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
+ size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
+ blks[blk] = (struct kbase_hwcnt_block_description){
+ .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
+ .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
+ .hdr_cnt = blk_hdr_cnt,
+ .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
+ };
+ }
+
+ group = (struct kbase_hwcnt_group_description){
+ .type = kbase_hwcnt_metadata_group_type(src_md, 0),
+ .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT,
+ .blks = blks,
+ };
+
+ desc = (struct kbase_hwcnt_description){
+ .grp_cnt = kbase_hwcnt_metadata_group_count(src_md),
+ .avail_mask = src_md->avail_mask,
+ .clk_cnt = src_md->clk_cnt,
+ .grps = &group,
+ };
+
+ err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata);
+ if (!err) {
+		/* Halve the buffer size, as the narrowed metadata only supports
+		 * 32-bit block entries while the created metadata uses 64-bit
+		 * entries.
+		 */
+ metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
+ *dst_md_narrow = metadata_narrow;
+ } else {
+ kfree(metadata_narrow);
+ }
+
+ return err;
+}
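+
+/*
+ * Illustrative usage sketch (editorial addition, not part of the driver
+ * sources): narrowing CSF metadata for a legacy 32-bit client. The gpu_info
+ * and counter_set values, and the surrounding error handling, are assumptions
+ * made purely for the example.
+ *
+ *   const struct kbase_hwcnt_metadata *md;
+ *   const struct kbase_hwcnt_metadata_narrow *md_narrow;
+ *   int err;
+ *
+ *   err = kbase_hwcnt_csf_metadata_create(&gpu_info, counter_set, &md);
+ *   if (!err)
+ *           err = kbase_hwcnt_gpu_metadata_narrow_create(&md_narrow, md);
+ *
+ *   // ... use md_narrow for 32-bit dump buffers ...
+ *
+ *   kbase_hwcnt_gpu_metadata_narrow_destroy(md_narrow);
+ *   kbase_hwcnt_csf_metadata_destroy(md);
+ */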
+
+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+ if (!md_narrow)
+ return;
+
+ kbase_hwcnt_metadata_destroy(md_narrow->metadata);
+ kfree(md_narrow);
+}
+
+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
+{
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ u8 *buf;
+
+ if (!md_narrow || !dump_buf)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+
+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */
+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)buf,
+ .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes),
+ };
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
+{
+ if (!dump_buf_narrow)
+ return;
+
+ kfree(dump_buf_narrow->dump_buf);
+ *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
+ .dump_buf = NULL,
+ .clk_cnt_buf = NULL };
+}
+
+int kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ struct kbase_hwcnt_dump_buffer_narrow *buffers;
+ size_t buf_idx;
+ unsigned int order;
+ unsigned long addr;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ size_t total_dump_buf_size;
+
+ if (!md_narrow || !dump_bufs)
+ return -EINVAL;
+
+ dump_buf_bytes = md_narrow->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
+
+ /* Allocate memory for the dump buffer struct array */
+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+ if (!buffers)
+ return -ENOMEM;
+
+ /* Allocate pages for the actual dump buffers, as they tend to be fairly
+ * large.
+ */
+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+
+ if (!addr) {
+ kfree(buffers);
+ return -ENOMEM;
+ }
+
+ *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){
+ .page_addr = addr,
+ .page_order = order,
+ .buf_cnt = n,
+ .bufs = buffers,
+ };
+
+ total_dump_buf_size = dump_buf_bytes * n;
+ /* Set the buffer of each dump buf */
+ for (buf_idx = 0; buf_idx < n; buf_idx++) {
+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
+ const size_t clk_cnt_buf_offset =
+ total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx);
+
+ buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){
+ .md_narrow = md_narrow,
+ .dump_buf = (u32 *)(addr + dump_buf_offset),
+ .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset),
+ };
+ }
+
+ return 0;
+}
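+
+/*
+ * Layout note (editorial addition): for n buffers the single allocation made
+ * above is carved up as n narrow dump buffers followed by n clock count
+ * buffers:
+ *
+ *   addr                      : bufs[0].dump_buf
+ *   addr + dump_buf_bytes     : bufs[1].dump_buf, ... , bufs[n-1].dump_buf
+ *   addr + n * dump_buf_bytes : bufs[0].clk_cnt_buf, ... , bufs[n-1].clk_cnt_buf
+ *
+ * as computed by dump_buf_offset and clk_cnt_buf_offset in the loop above.
+ */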
+
+void kbase_hwcnt_dump_buffer_narrow_array_free(
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
+{
+ if (!dump_bufs)
+ return;
+
+ kfree(dump_bufs->bufs);
+ free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+ memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt)
+{
+ size_t val;
+
+ for (val = 0; val < val_cnt; val++) {
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
+ u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
+
+ dst_blk[val] = val_enabled ? src_val : 0;
+ }
+}
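+
+/*
+ * Worked example (editorial addition): with val_cnt = 2, blk_em enabling only
+ * value 0, and src_blk = { 0x1FFFFFFFF, 42 }, the result is
+ * dst_blk = { U32_MAX, 0 }: enabled values saturate at U32_MAX when narrowed
+ * to 32 bits, and disabled values are zeroed (strict copy semantics).
+ */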
+
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
+ size_t grp;
+ size_t clk;
+
+ if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
+ WARN_ON(src->metadata->grp_cnt != 1) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ src->metadata->grp_metadata[0].blk_cnt) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
+ KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
+ WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
+ src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
+ return;
+
+ /* Don't use src metadata since src buffer is bigger than dst buffer. */
+ metadata_narrow = dst_narrow->md_narrow;
+
+ for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
+ size_t blk;
+ size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);
+
+ for (blk = 0; blk < blk_cnt; blk++) {
+ size_t blk_inst;
+ size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
+ metadata_narrow, grp, blk);
+
+ for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
+ /* The narrowed down buffer is only 32-bit. */
+ u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
+ dst_narrow, grp, blk, blk_inst);
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
+ src, grp, blk, blk_inst);
+ const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
+ dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
+ metadata_narrow, grp, blk);
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
+ KBASE_HWCNT_VALUE_BYTES));
+
+ kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
+ blk_em, val_cnt);
+ }
+ }
+ }
+
+ for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
+ bool clk_enabled =
+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
+
+ dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ }
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
new file mode 100644
index 0000000..afd236d
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
@@ -0,0 +1,330 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_NARROW_H_
+#define _KBASE_HWCNT_GPU_NARROW_H_
+
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical
+ * layout of narrow dump buffers.
+ * For backward compatibility, the narrow
+ * metadata only supports 64 counters per
+ * block and 32-bit per block entry.
+ * @metadata:       Non-NULL pointer to the metadata before narrowing down to
+ *                  32-bit block entries; it has 64 counters per block and
+ *                  64 bits per value.
+ * @dump_buf_bytes: The size in bytes after narrowing the 64-bit block entries
+ *                  down to 32-bit.
+ */
+struct kbase_hwcnt_metadata_narrow {
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t dump_buf_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer.
+ * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to
+ * describe the layout of the narrow dump buffer.
+ * @dump_buf: Non-NULL pointer to an array of u32 values, the array size
+ * is md_narrow->dump_buf_bytes.
+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
+ * for each clock domain.
+ */
+struct kbase_hwcnt_dump_buffer_narrow {
+ const struct kbase_hwcnt_metadata_narrow *md_narrow;
+ u32 *dump_buf;
+ u64 *clk_cnt_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump
+ * buffer array.
+ * @page_addr: Address of first allocated page. A single allocation is used for
+ * all narrow dump buffers in the array.
+ * @page_order: The allocation order of the pages, the order is on a logarithmic
+ * scale.
+ * @buf_cnt: The number of allocated dump buffers.
+ * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors.
+ */
+struct kbase_hwcnt_dump_buffer_narrow_array {
+ unsigned long page_addr;
+ unsigned int page_order;
+ size_t buf_cnt;
+ struct kbase_hwcnt_dump_buffer_narrow *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from
+ * narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ *
+ * Return: Number of hardware counter groups described by narrow metadata.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+ return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ *
+ * Return: Type of the group grp.
+ */
+static inline u64
+kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp)
+{
+ return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a
+ * group from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp)
+{
+ return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of
+ * instances of a block
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter
+ * headers from narrow
+ * metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counter headers in each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of
+ * counters from narrow
+ * metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counters in each instance of block blk in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values
+ * from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of headers plus counters in each instance of block blk
+ * in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ size_t grp, size_t blk)
+{
+ return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) +
+ kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a
+ * narrowed block instance's
+ * dump buffer.
+ * @buf: Non-NULL pointer to narrow dump buffer.
+ * @grp: Index of the group in the narrow metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: u32* to the dump buffer for the block instance.
+ */
+static inline u32 *
+kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
+ buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
+ (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride *
+ blk_inst);
+}
+
+/**
+ * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC
+ * entries per block truncated to
+ * 64 entries and block entry size
+ * narrowed down to 32-bit.
+ *
+ * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored
+ * on success.
+ * @src_md: Non-NULL pointer to the HWC metadata used as the source to
+ * create dst_md_narrow.
+ *
+ * For backward compatibility of the interface with user clients, new metadata
+ * with entries per block truncated to 64 and block entry size narrowed down
+ * to 32-bit will be created for dst_md_narrow.
+ * The total number of entries per block in src_md must be either 64 or 128;
+ * any other value is unsupported and the function returns an error.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
+ const struct kbase_hwcnt_metadata *src_md);
+
+/**
+ * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow
+ * metadata object.
+ * @md_narrow: Pointer to hardware counter narrow metadata.
+ */
+void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow);
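+
+/*
+ * Example (illustrative sketch only, not part of the driver): deriving narrow
+ * metadata from existing wide metadata and destroying it again. src_md is
+ * assumed to be a valid metadata object whose blocks have 64 or 128 values.
+ *
+ *	const struct kbase_hwcnt_metadata_narrow *md_narrow = NULL;
+ *
+ *	if (!kbase_hwcnt_gpu_metadata_narrow_create(&md_narrow, src_md)) {
+ *		... use md_narrow ...
+ *		kbase_hwcnt_gpu_metadata_narrow_destroy(md_narrow);
+ *	}
+ */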
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be
+ * initialised to undefined values, so must be used as a copy
+ * destination, or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+ struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed narrow dump buffer structure, or on an
+ * already freed narrow dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow
+ * dump buffers.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @n: Number of narrow dump buffers to allocate.
+ * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array
+ * object to be initialised.
+ *
+ * A single zeroed contiguous page allocation will be used for all of the
+ * buffers inside the object, where:
+ * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow->dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_narrow_array_alloc(
+ const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer
+ * array.
+ * @dump_bufs: Narrow dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed narrow dump buffer array structure, or
+ * on an already freed narrow dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_narrow_array_free(
+ struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block
+ * values from source to
+ * destination.
+ * @dst_blk: Non-NULL pointer to destination block obtained from a call to
+ * kbase_hwcnt_dump_buffer_narrow_block_instance.
+ * @src_blk: Non-NULL pointer to source block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ *
+ * After the copy, any disabled values in the destination will be zero. Each
+ * enabled value in the destination is saturated at U32_MAX if the
+ * corresponding source value is greater than U32_MAX, and is copied from the
+ * source unchanged otherwise.
+ */
+void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt);
+
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a
+ * narrow dump buffer.
+ * @dst_narrow: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * After the operation, all non-enabled values (including padding bytes) will be
+ * zero. Slower than the non-strict variant.
+ *
+ * Each enabled value in dst_narrow is saturated at U32_MAX if the
+ * corresponding source value is greater than U32_MAX, and is copied from the
+ * source unchanged otherwise.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
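+
+/*
+ * Example (illustrative sketch only, not part of the driver): narrowing a
+ * 64-bit dump buffer into a freshly allocated narrow buffer. md_narrow, src
+ * and enable_map are assumed to be existing, matching objects.
+ *
+ *	struct kbase_hwcnt_dump_buffer_narrow narrow_buf = { 0 };
+ *
+ *	if (!kbase_hwcnt_dump_buffer_narrow_alloc(md_narrow, &narrow_buf)) {
+ *		kbase_hwcnt_dump_buffer_copy_strict_narrow(&narrow_buf, src, enable_map);
+ *		... hand the 32-bit values in narrow_buf.dump_buf to the client ...
+ *		kbase_hwcnt_dump_buffer_narrow_free(&narrow_buf);
+ *	}
+ */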
+
+#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
new file mode 100644
index 0000000..763eb31
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
@@ -0,0 +1,511 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/slab.h>
+
+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **out_metadata)
+{
+ char *buf;
+ struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_group_metadata *grp_mds;
+ size_t grp;
+ size_t enable_map_count; /* Number of u64 bitfields (inc padding) */
+ size_t dump_buf_count; /* Number of u64 values (inc padding) */
+ size_t avail_mask_bits; /* Number of availability mask bits */
+
+ size_t size;
+ size_t offset;
+
+ if (!desc || !out_metadata)
+ return -EINVAL;
+
+ /* The maximum number of clock domains is 64. */
+ if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE))
+ return -EINVAL;
+
+ /* Calculate the bytes needed to tightly pack the metadata */
+
+ /* Top level metadata */
+ size = 0;
+ size += sizeof(struct kbase_hwcnt_metadata);
+
+ /* Group metadata */
+ size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+ /* Block metadata */
+ for (grp = 0; grp < desc->grp_cnt; grp++) {
+ size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt;
+ }
+
+ /* Single allocation for the entire metadata */
+ buf = kmalloc(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ /* Use the allocated memory for the metadata and its members */
+
+ /* Bump allocate the top level metadata */
+ offset = 0;
+ metadata = (struct kbase_hwcnt_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_metadata);
+
+ /* Bump allocate the group metadata */
+ grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
+
+ enable_map_count = 0;
+ dump_buf_count = 0;
+ avail_mask_bits = 0;
+
+ for (grp = 0; grp < desc->grp_cnt; grp++) {
+ size_t blk;
+
+ const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp;
+ struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
+
+ size_t group_enable_map_count = 0;
+ size_t group_dump_buffer_count = 0;
+ size_t group_avail_mask_bits = 0;
+
+ /* Bump allocate this group's block metadata */
+ struct kbase_hwcnt_block_metadata *blk_mds =
+ (struct kbase_hwcnt_block_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt;
+
+ /* Fill in each block in the group's information */
+ for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
+ const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk;
+ struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk;
+ const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt;
+
+ blk_md->type = blk_desc->type;
+ blk_md->inst_cnt = blk_desc->inst_cnt;
+ blk_md->hdr_cnt = blk_desc->hdr_cnt;
+ blk_md->ctr_cnt = blk_desc->ctr_cnt;
+ blk_md->enable_map_index = group_enable_map_count;
+ blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values);
+ blk_md->dump_buf_index = group_dump_buffer_count;
+ blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS(
+ n_values,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
+ blk_md->avail_mask_index = group_avail_mask_bits;
+
+ group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt;
+ group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt;
+ group_avail_mask_bits += blk_md->inst_cnt;
+ }
+
+ /* Fill in the group's information */
+ grp_md->type = grp_desc->type;
+ grp_md->blk_cnt = grp_desc->blk_cnt;
+ grp_md->blk_metadata = blk_mds;
+ grp_md->enable_map_index = enable_map_count;
+ grp_md->dump_buf_index = dump_buf_count;
+ grp_md->avail_mask_index = avail_mask_bits;
+
+ enable_map_count += group_enable_map_count;
+ dump_buf_count += group_dump_buffer_count;
+ avail_mask_bits += group_avail_mask_bits;
+ }
+
+ /* Fill in the top level metadata's information */
+ metadata->grp_cnt = desc->grp_cnt;
+ metadata->grp_metadata = grp_mds;
+ metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
+ metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
+ metadata->avail_mask = desc->avail_mask;
+ metadata->clk_cnt = desc->clk_cnt;
+
+ WARN_ON(size != offset);
+ /* Due to the block alignment, there should be exactly one enable map
+ * bit per 4 bytes in the dump buffer.
+ */
+ WARN_ON(metadata->dump_buf_bytes !=
+ (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+
+ *out_metadata = metadata;
+ return 0;
+}
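+
+/*
+ * Example (illustrative sketch only, not part of the driver): a minimal
+ * description with one group containing one block type of 4 instances, used
+ * to create metadata. All counts and type identifiers here are arbitrary;
+ * the avail_mask of 0xF marks all 4 block instances as available.
+ *
+ *	const struct kbase_hwcnt_block_description blk = {
+ *		.type = 0x1, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60,
+ *	};
+ *	const struct kbase_hwcnt_group_description grp = {
+ *		.type = 0x1, .blk_cnt = 1, .blks = &blk,
+ *	};
+ *	const struct kbase_hwcnt_description desc = {
+ *		.grp_cnt = 1, .grps = &grp, .avail_mask = 0xF, .clk_cnt = 1,
+ *	};
+ *	const struct kbase_hwcnt_metadata *md = NULL;
+ *	int err = kbase_hwcnt_metadata_create(&desc, &md);
+ */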
+
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+ kfree(metadata);
+}
+
+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map)
+{
+ u64 *enable_map_buf;
+
+ if (!metadata || !enable_map)
+ return -EINVAL;
+
+ if (metadata->enable_map_bytes > 0) {
+ enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+ if (!enable_map_buf)
+ return -ENOMEM;
+ } else {
+ enable_map_buf = NULL;
+ }
+
+ enable_map->metadata = metadata;
+ enable_map->hwcnt_enable_map = enable_map_buf;
+ return 0;
+}
+
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
+{
+ if (!enable_map)
+ return;
+
+ kfree(enable_map->hwcnt_enable_map);
+ enable_map->hwcnt_enable_map = NULL;
+ enable_map->metadata = NULL;
+}
+
+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+ u8 *buf;
+
+ if (!metadata || !dump_buf)
+ return -EINVAL;
+
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
+
+ /* Make a single allocation for both dump_buf and clk_cnt_buf. */
+ buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ dump_buf->metadata = metadata;
+ dump_buf->dump_buf = (u64 *)buf;
+ dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes);
+
+ return 0;
+}
+
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ if (!dump_buf)
+ return;
+
+ kfree(dump_buf->dump_buf);
+ memset(dump_buf, 0, sizeof(*dump_buf));
+}
+
+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+ struct kbase_hwcnt_dump_buffer *buffers;
+ size_t buf_idx;
+ unsigned int order;
+ unsigned long addr;
+ size_t dump_buf_bytes;
+ size_t clk_cnt_buf_bytes;
+
+ if (!metadata || !dump_bufs)
+ return -EINVAL;
+
+ dump_buf_bytes = metadata->dump_buf_bytes;
+ clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
+
+ /* Allocate memory for the dump buffer struct array */
+ buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+ if (!buffers)
+ return -ENOMEM;
+
+ /* Allocate pages for the actual dump buffers, as they tend to be fairly
+ * large.
+ */
+ order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
+ addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+
+ if (!addr) {
+ kfree(buffers);
+ return -ENOMEM;
+ }
+
+ dump_bufs->page_addr = addr;
+ dump_bufs->page_order = order;
+ dump_bufs->buf_cnt = n;
+ dump_bufs->bufs = buffers;
+
+ /* Set the buffer of each dump buf */
+ for (buf_idx = 0; buf_idx < n; buf_idx++) {
+ const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
+ const size_t clk_cnt_buf_offset =
+ (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx);
+
+ buffers[buf_idx].metadata = metadata;
+ buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset);
+ buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset);
+ }
+
+ return 0;
+}
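+
+/*
+ * Example (illustrative sketch only, not part of the driver): allocating an
+ * array of two dump buffers and freeing it again. metadata is assumed to be
+ * an existing metadata object.
+ *
+ *	struct kbase_hwcnt_dump_buffer_array bufs = { 0 };
+ *
+ *	if (!kbase_hwcnt_dump_buffer_array_alloc(metadata, 2, &bufs)) {
+ *		... use bufs.bufs[0] and bufs.bufs[1] ...
+ *		kbase_hwcnt_dump_buffer_array_free(&bufs);
+ *	}
+ */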
+
+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+ if (!dump_bufs)
+ return;
+
+ kfree(dump_bufs->bufs);
+ free_pages(dump_bufs->page_addr, dump_bufs->page_order);
+ memset(dump_bufs, 0, sizeof(*dump_bufs));
+}
+
+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk;
+ size_t val_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+ }
+
+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
+}
+
+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst)
+{
+ if (WARN_ON(!dst))
+ return;
+
+ memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
+
+ memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
+}
+
+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
+
+ if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) {
+ /* Block available, so only zero non-enabled values */
+ kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt);
+ } else {
+ /* Block not available, so zero the entire thing */
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+ }
+ }
+}
+
+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk;
+ const u64 *src_blk;
+ size_t val_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
+ }
+}
+
+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk =
+ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+ /* Align upwards to include padding bytes */
+ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
+
+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ bool clk_enabled =
+ kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
+
+ dst->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
+ }
+}
+
+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk;
+ const u64 *src_blk;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ continue;
+
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+
+ kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+ }
+}
+
+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t grp, blk, blk_inst;
+ size_t clk;
+
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+ WARN_ON(dst->metadata != src->metadata) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+ {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ const u64 *src_blk =
+ kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ const u64 *blk_em =
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+ size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ /* Align upwards to include padding bytes */
+ ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+ hdr_cnt + ctr_cnt,
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES) - hdr_cnt);
+
+ kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt,
+ ctr_cnt);
+ }
+
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+ {
+ if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+ dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+ else
+ dst->clk_cnt_buf[clk] = 0;
+ }
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
new file mode 100644
index 0000000..5c5ada4
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
@@ -0,0 +1,1231 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter types.
+ * Contains structures for describing the physical layout of hardware counter
+ * dump buffers and enable maps within a system.
+ *
+ * Also contains helper functions for manipulation of these dump buffers and
+ * enable maps.
+ *
+ * Through use of these structures and functions, hardware counters can be
+ * enabled, copied, accumulated, and generally manipulated in a generic way,
+ * regardless of the physical counter dump layout.
+ *
+ * Terminology:
+ *
+ * Hardware Counter System:
+ * A collection of hardware counter groups, making a full hardware counter
+ * system.
+ * Hardware Counter Group:
+ * A group of Hardware Counter Blocks (e.g. a t62x might have more than one
+ * core group, so has one counter group per core group, where each group
+ * may have a different number and layout of counter blocks).
+ * Hardware Counter Block:
+ * A block of hardware counters (e.g. shader block, tiler block).
+ * Hardware Counter Block Instance:
+ * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have
+ * 4 shader block instances).
+ *
+ * Block Header:
+ * A header value inside a counter block. Headers don't count anything,
+ * so it is only valid to copy or zero them. Headers are always the first
+ * values in the block.
+ * Block Counter:
+ * A counter value inside a counter block. Counters can be zeroed, copied,
+ * or accumulated. Counters are always immediately after the headers in the
+ * block.
+ * Block Value:
+ * A catch-all term for block headers and block counters.
+ *
+ * Enable Map:
+ * An array of u64 bitfields, where each bit either enables exactly one
+ * block value, or is unused (padding).
+ * Dump Buffer:
+ * An array of u64 values, where each u64 corresponds either to one block
+ * value, or is unused (padding).
+ * Availability Mask:
+ * A bitfield, where each bit corresponds to whether a block instance is
+ * physically available (e.g. an MP3 GPU may have a sparse core mask of
+ * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the
+ * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this
+ * case, the availability mask might be 0b1011111 (the exact layout will
+ * depend on the specific hardware architecture), with the 3 extra early bits
+ * corresponding to other block instances in the hardware counter system).
+ * Metadata:
+ * Structure describing the physical layout of the enable map and dump buffers
+ * for a specific hardware counter system.
+ *
+ */
+
+#ifndef _KBASE_HWCNT_TYPES_H_
+#define _KBASE_HWCNT_TYPES_H_
+
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+/* Number of bytes in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64))
+
+/* Number of bits in each bitfield */
+#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE)
+
+/* Number of bytes for each counter value.
+ * The driver uses 64 bits per counter so that the 32-bit hardware register
+ * values do not overflow when accumulated over long periods.
+ */
+#define KBASE_HWCNT_VALUE_BYTES (sizeof(u64))
+
+/* Number of bits in an availability mask (i.e. max total number of block
+ * instances supported in a Hardware Counter System)
+ */
+#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE)
+
+/* Minimum alignment of each block of hardware counters */
+#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
+
+/**
+ * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value.
+ * @value: The value to align upwards.
+ * @alignment: The alignment boundary.
+ *
+ * Return: Input value if already aligned to the specified boundary, or next
+ * (incrementing upwards) aligned value.
+ */
+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
+ (value + ((alignment - (value % alignment)) % alignment))
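+
+/*
+ * For example, with the definitions above, KBASE_HWCNT_ALIGN_UPWARDS(70, 64)
+ * evaluates to 128, while KBASE_HWCNT_ALIGN_UPWARDS(64, 64) stays at 64.
+ */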
+
+/**
+ * struct kbase_hwcnt_block_description - Description of one or more identical,
+ * contiguous, Hardware Counter Blocks.
+ * @type: The arbitrary identifier used to identify the type of the block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt: The number of 64-bit Block Headers in the block.
+ * @ctr_cnt: The number of 64-bit Block Counters in the block.
+ */
+struct kbase_hwcnt_block_description {
+ u64 type;
+ size_t inst_cnt;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_group_description - Description of one or more identical,
+ * contiguous Hardware Counter Groups.
+ * @type: The arbitrary identifier used to identify the type of the group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the group.
+ * @blks: Non-NULL pointer to an array of blk_cnt block descriptions,
+ * describing each type of Hardware Counter Block in the group.
+ */
+struct kbase_hwcnt_group_description {
+ u64 type;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_description *blks;
+};
+
+/**
+ * struct kbase_hwcnt_description - Description of a Hardware Counter System.
+ * @grp_cnt: The number of Hardware Counter Groups.
+ * @grps: Non-NULL pointer to an array of grp_cnt group descriptions,
+ * describing each Hardware Counter Group in the system.
+ * @avail_mask: Flat Availability Mask for all block instances in the system.
+ * @clk_cnt: The number of clock domains in the system. The maximum is 64.
+ */
+struct kbase_hwcnt_description {
+ size_t grp_cnt;
+ const struct kbase_hwcnt_group_description *grps;
+ u64 avail_mask;
+ u8 clk_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout
+ * of a block in a Hardware Counter System's
+ * Dump Buffers and Enable Maps.
+ * @type: The arbitrary identifier used to identify the type of the
+ * block.
+ * @inst_cnt: The number of Instances of the block.
+ * @hdr_cnt: The number of 64-bit Block Headers in the block.
+ * @ctr_cnt: The number of 64-bit Block Counters in the block.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ * Enable Map bitfields of the Block Instances described by
+ * this metadata start.
+ * @enable_map_stride: Stride in u64s between the Enable Maps of each of the
+ * Block Instances described by this metadata.
+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
+ * Dump Buffers of the Block Instances described by this
+ * metadata start.
+ * @dump_buf_stride: Stride in u64s between the Dump Buffers of each of the
+ * Block Instances described by this metadata.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ * the Availability Masks of the Block Instances described
+ * by this metadata start.
+ */
+struct kbase_hwcnt_block_metadata {
+ u64 type;
+ size_t inst_cnt;
+ size_t hdr_cnt;
+ size_t ctr_cnt;
+ size_t enable_map_index;
+ size_t enable_map_stride;
+ size_t dump_buf_index;
+ size_t dump_buf_stride;
+ size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
+ * of a group of blocks in a Hardware
+ * Counter System's Dump Buffers and Enable
+ * Maps.
+ * @type: The arbitrary identifier used to identify the type of the
+ * group.
+ * @blk_cnt: The number of types of Hardware Counter Block in the
+ * group.
+ * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata,
+ * describing the physical layout of each type of Hardware
+ * Counter Block in the group.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ * Enable Maps of the blocks within the group described by
+ * this metadata start.
+ * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
+ * Dump Buffers of the blocks within the group described by
+ * metadata start.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ * the Availability Masks of the blocks within the group
+ * described by this metadata start.
+ */
+struct kbase_hwcnt_group_metadata {
+ u64 type;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_metadata *blk_metadata;
+ size_t enable_map_index;
+ size_t dump_buf_index;
+ size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_metadata - Metadata describing the memory layout
+ * of Dump Buffers and Enable Maps within a
+ * Hardware Counter System.
+ * @grp_cnt: The number of Hardware Counter Groups.
+ * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata,
+ * describing the physical layout of each Hardware Counter
+ * Group in the system.
+ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
+ * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system.
+ * @avail_mask: The Availability Mask for the system.
+ * @clk_cnt: The number of clock domains in the system.
+ */
+struct kbase_hwcnt_metadata {
+ size_t grp_cnt;
+ const struct kbase_hwcnt_group_metadata *grp_metadata;
+ size_t enable_map_bytes;
+ size_t dump_buf_bytes;
+ u64 avail_mask;
+ u8 clk_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64
+ * bitfields.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ * the layout of the enable map.
+ * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an
+ * array of u64 bitfields, each bit of which enables one hardware
+ * counter.
+ * @clk_enable_map: A u64 bitfield, each bit of which enables the cycle
+ * counter for the corresponding clock domain.
+ */
+struct kbase_hwcnt_enable_map {
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 *hwcnt_enable_map;
+ u64 clk_enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer.
+ * @metadata: Non-NULL pointer to metadata used to identify, and to describe
+ * the layout of the Dump Buffer.
+ * @dump_buf: Non-NULL pointer to an array of u64 values, the array size is
+ * metadata->dump_buf_bytes.
+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
+ * for each clock domain.
+ */
+struct kbase_hwcnt_dump_buffer {
+ const struct kbase_hwcnt_metadata *metadata;
+ u64 *dump_buf;
+ u64 *clk_cnt_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
+ * @page_addr: Address of allocated pages. A single allocation is used for all
+ * Dump Buffers in the array.
+ * @page_order: The allocation order of the pages, i.e. log2 of the number of
+ * pages allocated.
+ * @buf_cnt: The number of allocated Dump Buffers.
+ * @bufs: Non-NULL pointer to the array of Dump Buffers.
+ */
+struct kbase_hwcnt_dump_buffer_array {
+ unsigned long page_addr;
+ unsigned int page_order;
+ size_t buf_cnt;
+ struct kbase_hwcnt_dump_buffer *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object
+ * from a description.
+ * @desc: Non-NULL pointer to a hardware counter description.
+ * @metadata: Non-NULL pointer to where created metadata will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
+ const struct kbase_hwcnt_metadata **metadata);
+
+/**
+ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object.
+ * @metadata: Pointer to hardware counter metadata
+ */
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_metadata_group_count() - Get the number of groups.
+ * @metadata: Non-NULL pointer to metadata.
+ *
+ * Return: Number of hardware counter groups described by metadata.
+ */
+static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
+{
+ if (WARN_ON(!metadata))
+ return 0;
+
+ return metadata->grp_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ *
+ * Return: Type of the group grp.
+ */
+static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].type;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Type of the block blk in group grp.
+ */
+static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].type;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
+ * a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter
+ * headers.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counter headers in each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of counters in each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Enable map stride for each instance of block blk in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata,
+ size_t grp, size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_values_count() - Get the number of values.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: Number of headers plus counters in each instance of block blk
+ * in group grp.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) +
+ kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+}
+
+/**
+ * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in
+ * the metadata.
+ * @md: Non-NULL pointer to metadata.
+ * @grp: size_t variable used as group iterator.
+ * @blk: size_t variable used as block iterator.
+ * @blk_inst: size_t variable used as block instance iterator.
+ *
+ * Iteration order is group, then block, then block instance (i.e. linearly
+ * through memory).
+ */
+#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
+ for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
+ for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
+ for ((blk_inst) = 0; \
+ (blk_inst) < \
+ kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \
+ (blk_inst)++)
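+
+/*
+ * Example (illustrative sketch): counting the enabled block instances in an
+ * enable map with the iterator above. map is assumed to be a valid
+ * struct kbase_hwcnt_enable_map pointer.
+ *
+ *	size_t grp, blk, blk_inst, enabled = 0;
+ *
+ *	kbase_hwcnt_metadata_for_each_block(map->metadata, grp, blk, blk_inst)
+ *	{
+ *		if (kbase_hwcnt_enable_map_block_enabled(map, grp, blk, blk_inst))
+ *			enabled++;
+ *	}
+ */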
+
+/**
+ * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail
+ * mask corresponding to the block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ *
+ * Return: The bit index into the avail mask for the block.
+ */
+static inline size_t
+kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk)
+{
+ if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+ WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ return 0;
+
+ return metadata->grp_metadata[grp].avail_mask_index +
+ metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is
+ * available.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if the block instance is available, else false.
+ */
+static inline bool
+kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp,
+ size_t blk, size_t blk_inst)
+{
+ size_t bit;
+ u64 mask;
+
+ if (WARN_ON(!metadata))
+ return false;
+
+ bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst;
+ mask = 1ull << bit;
+
+ return (metadata->avail_mask & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_alloc() - Allocate an enable map.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @enable_map: Non-NULL pointer to enable map to be initialised. Will be
+ * initialised to all zeroes (i.e. all counters disabled).
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_free() - Free an enable map.
+ * @enable_map: Enable map to be freed.
+ *
+ * Can be safely called on an all-zeroed enable map structure, or on an already
+ * freed enable map.
+ */
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
+
+/**
+ * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block
+ * instance's enable map.
+ * @map: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: u64* to the bitfield(s) used as the enable map for the
+ * block instance.
+ */
+static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map))
+ return NULL;
+
+ if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) ||
+ WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) ||
+ WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ return map->hwcnt_enable_map;
+
+ return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index +
+ map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index +
+ (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst);
+}
+
+/**
+ * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required
+ * to have at minimum one bit per value.
+ * @val_cnt: Number of values.
+ *
+ * Return: Number of required bitfields.
+ */
+static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
+{
+ return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / KBASE_HWCNT_BITFIELD_BITS;
+}
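+
+/*
+ * For example, 64 values fit in a single u64 bitfield, while 65 values
+ * require two.
+ */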
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block.
+ * @dst: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+
+ if (WARN_ON(!dst))
+ return;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
+ memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES);
+}
+
+/**
+ * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map.
+ * @dst: Non-NULL pointer to enable map to zero.
+ */
+static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_map *dst)
+{
+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
+ return;
+
+ if (dst->hwcnt_enable_map != NULL)
+ memset(dst->hwcnt_enable_map, 0, dst->metadata->enable_map_bytes);
+
+ dst->clk_enable_map = 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block.
+ * @dst: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+ size_t bitfld_idx;
+
+ if (WARN_ON(!dst))
+ return;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ u64 block_enable_map_mask = U64_MAX;
+
+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+ block_enable_map_mask = (1ull << remaining_values) - 1;
+
+ block_enable_map[bitfld_idx] = block_enable_map_mask;
+ }
+}
+
+/**
+ * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable
+ * map.
+ * @dst: Non-NULL pointer to enable map.
+ */
+static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst)
+{
+ size_t grp, blk, blk_inst;
+
+ if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
+ return;
+
+ kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
+ kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst);
+
+ dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1;
+}
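+
+/*
+ * Example (illustrative sketch): a typical setup that allocates an enable map
+ * for existing metadata, enables everything, and frees it again. metadata is
+ * assumed to be a valid const struct kbase_hwcnt_metadata pointer.
+ *
+ *	struct kbase_hwcnt_enable_map map = { 0 };
+ *
+ *	if (!kbase_hwcnt_enable_map_alloc(metadata, &map)) {
+ *		kbase_hwcnt_enable_map_enable_all(&map);
+ *		... use the map ...
+ *		kbase_hwcnt_enable_map_free(&map);
+ *	}
+ */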
+
+/**
+ * kbase_hwcnt_enable_map_copy() - Copy an enable map to another.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_copy(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
+ WARN_ON(dst->metadata != src->metadata))
+ return;
+
+ if (dst->hwcnt_enable_map != NULL) {
+ if (WARN_ON(!src->hwcnt_enable_map))
+ return;
+
+ memcpy(dst->hwcnt_enable_map, src->hwcnt_enable_map,
+ dst->metadata->enable_map_bytes);
+ }
+
+ dst->clk_enable_map = src->clk_enable_map;
+}
+
+/**
+ * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst.
+ * @dst: Non-NULL pointer to destination enable map.
+ * @src: Non-NULL pointer to source enable map.
+ *
+ * The dst and src MUST have been created from the same metadata.
+ */
+static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_map *src)
+{
+ if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) ||
+ WARN_ON(dst->metadata != src->metadata))
+ return;
+
+ if (dst->hwcnt_enable_map != NULL) {
+ size_t i;
+ size_t const bitfld_count =
+ dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES;
+
+ if (WARN_ON(!src->hwcnt_enable_map))
+ return;
+
+ for (i = 0; i < bitfld_count; i++)
+ dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i];
+ }
+
+ dst->clk_enable_map |= src->clk_enable_map;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block
+ * instance are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: true if any values in the block are enabled, else false.
+ */
+static inline bool
+kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp,
+ size_t blk, size_t blk_inst)
+{
+ bool any_enabled = false;
+ size_t val_cnt;
+ size_t bitfld_cnt;
+ const u64 *const block_enable_map =
+ kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
+ size_t bitfld_idx;
+
+ if (WARN_ON(!enable_map))
+ return false;
+
+ val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk);
+ bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
+
+ for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
+ const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS);
+ u64 block_enable_map_mask = U64_MAX;
+
+ if (remaining_values < KBASE_HWCNT_BITFIELD_BITS)
+ block_enable_map_mask = (1ull << remaining_values) - 1;
+
+ any_enabled = any_enabled || (block_enable_map[bitfld_idx] & block_enable_map_mask);
+ }
+
+ return any_enabled;
+}
+
+/**
+ * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled.
+ * @enable_map: Non-NULL pointer to enable map.
+ *
+ * Return: true if any values are enabled, else false.
+ */
+static inline bool
+kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map)
+{
+ size_t grp, blk, blk_inst;
+ u64 clk_enable_map_mask;
+
+ if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata))
+ return false;
+
+ clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1;
+
+ if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask))
+ return true;
+
+ kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst)
+ {
+ if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block
+ * instance is enabled.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to check in the block instance.
+ *
+ * Return: true if the value was enabled, else false.
+ */
+static inline bool kbase_hwcnt_enable_map_block_value_enabled(const u64 *bitfld, size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ return (bitfld[idx] & mask) != 0;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block
+ * instance.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to enable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_enable_value(u64 *bitfld, size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ bitfld[idx] |= mask;
+}
+
+/**
+ * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block
+ * instance.
+ * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_idx: Index of the value to disable in the block instance.
+ */
+static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_t val_idx)
+{
+ const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS;
+ const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS;
+ const u64 mask = 1ull << bit;
+
+ bitfld[idx] &= ~mask;
+}
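+
+/*
+ * Example (illustrative sketch): enabling only the first counter (the value
+ * immediately after the headers) of one block instance. map, grp, blk,
+ * blk_inst and hdr_cnt are assumed to come from the caller, with hdr_cnt
+ * obtained via kbase_hwcnt_metadata_block_headers_count().
+ *
+ *	u64 *blk_em = kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst);
+ *
+ *	kbase_hwcnt_enable_map_block_disable_all(map, grp, blk, blk_inst);
+ *	kbase_hwcnt_enable_map_block_enable_value(blk_em, hdr_cnt);
+ */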
+
+/**
+ * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be
+ * initialised to undefined values, so must be used as a copy destination,
+ * or cleared before use.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_free() - Free a dump buffer.
+ * @dump_buf: Dump buffer to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer structure, or on an already
+ * freed dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers.
+ * @metadata: Non-NULL pointer to metadata describing the system.
+ * @n: Number of dump buffers to allocate.
+ * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised.
+ *
+ * A single zeroed contiguous page allocation will be used for all of the
+ * buffers inside the array, where:
+ * dump_bufs->bufs[n].dump_buf == page_addr + n * metadata->dump_buf_bytes
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
+ struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array.
+ * @dump_bufs: Dump buffer array to be freed.
+ *
+ * Can be safely called on an all-zeroed dump buffer array structure, or on an
+ * already freed dump buffer array.
+ */
+void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
+ * instance's dump buffer.
+ * @buf: Non-NULL pointer to dump buffer.
+ * @grp: Index of the group in the metadata.
+ * @blk: Index of the block in the group.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: u64* to the dump buffer for the block instance.
+ */
+static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf,
+ size_t grp, size_t blk, size_t blk_inst)
+{
+ if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf))
+ return NULL;
+
+ if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) ||
+ WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) ||
+ WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ return buf->dump_buf;
+
+ return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index +
+ buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
+ (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst);
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst.
+ * After the operation, all non-enabled values
+ * will be undefined.
+ * @dst: Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, size_t val_cnt)
+{
+ if (WARN_ON(!dst_blk))
+ return;
+
+ memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst.
+ * After the operation, all values
+ * (including padding bytes) will be
+ * zero.
+ * Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dump buffer.
+ */
+void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst);
+
+/**
+ * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in
+ * dst (including padding bytes and
+ * unavailable blocks).
+ * After the operation, all enabled
+ * values will be unchanged.
+ * @dst: Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled
+ * values in a block.
+ * After the operation, all
+ * enabled values will be
+ * unchanged.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em,
+ size_t val_cnt)
+{
+ size_t val;
+
+ if (WARN_ON(!dst_blk))
+ return;
+
+ for (val = 0; val < val_cnt; val++) {
+ if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val))
+ dst_blk[val] = 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst.
+ * After the operation, all non-enabled values
+ * will be undefined.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @val_cnt: Number of values in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, const u64 *src_blk,
+ size_t val_cnt)
+{
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ /* Copy all the counters in the block instance.
+ * Values of non-enabled counters are undefined.
+ */
+ memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES));
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to
+ * dst.
+ * After the operation, all non-enabled
+ * values (including padding bytes) will
+ * be zero.
+ * Slower than the non-strict variant.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values
+ * from src to dst.
+ * After the operation, all
+ * non-enabled values will be
+ * zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @val_cnt: Number of values in the block.
+ *
+ * After the copy, any disabled values in dst will be zero.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em, size_t val_cnt)
+{
+ size_t val;
+
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ for (val = 0; val < val_cnt; val++) {
+ bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
+
+ dst_blk[val] = val_enabled ? src_blk[val] : 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and
+ * accumulate all enabled counters from
+ * src to dst.
+ * After the operation, all non-enabled
+ * values will be undefined.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and
+ * accumulate all block counters
+ * from src to dst.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, const u64 *src_blk,
+ size_t hdr_cnt, size_t ctr_cnt)
+{
+ size_t ctr;
+
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ /* Copy all the headers in the block instance.
+ * Values of non-enabled headers are undefined.
+ */
+ memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES);
+
+ /* Accumulate all the counters in the block instance.
+ * Values of non-enabled counters are undefined.
+ */
+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++)
+ dst_blk[ctr] += src_blk[ctr];
+}
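+
+/*
+ * Worked example (illustrative): with hdr_cnt == 4 and ctr_cnt == 60,
+ * dst_blk[0..3] are overwritten with the src headers, and each of
+ * dst_blk[4..63] has the corresponding src counter added to it.
+ */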
+
+/**
+ * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and
+ * accumulate all enabled counters
+ * from src to dst.
+ * After the operation, all
+ * non-enabled values (including
+ * padding bytes) will be zero.
+ * Slower than the non-strict
+ * variant.
+ * @dst: Non-NULL pointer to dst dump buffer.
+ * @src: Non-NULL pointer to src dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ *
+ * The dst, src, and dst_enable_map MUST have been created from the same
+ * metadata.
+ */
+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_dump_buffer *src,
+ const struct kbase_hwcnt_enable_map *dst_enable_map);
+
+/**
+ * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block
+ * headers and accumulate
+ * all block counters from
+ * src to dst.
+ * After the operation, all
+ * non-enabled values will
+ * be zero.
+ * @dst_blk: Non-NULL pointer to dst block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @src_blk: Non-NULL pointer to src block obtained from a call to
+ * kbase_hwcnt_dump_buffer_block_instance.
+ * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
+ * kbase_hwcnt_enable_map_block_instance.
+ * @hdr_cnt: Number of headers in the block.
+ * @ctr_cnt: Number of counters in the block.
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(u64 *dst_blk, const u64 *src_blk,
+ const u64 *blk_em,
+ size_t hdr_cnt, size_t ctr_cnt)
+{
+ size_t ctr;
+
+ if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+ return;
+
+ kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, hdr_cnt);
+
+ for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+ bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, ctr);
+
+ if (ctr_enabled)
+ dst_blk[ctr] += src_blk[ctr];
+ else
+ dst_blk[ctr] = 0;
+ }
+}
+
+/**
+ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in the
+ * metadata.
+ * @md: Non-NULL pointer to metadata.
+ * @clk: size_t variable used as clock iterator.
+ */
+#define kbase_hwcnt_metadata_for_each_clock(md, clk) for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++)
+
+/**
+ * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled
+ * in clk_enable_map.
+ * @clk_enable_map: An enable map for clock domains.
+ * @index: Index of the enable map for clock domain.
+ *
+ * Return: true if the index of the clock domain is enabled, else false.
+ */
+static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, const size_t index)
+{
+ if (WARN_ON(index >= 64))
+ return false;
+ if (clk_enable_map & (1ull << index))
+ return true;
+ return false;
+}
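+
+/*
+ * For example (illustrative): with clk_enable_map == 0x5, indices 0 and 2
+ * report as enabled, while index 1 reports as disabled.
+ */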
+
+#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
new file mode 100644
index 0000000..d618764
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure.
+ * @hctx: Hardware counter context being virtualized.
+ * @dump_threshold_ns: Minimum period between dumps performed on behalf of
+ * different clients. If a client requests a dump within
+ * this period of another client's dump, no new accumulator
+ * dump is performed and the values already accumulated for
+ * the requesting client are returned instead. If 0, rate
+ * limiting is disabled.
+ * @metadata: Hardware counter metadata.
+ * @lock: Lock acquired at all entrypoints, to protect mutable
+ * state.
+ * @client_count: Current number of virtualizer clients.
+ * @clients: List of virtualizer clients.
+ * @accum: Hardware counter accumulator. NULL if no clients.
+ * @scratch_map: Enable map used as scratch space during counter changes.
+ * @scratch_buf: Dump buffer used as scratch space during dumps.
+ * @ts_last_dump_ns: End time of most recent dump across all clients.
+ */
+struct kbase_hwcnt_virtualizer {
+ struct kbase_hwcnt_context *hctx;
+ u64 dump_threshold_ns;
+ const struct kbase_hwcnt_metadata *metadata;
+ struct mutex lock;
+ size_t client_count;
+ struct list_head clients;
+ struct kbase_hwcnt_accumulator *accum;
+ struct kbase_hwcnt_enable_map scratch_map;
+ struct kbase_hwcnt_dump_buffer scratch_buf;
+ u64 ts_last_dump_ns;
+};
+
+/**
+ * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure.
+ * @node: List node used for virtualizer client list.
+ * @hvirt: Hardware counter virtualizer.
+ * @enable_map: Enable map with client's current enabled counters.
+ * @accum_buf: Dump buffer with client's current accumulated counters.
+ * @has_accum: True if accum_buf contains any accumulated counters.
+ * @ts_start_ns: Counter collection start time of current dump.
+ */
+struct kbase_hwcnt_virtualizer_client {
+ struct list_head node;
+ struct kbase_hwcnt_virtualizer *hvirt;
+ struct kbase_hwcnt_enable_map enable_map;
+ struct kbase_hwcnt_dump_buffer accum_buf;
+ bool has_accum;
+ u64 ts_start_ns;
+};
+
+const struct kbase_hwcnt_metadata *
+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ if (!hvirt)
+ return NULL;
+
+ return hvirt->metadata;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory.
+ * @hvcli: Pointer to virtualizer client.
+ *
+ * Will safely free a client in any partial state of construction.
+ */
+static void kbasep_hwcnt_virtualizer_client_free(struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ if (!hvcli)
+ return;
+
+ kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf);
+ kbase_hwcnt_enable_map_free(&hvcli->enable_map);
+ kfree(hvcli);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer
+ * client.
+ * @metadata: Non-NULL pointer to counter metadata.
+ * @out_hvcli: Non-NULL pointer to where created client will be stored on
+ * success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_alloc(const struct kbase_hwcnt_metadata *metadata,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *hvcli = NULL;
+
+ WARN_ON(!metadata);
+ WARN_ON(!out_hvcli);
+
+ hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL);
+ if (!hvcli)
+ return -ENOMEM;
+
+ errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf);
+ if (errcode)
+ goto error;
+
+ *out_hvcli = hvcli;
+ return 0;
+error:
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a
+ * client's accumulation buffer.
+ * @hvcli: Non-NULL pointer to virtualizer client.
+ * @dump_buf: Non-NULL pointer to dump buffer to accumulate from.
+ */
+static void
+kbasep_hwcnt_virtualizer_client_accumulate(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ WARN_ON(!hvcli);
+ WARN_ON(!dump_buf);
+ lockdep_assert_held(&hvcli->hvirt->lock);
+
+ if (hvcli->has_accum) {
+ /* If already some accumulation, accumulate */
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ } else {
+ /* If no accumulation, copy */
+ kbase_hwcnt_dump_buffer_copy(&hvcli->accum_buf, dump_buf, &hvcli->enable_map);
+ }
+ hvcli->has_accum = true;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter
+ * accumulator after final client
+ * removal.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Will safely terminate the accumulator in any partial state of initialisation.
+ */
+static void kbasep_hwcnt_virtualizer_accumulator_term(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ WARN_ON(!hvirt);
+ lockdep_assert_held(&hvirt->lock);
+ WARN_ON(hvirt->client_count);
+
+ kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf);
+ kbase_hwcnt_enable_map_free(&hvirt->scratch_map);
+ kbase_hwcnt_accumulator_release(hvirt->accum);
+ hvirt->accum = NULL;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter
+ * accumulator before first client
+ * addition.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_accumulator_init(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ int errcode;
+
+ WARN_ON(!hvirt);
+ lockdep_assert_held(&hvirt->lock);
+ WARN_ON(hvirt->client_count);
+ WARN_ON(hvirt->accum);
+
+ errcode = kbase_hwcnt_accumulator_acquire(hvirt->hctx, &hvirt->accum);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_enable_map_alloc(hvirt->metadata, &hvirt->scratch_map);
+ if (errcode)
+ goto error;
+
+ errcode = kbase_hwcnt_dump_buffer_alloc(hvirt->metadata, &hvirt->scratch_buf);
+ if (errcode)
+ goto error;
+
+ return 0;
+error:
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the
+ * virtualizer.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client to add.
+ * @enable_map: Non-NULL pointer to client's initial enable map.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map)
+{
+ int errcode = 0;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!enable_map);
+ lockdep_assert_held(&hvirt->lock);
+
+ if (hvirt->client_count == 0)
+ /* First client added, so initialise the accumulator */
+ errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt);
+ if (errcode)
+ return errcode;
+
+ hvirt->client_count += 1;
+
+ if (hvirt->client_count == 1) {
+ /* First client, so just pass the enable map onwards as is */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map,
+ &ts_start_ns, &ts_end_ns, NULL);
+ } else {
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
+
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
+ /* Accumulate into only existing clients' accumulation bufs */
+ if (!errcode)
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos,
+ &hvirt->scratch_buf);
+ }
+ if (errcode)
+ goto error;
+
+ list_add(&hvcli->node, &hvirt->clients);
+ hvcli->hvirt = hvirt;
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+ hvcli->has_accum = false;
+ hvcli->ts_start_ns = ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = ts_end_ns;
+
+ return 0;
+error:
+ hvirt->client_count -= 1;
+ if (hvirt->client_count == 0)
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the
+ * virtualizer.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client to remove.
+ */
+static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ int errcode = 0;
+ u64 ts_start_ns;
+ u64 ts_end_ns;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ lockdep_assert_held(&hvirt->lock);
+
+ list_del(&hvcli->node);
+ hvirt->client_count -= 1;
+
+ if (hvirt->client_count == 0) {
+ /* Last client removed, so terminate the accumulator */
+ kbasep_hwcnt_virtualizer_accumulator_term(hvirt);
+ } else {
+ struct kbase_hwcnt_virtualizer_client *pos;
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ &ts_start_ns, &ts_end_ns,
+ &hvirt->scratch_buf);
+ /* Accumulate into remaining clients' accumulation bufs */
+ if (!errcode) {
+ list_for_each_entry (pos, &hvirt->clients, node)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos,
+ &hvirt->scratch_buf);
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = ts_end_ns;
+ }
+ }
+ WARN_ON(errcode);
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ * currently enabled counters,
+ * and enable a new set of
+ * counters that will be used for
+ * subsequent dumps.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @enable_map: Non-NULL pointer to the new counter enable map for the client.
+ * Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_set_counters(
+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map, u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!enable_map);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(enable_map->metadata != hvirt->metadata);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ /* Make the scratch enable map the union of all enable maps */
+ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
+ list_for_each_entry (pos, &hvirt->clients, node)
+ /* Ignore the enable map of the selected client */
+ if (pos != hvcli)
+ kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
+
+ /* Set the counters with the new union enable map */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
+ ts_start_ns, ts_end_ns, &hvirt->scratch_buf);
+ if (errcode)
+ return errcode;
+
+ /* Accumulate into all accumulation bufs except the selected client's */
+ list_for_each_entry (pos, &hvirt->clients, node)
+ if (pos != hvcli)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
+
+ /* Finally, write into the dump buf */
+ if (dump_buf) {
+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+ if (hvcli->has_accum) {
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src,
+ &hvcli->enable_map);
+ src = &hvcli->accum_buf;
+ }
+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+ }
+ hvcli->has_accum = false;
+
+ /* Update the selected client's enable map */
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+
+ return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer *hvirt;
+
+ if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hvirt = hvcli->hvirt;
+
+ if ((enable_map->metadata != hvirt->metadata) ||
+ (dump_buf && (dump_buf->metadata != hvirt->metadata)))
+ return -EINVAL;
+
+ mutex_lock(&hvirt->lock);
+
+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+ /*
+ * If there's only one client with no prior accumulation, we can
+ * completely skip the virtualize and just pass through the call
+ * to the accumulator, saving a fair few copies and
+ * accumulations.
+ */
+ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map,
+ ts_start_ns, ts_end_ns, dump_buf);
+
+ if (!errcode) {
+ /* Update the selected client's enable map */
+ kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map);
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+ }
+ } else {
+ /* Otherwise, do the full virtualize */
+ errcode = kbasep_hwcnt_virtualizer_client_set_counters(
+ hvirt, hvcli, enable_map, ts_start_ns, ts_end_ns, dump_buf);
+ }
+
+ mutex_unlock(&hvirt->lock);
+
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ * currently enabled counters.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *hvirt,
+ struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *pos;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ /* Perform the dump */
+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns,
+ &hvirt->scratch_buf);
+ if (errcode)
+ return errcode;
+
+ /* Accumulate into all accumulation bufs except the selected client's */
+ list_for_each_entry (pos, &hvirt->clients, node)
+ if (pos != hvcli)
+ kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
+
+ /* Finally, write into the dump buf */
+ if (dump_buf) {
+ const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf;
+
+ if (hvcli->has_accum) {
+ kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src,
+ &hvcli->enable_map);
+ src = &hvcli->accum_buf;
+ }
+ kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map);
+ }
+ hvcli->has_accum = false;
+
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+
+ return errcode;
+}
+
+/**
+ * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the
+ * client's currently enabled counters
+ * if it hasn't been rate limited,
+ * otherwise return the client's most
+ * recent accumulation.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+static int kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+ struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ bool rate_limited = true;
+
+ WARN_ON(!hvirt);
+ WARN_ON(!hvcli);
+ WARN_ON(!ts_start_ns);
+ WARN_ON(!ts_end_ns);
+ WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata));
+ lockdep_assert_held(&hvirt->lock);
+
+ if (hvirt->dump_threshold_ns == 0) {
+ /* Threshold == 0, so rate limiting disabled */
+ rate_limited = false;
+ } else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) {
+ /* Last dump was performed by this client, and dumps from an
+ * individual client are never rate limited
+ */
+ rate_limited = false;
+ } else {
+ const u64 ts_ns = kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum);
+ const u64 time_since_last_dump_ns = ts_ns - hvirt->ts_last_dump_ns;
+
+ /* Dump period equals or exceeds the threshold */
+ if (time_since_last_dump_ns >= hvirt->dump_threshold_ns)
+ rate_limited = false;
+ }
+
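+ /* Worked example (illustrative): with dump_threshold_ns == 1000000 (1 ms),
+ * if client A dumped at time t and client B requests a dump at t + 0.4 ms,
+ * B is rate limited: it is given the values already accumulated for it
+ * during A's dump, and *ts_end_ns is reported as A's dump time.
+ */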
+ if (!rate_limited)
+ return kbasep_hwcnt_virtualizer_client_dump(hvirt, hvcli, ts_start_ns, ts_end_ns,
+ dump_buf);
+
+ /* If we've gotten this far, the client must have something accumulated,
+ * otherwise it is a logic error.
+ */
+ WARN_ON(!hvcli->has_accum);
+
+ if (dump_buf)
+ kbase_hwcnt_dump_buffer_copy(dump_buf, &hvcli->accum_buf, &hvcli->enable_map);
+ hvcli->has_accum = false;
+
+ *ts_start_ns = hvcli->ts_start_ns;
+ *ts_end_ns = hvirt->ts_last_dump_ns;
+ hvcli->ts_start_ns = hvirt->ts_last_dump_ns;
+
+ return 0;
+}
+
+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer *hvirt;
+
+ if (!hvcli || !ts_start_ns || !ts_end_ns)
+ return -EINVAL;
+
+ hvirt = hvcli->hvirt;
+
+ if (dump_buf && (dump_buf->metadata != hvirt->metadata))
+ return -EINVAL;
+
+ mutex_lock(&hvirt->lock);
+
+ if ((hvirt->client_count == 1) && (!hvcli->has_accum)) {
+ /*
+ * If there's only one client with no prior accumulation, we can
+ * completely skip the virtualize and just pass through the call
+ * to the accumulator, saving a fair few copies and
+ * accumulations.
+ */
+ errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns,
+ dump_buf);
+
+ if (!errcode) {
+ /* Fix up the timestamps */
+ *ts_start_ns = hvcli->ts_start_ns;
+ hvcli->ts_start_ns = *ts_end_ns;
+
+ /* Store the most recent dump time for rate limiting */
+ hvirt->ts_last_dump_ns = *ts_end_ns;
+ }
+ } else {
+ /* Otherwise, do the full virtualize */
+ errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited(
+ hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf);
+ }
+
+ mutex_unlock(&hvirt->lock);
+
+ return errcode;
+}
+
+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli)
+{
+ int errcode;
+ struct kbase_hwcnt_virtualizer_client *hvcli;
+
+ if (!hvirt || !enable_map || !out_hvcli || (enable_map->metadata != hvirt->metadata))
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_virtualizer_client_alloc(hvirt->metadata, &hvcli);
+ if (errcode)
+ return errcode;
+
+ mutex_lock(&hvirt->lock);
+
+ errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map);
+
+ mutex_unlock(&hvirt->lock);
+
+ if (errcode) {
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+ return errcode;
+ }
+
+ *out_hvcli = hvcli;
+ return 0;
+}
+
+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli)
+{
+ if (!hvcli)
+ return;
+
+ mutex_lock(&hvcli->hvirt->lock);
+
+ kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli);
+
+ mutex_unlock(&hvcli->hvirt->lock);
+
+ kbasep_hwcnt_virtualizer_client_free(hvcli);
+}
+
+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns,
+ struct kbase_hwcnt_virtualizer **out_hvirt)
+{
+ struct kbase_hwcnt_virtualizer *virt;
+ const struct kbase_hwcnt_metadata *metadata;
+
+ if (!hctx || !out_hvirt)
+ return -EINVAL;
+
+ metadata = kbase_hwcnt_context_metadata(hctx);
+ if (!metadata)
+ return -EINVAL;
+
+ virt = kzalloc(sizeof(*virt), GFP_KERNEL);
+ if (!virt)
+ return -ENOMEM;
+
+ virt->hctx = hctx;
+ virt->dump_threshold_ns = dump_threshold_ns;
+ virt->metadata = metadata;
+
+ mutex_init(&virt->lock);
+ INIT_LIST_HEAD(&virt->clients);
+
+ *out_hvirt = virt;
+ return 0;
+}
+
+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt)
+{
+ if (!hvirt)
+ return;
+
+ /* Non-zero client count implies client leak */
+ if (WARN_ON(hvirt->client_count != 0)) {
+ struct kbase_hwcnt_virtualizer_client *pos, *n;
+
+ list_for_each_entry_safe (pos, n, &hvirt->clients, node)
+ kbase_hwcnt_virtualizer_client_destroy(pos);
+ }
+
+ WARN_ON(hvirt->client_count != 0);
+ WARN_ON(hvirt->accum);
+
+ kfree(hvirt);
+}
+
+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt,
+ struct work_struct *work)
+{
+ if (WARN_ON(!hvirt) || WARN_ON(!work))
+ return false;
+
+ return kbase_hwcnt_context_queue_work(hvirt->hctx, work);
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h
new file mode 100644
index 0000000..485ba74
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter virtualizer API.
+ *
+ * Virtualizes a hardware counter context, so multiple clients can access
+ * a single hardware counter resource as though each was the exclusive user.
+ */
+
+#ifndef _KBASE_HWCNT_VIRTUALIZER_H_
+#define _KBASE_HWCNT_VIRTUALIZER_H_
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct kbase_hwcnt_context;
+struct kbase_hwcnt_virtualizer;
+struct kbase_hwcnt_virtualizer_client;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer.
+ * @hctx: Non-NULL pointer to the hardware counter context to
+ * virtualize.
+ * @dump_threshold_ns: Minimum period between dumps performed on behalf of
+ * different clients. If a client requests a dump within
+ * this period of another client's dump, no new accumulator
+ * dump is performed and the values already accumulated for
+ * the requesting client are returned instead. If 0, rate
+ * limiting is disabled.
+ * @out_hvirt: Non-NULL pointer to where the pointer to the created
+ * virtualizer will be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns,
+ struct kbase_hwcnt_virtualizer **out_hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer.
+ * @hvirt: Pointer to virtualizer to be terminated.
+ */
+void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by
+ * the virtualizer, so related counter data
+ * structures can be created.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ *
+ * Return: Non-NULL pointer to metadata, or NULL on error.
+ */
+const struct kbase_hwcnt_metadata *
+kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt);
+
+/**
+ * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @enable_map: Non-NULL pointer to the enable map for the client. Must have the
+ * same metadata as the virtualizer.
+ * @out_hvcli: Non-NULL pointer to where the pointer to the created client will
+ * be stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ struct kbase_hwcnt_virtualizer_client **out_hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client.
+ * @hvcli: Pointer to the hardware counter client.
+ */
+void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli);
+
+/**
+ * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's
+ * currently enabled counters, and
+ * enable a new set of counters
+ * that will be used for
+ * subsequent dumps.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @enable_map: Non-NULL pointer to the new counter enable map for the client.
+ * Must have the same metadata as the virtualizer.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli,
+ const struct kbase_hwcnt_enable_map *enable_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's
+ * currently enabled counters.
+ * @hvcli: Non-NULL pointer to the virtualizer client.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
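+/*
+ * Illustrative usage sketch (not part of the API contract): "hvirt" is assumed
+ * to come from kbase_hwcnt_virtualizer_init(), and "map" and "buf" are assumed
+ * to be an enable map and dump buffer created from the virtualizer's metadata;
+ * error handling is trimmed.
+ *
+ *     struct kbase_hwcnt_virtualizer_client *cli;
+ *     u64 ts_start_ns, ts_end_ns;
+ *
+ *     kbase_hwcnt_virtualizer_client_create(hvirt, &map, &cli);
+ *     kbase_hwcnt_virtualizer_client_dump(cli, &ts_start_ns, &ts_end_ns, &buf);
+ *     kbase_hwcnt_virtualizer_client_destroy(cli);
+ */
+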
+/**
+ * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async
+ * work on a workqueue specialized for
+ * hardware counters.
+ * @hvirt: Non-NULL pointer to the hardware counter virtualizer.
+ * @work: Non-NULL pointer to work to queue.
+ *
+ * Return: false if work was already on a queue, true otherwise.
+ *
+ * This is a convenience function that directly calls the underlying
+ * kbase_hwcnt_context's kbase_hwcnt_context_queue_work.
+ */
+bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt,
+ struct work_struct *work);
+
+#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h
new file mode 100644
index 0000000..501c008
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for hardware counter watchdog.
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_H_
+
+#include <linux/types.h>
+
+/*
+ * Opaque structure of information used to create a watchdog timer interface.
+ */
+struct kbase_hwcnt_watchdog_info;
+
+/**
+ * typedef kbase_hwcnt_watchdog_callback_fn - Callback function called when the watchdog timer expires
+ *
+ * @user_data: Pointer to the callback user data.
+ */
+typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_enable_fn - Enable watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ * @period_ms: Period in milliseconds of the watchdog timer
+ * @callback: Non-NULL pointer to a watchdog callback function
+ * @user_data: Pointer to the user data, used when watchdog timer callback is called
+ *
+ * Return: 0 if the watchdog timer was enabled successfully, error code otherwise.
+ */
+typedef int kbase_hwcnt_watchdog_enable_fn(const struct kbase_hwcnt_watchdog_info *timer,
+ u32 period_ms,
+ kbase_hwcnt_watchdog_callback_fn *callback,
+ void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_disable_fn - Disable watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ */
+typedef void kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
+
+/**
+ * typedef kbase_hwcnt_watchdog_modify_fn - Modify watchdog timer's timeout
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ * @delay_ms: New delay in milliseconds after which the watchdog timer will expire
+ */
+typedef void kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer,
+ u32 delay_ms);
+
+/**
+ * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface.
+ *
+ * @timer: Immutable watchdog timer info
+ * @enable: Function ptr to enable watchdog
+ * @disable: Function ptr to disable watchdog
+ * @modify: Function ptr to modify watchdog
+ */
+struct kbase_hwcnt_watchdog_interface {
+ const struct kbase_hwcnt_watchdog_info *timer;
+ kbase_hwcnt_watchdog_enable_fn *enable;
+ kbase_hwcnt_watchdog_disable_fn *disable;
+ kbase_hwcnt_watchdog_modify_fn *modify;
+};
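+
+/*
+ * Illustrative usage sketch (hypothetical backend code; "wd_if" is a populated
+ * struct kbase_hwcnt_watchdog_interface, and "backend_timeout"/"backend" are
+ * made-up names; error handling is trimmed):
+ *
+ *     static void backend_timeout(void *user_data)
+ *     {
+ *             ... e.g. trigger a periodic dump so counters cannot overflow ...
+ *     }
+ *
+ *     wd_if.enable(wd_if.timer, 1000, backend_timeout, backend);
+ *     wd_if.modify(wd_if.timer, 500);
+ *     wd_if.disable(wd_if.timer);
+ */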
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
new file mode 100644
index 0000000..4caa832
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "mali_kbase.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h"
+
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog
+ * interface.
+ *
+ * @workq: Single threaded work queue in which to execute callbacks.
+ * @dwork: Worker to execute callback function.
+ * @timer_enabled: True if watchdog timer enabled, otherwise false
+ * @callback: Watchdog callback function
+ * @user_data: Pointer to user data passed as argument to the callback
+ * function
+ */
+struct kbase_hwcnt_watchdog_if_timer_info {
+ struct workqueue_struct *workq;
+ struct delayed_work dwork;
+ bool timer_enabled;
+ kbase_hwcnt_watchdog_callback_fn *callback;
+ void *user_data;
+};
+
+/**
+ * kbasep_hwcnt_watchdog_callback() - Watchdog callback
+ *
+ * @work: Work structure
+ *
+ * Function to be called in a work queue after watchdog timer has expired.
+ */
+static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const info =
+ container_of(work, struct kbase_hwcnt_watchdog_if_timer_info, dwork.work);
+
+ if (info->callback)
+ info->callback(info->user_data);
+}
+
+static int kbasep_hwcnt_watchdog_if_timer_enable(
+ const struct kbase_hwcnt_watchdog_info *const timer, u32 const period_ms,
+ kbase_hwcnt_watchdog_callback_fn *const callback, void *const user_data)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
+
+ if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled))
+ return -EINVAL;
+
+ timer_info->callback = callback;
+ timer_info->user_data = user_data;
+
+ queue_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(period_ms));
+ timer_info->timer_enabled = true;
+
+ return 0;
+}
+
+static void
+kbasep_hwcnt_watchdog_if_timer_disable(const struct kbase_hwcnt_watchdog_info *const timer)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
+
+ if (WARN_ON(!timer))
+ return;
+
+ if (!timer_info->timer_enabled)
+ return;
+
+ cancel_delayed_work_sync(&timer_info->dwork);
+ timer_info->timer_enabled = false;
+}
+
+static void
+kbasep_hwcnt_watchdog_if_timer_modify(const struct kbase_hwcnt_watchdog_info *const timer,
+ u32 const delay_ms)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer;
+
+ if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled))
+ return;
+
+ mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms));
+}
+
+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
+
+ if (WARN_ON(!watchdog_if))
+ return;
+
+ timer_info = (void *)watchdog_if->timer;
+
+ if (WARN_ON(!timer_info))
+ return;
+
+ destroy_workqueue(timer_info->workq);
+ kfree(timer_info);
+
+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+ .timer = NULL, .enable = NULL, .disable = NULL, .modify = NULL
+ };
+}
+
+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *const watchdog_if)
+{
+ struct kbase_hwcnt_watchdog_if_timer_info *timer_info;
+
+ if (WARN_ON(!watchdog_if))
+ return -EINVAL;
+
+ timer_info = kmalloc(sizeof(*timer_info), GFP_KERNEL);
+ if (!timer_info)
+ return -ENOMEM;
+
+ *timer_info = (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = false };
+
+ INIT_DELAYED_WORK(&timer_info->dwork, kbasep_hwcnt_watchdog_callback);
+
+ *watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+ .timer = (void *)timer_info,
+ .enable = kbasep_hwcnt_watchdog_if_timer_enable,
+ .disable = kbasep_hwcnt_watchdog_if_timer_disable,
+ .modify = kbasep_hwcnt_watchdog_if_timer_modify,
+ };
+
+ timer_info->workq = alloc_workqueue("mali_hwc_watchdog_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (timer_info->workq)
+ return 0;
+
+ kfree(timer_info);
+ return -ENOMEM;
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
new file mode 100644
index 0000000..a545ad3
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_watchdog_interface for HWC backend
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+
+struct kbase_hwcnt_watchdog_interface;
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface for the hardware counter backend.
+ *
+ * @watchdog_if: Non-NULL pointer to watchdog interface that is filled in on creation success
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface for the hardware
+ * counter backend.
+ *
+ * @watchdog_if: Pointer to watchdog interface to destroy
+ */
+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *watchdog_if);
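+
+/*
+ * Illustrative usage sketch (hypothetical caller; error handling trimmed):
+ *
+ *     struct kbase_hwcnt_watchdog_interface wd_if;
+ *
+ *     if (!kbase_hwcnt_watchdog_if_timer_create(&wd_if)) {
+ *             ... use wd_if.enable / wd_if.modify / wd_if.disable ...
+ *             kbase_hwcnt_watchdog_if_timer_destroy(&wd_if);
+ *     }
+ */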
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */