Diffstat (limited to 'mali_kbase/hwcnt')
25 files changed, 11023 insertions, 0 deletions
diff --git a/mali_kbase/hwcnt/Kbuild b/mali_kbase/hwcnt/Kbuild new file mode 100644 index 0000000..8c8775f --- /dev/null +++ b/mali_kbase/hwcnt/Kbuild @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# (C) COPYRIGHT 2022 ARM Limited. All rights reserved. +# +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# + +mali_kbase-y += \ + hwcnt/mali_kbase_hwcnt.o \ + hwcnt/mali_kbase_hwcnt_gpu.o \ + hwcnt/mali_kbase_hwcnt_gpu_narrow.o \ + hwcnt/mali_kbase_hwcnt_types.o \ + hwcnt/mali_kbase_hwcnt_virtualizer.o \ + hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o + +ifeq ($(CONFIG_MALI_CSF_SUPPORT),y) + mali_kbase-y += \ + hwcnt/backend/mali_kbase_hwcnt_backend_csf.o \ + hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.o +else + mali_kbase-y += \ + hwcnt/backend/mali_kbase_hwcnt_backend_jm.o \ + hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.o +endif diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h new file mode 100644 index 0000000..6cfa6f5 --- /dev/null +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Virtual interface for hardware counter backends. + */ + +#ifndef _KBASE_HWCNT_BACKEND_H_ +#define _KBASE_HWCNT_BACKEND_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/* + * struct kbase_hwcnt_backend_info - Opaque pointer to information used to + * create an instance of a hardware counter + * backend. + */ +struct kbase_hwcnt_backend_info; + +/* + * struct kbase_hwcnt_backend - Opaque pointer to a hardware counter + * backend, used to perform dumps. + */ +struct kbase_hwcnt_backend; + +/* + * typedef kbase_hwcnt_backend_metadata_fn - Get the immutable hardware counter + * metadata that describes the layout + * of the counter data structures. + * @info: Non-NULL pointer to backend info. + * + * Multiple calls to this function with the same info are guaranteed to return + * the same metadata object each time. 
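 *
 * (Editor's illustration, not part of the original patch.) Because the
 * returned object is stable, a client can fetch it once and reuse it,
 * e.g. through a populated struct kbase_hwcnt_backend_interface *iface
 * (defined at the end of this header):
 *
 *	const struct kbase_hwcnt_metadata *md = iface->metadata(iface->info);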
+ * + * Return: Non-NULL pointer to immutable hardware counter metadata. + */ +typedef const struct kbase_hwcnt_metadata * +kbase_hwcnt_backend_metadata_fn(const struct kbase_hwcnt_backend_info *info); + +/** + * typedef kbase_hwcnt_backend_init_fn - Initialise a counter backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * All uses of the created hardware counter backend must be externally + * synchronised. + * + * Return: 0 on success, else error code. + */ +typedef int kbase_hwcnt_backend_init_fn(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend); + +/** + * typedef kbase_hwcnt_backend_term_fn - Terminate a counter backend. + * @backend: Pointer to backend to be terminated. + */ +typedef void kbase_hwcnt_backend_term_fn(struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_timestamp_ns_fn - Get the current backend + * timestamp. + * @backend: Non-NULL pointer to backend. + * + * Return: Backend timestamp in nanoseconds. + */ +typedef u64 kbase_hwcnt_backend_timestamp_ns_fn(struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_enable_fn - Start counter dumping with the + * backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * The enable_map must have been created using the interface's metadata. + * If the backend has already been enabled, an error is returned. + * + * May be called in an atomic context. + * + * Return: 0 on success, else error code. + */ +typedef int kbase_hwcnt_backend_dump_enable_fn(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_enable_nolock_fn - Start counter dumping + * with the backend. + * @backend: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * + * Exactly the same as kbase_hwcnt_backend_dump_enable_fn(), except must be + * called in an atomic context with the spinlock documented by the specific + * backend interface held. + * + * Return: 0 on success, else error code. + */ +typedef int +kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map); + +/** + * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with + * the backend. + * @backend: Non-NULL pointer to backend. + * + * If the backend is already disabled, does nothing. + * Any undumped counter values since the last dump get will be lost. + */ +typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped + * counters. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int kbase_hwcnt_backend_dump_clear_fn(struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_request_fn - Request an asynchronous counter + * dump. + * @backend: Non-NULL pointer to backend. + * @dump_time_ns: Non-NULL pointer where the timestamp of when the dump was + * requested will be written out to on success. + * + * If the backend is not enabled or another dump is already in progress, + * returns an error. + * + * Return: 0 on success, else error code. 
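 *
 * (Editor's illustration, not part of the original patch.) Together, the
 * typedefs in this header form the dump cycle a client is expected to
 * drive through struct kbase_hwcnt_backend_interface, defined at the end
 * of this file; iface, backend, enable_map and dump_buf are assumed to be
 * already set up, and error handling is elided:
 *
 *	u64 t_ns;
 *
 *	iface->dump_enable(backend, enable_map);
 *	iface->dump_request(backend, &t_ns);
 *	iface->dump_wait(backend);
 *	iface->dump_get(backend, dump_buf, enable_map, false);
 *	iface->dump_disable(backend);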
+ */ +typedef int kbase_hwcnt_backend_dump_request_fn(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns); + +/** + * typedef kbase_hwcnt_backend_dump_wait_fn - Wait until the last requested + * counter dump has completed. + * @backend: Non-NULL pointer to backend. + * + * If the backend is not enabled, returns an error. + * + * Return: 0 on success, else error code. + */ +typedef int kbase_hwcnt_backend_dump_wait_fn(struct kbase_hwcnt_backend *backend); + +/** + * typedef kbase_hwcnt_backend_dump_get_fn - Copy or accumulate the enabled + * counters dumped after the last dump + * request into the dump buffer. + * @backend: Non-NULL pointer to backend. + * @dump_buffer: Non-NULL pointer to destination dump buffer. + * @enable_map: Non-NULL pointer to enable map specifying enabled values. + * @accumulate: True if counters should be accumulated into dump_buffer, rather + * than copied. + * + * The resultant contents of the dump buffer are only well defined if a prior + * call to dump_wait returned successfully, and a new dump has not yet been + * requested by a call to dump_request. + * + * Return: 0 on success, else error code. + */ +typedef int kbase_hwcnt_backend_dump_get_fn(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map, + bool accumulate); + +/** + * struct kbase_hwcnt_backend_interface - Hardware counter backend virtual + * interface. + * @info: Immutable info used to initialise an instance of the + * backend. + * @metadata: Function ptr to get the immutable hardware counter + * metadata. + * @init: Function ptr to initialise an instance of the backend. + * @term: Function ptr to terminate an instance of the backend. + * @timestamp_ns: Function ptr to get the current backend timestamp. + * @dump_enable: Function ptr to enable dumping. + * @dump_enable_nolock: Function ptr to enable dumping while the + * backend-specific spinlock is already held. + * @dump_disable: Function ptr to disable dumping. + * @dump_clear: Function ptr to clear counters. + * @dump_request: Function ptr to request a dump. + * @dump_wait: Function ptr to wait until a dump completes. + * @dump_get: Function ptr to copy or accumulate a dump into a dump + * buffer. + */ +struct kbase_hwcnt_backend_interface { + const struct kbase_hwcnt_backend_info *info; + kbase_hwcnt_backend_metadata_fn *metadata; + kbase_hwcnt_backend_init_fn *init; + kbase_hwcnt_backend_term_fn *term; + kbase_hwcnt_backend_timestamp_ns_fn *timestamp_ns; + kbase_hwcnt_backend_dump_enable_fn *dump_enable; + kbase_hwcnt_backend_dump_enable_nolock_fn *dump_enable_nolock; + kbase_hwcnt_backend_dump_disable_fn *dump_disable; + kbase_hwcnt_backend_dump_clear_fn *dump_clear; + kbase_hwcnt_backend_dump_request_fn *dump_request; + kbase_hwcnt_backend_dump_wait_fn *dump_wait; + kbase_hwcnt_backend_dump_get_fn *dump_get; +}; + +#endif /* _KBASE_HWCNT_BACKEND_H_ */ diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c new file mode 100644 index 0000000..424a360 --- /dev/null +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -0,0 +1,1892 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" + +#include <linux/log2.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <linux/completion.h> + +#ifndef BASE_MAX_NR_CLOCKS_REGULATORS +#define BASE_MAX_NR_CLOCKS_REGULATORS 2 +#endif + +#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI) +/* Backend watchdog timer interval in milliseconds: 18 seconds. */ +#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)18000) +#else +/* Backend watchdog timer interval in milliseconds: 1 second. */ +#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000) +#endif /* IS_FPGA && !NO_MALI */ + +/** + * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE: Initial state, or the state if there is + * an error. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED: A user dump has been requested and + * we are waiting for an ACK; the ACK could come from either PRFCNT_ACK or + * PROTMODE_ENTER_ACK, and the wait also ends if an error occurs. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED: A watchdog dump has been + * requested and we are waiting for an ACK; the ACK can only come from + * PRFCNT_ACK or from an error occurring. PROTMODE_ENTER_ACK does not apply + * here, since a watchdog request can't be triggered in protected mode. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT: Checking the insert + * immediately after receiving the ACK, so we know which index corresponds to + * the buffer we requested. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED: The insert has been saved and + * now we have kicked off the worker. + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING: The insert has been saved and now + * we have kicked off the worker to accumulate up to that insert and then copy + * the delta to the user buffer to prepare for dump_get(). + * + * @KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED: The dump completed successfully.
+ * + * Valid state transitions: + * IDLE -> REQUESTED (on user dump request) + * IDLE -> WATCHDOG_REQUESTED (on watchdog request) + * IDLE -> QUERYING_INSERT (on user dump request in protected mode) + * REQUESTED -> QUERYING_INSERT (on dump acknowledged from firmware) + * WATCHDOG_REQUESTED -> REQUESTED (on user dump request) + * WATCHDOG_REQUESTED -> COMPLETED (on dump acknowledged from firmware for watchdog request) + * QUERYING_INSERT -> WORKER_LAUNCHED (on worker submission) + * WORKER_LAUNCHED -> ACCUMULATING (while the worker is accumulating) + * ACCUMULATING -> COMPLETED (on accumulation completion) + * COMPLETED -> QUERYING_INSERT (on user dump request in protected mode) + * COMPLETED -> REQUESTED (on user dump request) + * COMPLETED -> WATCHDOG_REQUESTED (on watchdog request) + * COMPLETED -> IDLE (on disable) + * ANY -> IDLE (on error) + */ +enum kbase_hwcnt_backend_csf_dump_state { + KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE, + KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED, + KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED, + KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT, + KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED, + KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING, + KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED, +}; + +/** + * enum kbase_hwcnt_backend_csf_enable_state - HWC CSF backend enable states. + * + * @KBASE_HWCNT_BACKEND_CSF_DISABLED: Initial state, and the state when backend + * is disabled. + * + * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: Enable request is in + * progress, waiting for firmware acknowledgment. + * + * @KBASE_HWCNT_BACKEND_CSF_ENABLED: Enable request has been acknowledged, + * enable is done. + * + * @KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: Disable request is in + * progress, waiting for firmware acknowledgment. + * + * @KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: Disable request has been + * acknowledged, waiting for dump workers to be finished. + * + * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: An + * unrecoverable error happened, waiting for dump workers to be finished. + * + * @KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: An unrecoverable error + * happened, and dump workers have finished, waiting for reset. + * + * Valid state transitions: + * DISABLED -> TRANSITIONING_TO_ENABLED (on enable) + * TRANSITIONING_TO_ENABLED -> ENABLED (on enable ack) + * ENABLED -> TRANSITIONING_TO_DISABLED (on disable) + * TRANSITIONING_TO_DISABLED -> DISABLED_WAIT_FOR_WORKER (on disable ack) + * DISABLED_WAIT_FOR_WORKER -> DISABLED (after workers are flushed) + * DISABLED -> UNRECOVERABLE_ERROR (on unrecoverable error) + * ANY but DISABLED -> UNRECOVERABLE_ERROR_WAIT_FOR_WORKER (on unrecoverable + * error) + * UNRECOVERABLE_ERROR -> DISABLED (before reset) + */ +enum kbase_hwcnt_backend_csf_enable_state { + KBASE_HWCNT_BACKEND_CSF_DISABLED, + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED, + KBASE_HWCNT_BACKEND_CSF_ENABLED, + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED, + KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER, + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER, + KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR, +}; + +/** + * struct kbase_hwcnt_backend_csf_info - Information used to create an instance + * of a CSF hardware counter backend. + * @backend: Pointer to access CSF backend. + * @fw_in_protected_mode: True if FW is running in protected mode, else + * false. + * @unrecoverable_error_happened: True if an unrecoverable error happened, else + * false. + * @csf_if: CSF interface object pointer.
+ * @ring_buf_cnt: Dump buffer count in the ring buffer. + * @counter_set: The performance counter set to use. + * @metadata: Hardware counter metadata. + * @prfcnt_info: Performance counter information. + * @watchdog_if: Watchdog interface object pointer. + */ +struct kbase_hwcnt_backend_csf_info { + struct kbase_hwcnt_backend_csf *backend; + bool fw_in_protected_mode; + bool unrecoverable_error_happened; + struct kbase_hwcnt_backend_csf_if *csf_if; + u32 ring_buf_cnt; + enum kbase_hwcnt_set counter_set; + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_backend_csf_if_prfcnt_info prfcnt_info; + struct kbase_hwcnt_watchdog_interface *watchdog_if; +}; + +/** + * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout + * information. + * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are + * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. + * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. + * @fe_cnt: Front end block count. + * @tiler_cnt: Tiler block count. + * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count. + * @shader_cnt: Shader Core block count. + * @fw_block_cnt: Total number of firmware counters blocks. + * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt). + * @shader_avail_mask: Bitmap of all shader cores in the system. + * @enable_mask_offset: Offset in array elements of enable mask in each block + * starting from the beginning of block. + * @headers_per_block: For any block, the number of counters designated as block's header. + * @counters_per_block: For any block, the number of counters designated as block's payload. + * @values_per_block: For any block, the number of counters in total (header + payload). + */ +struct kbase_hwcnt_csf_physical_layout { + u8 hw_block_cnt; + u8 fe_cnt; + u8 tiler_cnt; + u8 mmu_l2_cnt; + u8 shader_cnt; + u8 fw_block_cnt; + u8 block_cnt; + u64 shader_avail_mask; + size_t enable_mask_offset; + size_t headers_per_block; + size_t counters_per_block; + size_t values_per_block; +}; + +/** + * struct kbase_hwcnt_backend_csf - Instance of a CSF hardware counter backend. + * @info: CSF Info used to create the backend. + * @dump_state: The dumping state of the backend. + * @enable_state: The CSF backend internal enabled state. + * @insert_index_to_accumulate: The insert index in the ring buffer which need + * to accumulate up to. + * @enable_state_waitq: Wait queue object used to notify the enable + * changing flag is done. + * @to_user_buf: HWC sample buffer for client user, size + * metadata.dump_buf_bytes. + * @accum_buf: HWC sample buffer used as an internal + * accumulator, size metadata.dump_buf_bytes. + * @old_sample_buf: HWC sample buffer to save the previous values + * for delta calculation, size + * prfcnt_info.dump_bytes. + * @watchdog_last_seen_insert_idx: The insert index which watchdog has last + * seen, to check any new firmware automatic + * samples generated during the watchdog + * period. + * @ring_buf: Opaque pointer for ring buffer object. + * @ring_buf_cpu_base: CPU base address of the allocated ring buffer. + * @clk_enable_map: The enable map specifying enabled clock domains. + * @cycle_count_elapsed: Cycle count elapsed for a given sample period. + * @prev_cycle_count: Previous cycle count to calculate the cycle + * count for sample period. + * @phys_layout: Physical memory layout information of HWC + * sample buffer. 
+ * @dump_completed: Completion signaled by the dump worker when + * it is completed accumulating up to the + * insert_index_to_accumulate. + * Should be initialized to the "complete" state. + * @user_requested: Flag to indicate a dump_request called from + * user. + * @hwc_dump_workq: Single threaded work queue for HWC workers + * execution. + * @hwc_dump_work: Worker to accumulate samples. + * @hwc_threshold_work: Worker for consuming available samples when + * threshold interrupt raised. + */ +struct kbase_hwcnt_backend_csf { + struct kbase_hwcnt_backend_csf_info *info; + enum kbase_hwcnt_backend_csf_dump_state dump_state; + enum kbase_hwcnt_backend_csf_enable_state enable_state; + u32 insert_index_to_accumulate; + wait_queue_head_t enable_state_waitq; + u64 *to_user_buf; + u64 *accum_buf; + u32 *old_sample_buf; + u32 watchdog_last_seen_insert_idx; + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; + void *ring_buf_cpu_base; + u64 clk_enable_map; + u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; + u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; + struct kbase_hwcnt_csf_physical_layout phys_layout; + struct completion dump_completed; + bool user_requested; + struct workqueue_struct *hwc_dump_workq; + struct work_struct hwc_dump_work; + struct work_struct hwc_threshold_work; +}; + +static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) +{ + WARN_ON(!csf_info); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + return (csf_info->backend != NULL); +} + +/** + * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count + * tracking. + * + * @backend_csf: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + */ +static void +kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backend_csf, + const struct kbase_hwcnt_enable_map *enable_map) +{ + u64 clk_enable_map = enable_map->clk_enable_map; + u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; + size_t clk; + + /* Read cycle count from CSF interface for both clock domains. */ + backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, + clk_enable_map); + + kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) + { + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk)) + backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; + } + + /* Keep clk_enable_map for dump_request. 
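	 * (Editor's note, not part of the original patch: clk_enable_map is
	 * assumed here to carry one enable bit per clock domain, as suggested
	 * by kbase_hwcnt_clk_enable_map_enabled() above; e.g. with the default
	 * BASE_MAX_NR_CLOCKS_REGULATORS of 2, a map of 0x3 snapshots
	 * prev_cycle_count for both domains, while 0x1 tracks only domain 0.)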
*/ + backend_csf->clk_enable_map = clk_enable_map; +} + +static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *backend_csf) +{ + u64 cycle_counts[BASE_MAX_NR_CLOCKS_REGULATORS]; + size_t clk; + + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + + backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, + backend_csf->clk_enable_map); + + kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) + { + if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) { + backend_csf->cycle_count_elapsed[clk] = + cycle_counts[clk] - backend_csf->prev_cycle_count[clk]; + backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; + } + } +} + +/* CSF backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + + if (!backend_csf || !backend_csf->info || !backend_csf->info->csf_if) + return 0; + + return backend_csf->info->csf_if->timestamp_ns(backend_csf->info->csf_if->ctx); +} + +/** + * kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to + * guarantee headers are + * enabled if any counter is + * required. + * @phys_enable_map: HWC physical enable map to be processed. + */ +static void +kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map) +{ + WARN_ON(!phys_enable_map); + + /* Enable header if any counter is required from user, the header is + * controlled by bit 0 of the enable mask. + */ + if (phys_enable_map->fe_bm) + phys_enable_map->fe_bm |= 1; + + if (phys_enable_map->tiler_bm) + phys_enable_map->tiler_bm |= 1; + + if (phys_enable_map->mmu_l2_bm) + phys_enable_map->mmu_l2_bm |= 1; + + if (phys_enable_map->shader_bm) + phys_enable_map->shader_bm |= 1; +} + +static void kbasep_hwcnt_backend_csf_init_layout( + const struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info, + struct kbase_hwcnt_csf_physical_layout *phys_layout) +{ + size_t shader_core_cnt; + size_t values_per_block; + size_t fw_blocks_count; + size_t hw_blocks_count; + + WARN_ON(!prfcnt_info); + WARN_ON(!phys_layout); + + shader_core_cnt = fls64(prfcnt_info->core_mask); + values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; + fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); + hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); + + /* The number of hardware counters reported by the GPU matches the legacy guess-work we + * have done in the past. + */ + WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT + + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + + prfcnt_info->l2_count + shader_core_cnt); + + *phys_layout = (struct kbase_hwcnt_csf_physical_layout){ + .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, + .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, + .mmu_l2_cnt = prfcnt_info->l2_count, + .shader_cnt = shader_core_cnt, + .fw_block_cnt = fw_blocks_count, + .hw_block_cnt = hw_blocks_count, + .block_cnt = fw_blocks_count + hw_blocks_count, + .shader_avail_mask = prfcnt_info->core_mask, + .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .values_per_block = values_per_block, + .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, + }; +} + +static void
+kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf) +{ + size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; + + memset(backend_csf->to_user_buf, 0, user_buf_bytes); + memset(backend_csf->accum_buf, 0, user_buf_bytes); + memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); +} + +static void +kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf, + u32 *sample) +{ + u32 block_idx; + const struct kbase_hwcnt_csf_physical_layout *phys_layout; + u32 *block_buf; + + phys_layout = &backend_csf->phys_layout; + + for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + block_buf = sample + block_idx * phys_layout->values_per_block; + block_buf[phys_layout->enable_mask_offset] = 0; + } +} + +static void +kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_csf *backend_csf) +{ + u32 idx; + u32 *sample; + char *cpu_dump_base; + size_t dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; + + cpu_dump_base = (char *)backend_csf->ring_buf_cpu_base; + + for (idx = 0; idx < backend_csf->info->ring_buf_cnt; idx++) { + sample = (u32 *)&cpu_dump_base[idx * dump_bytes]; + kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); + } +} + +static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf) +{ + size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; + + /* Copy the data into the sample and wait for the user to get it. */ + memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes); + + /* After copying the data into the user sample, clear the accumulator + * values to prepare for the next accumulation, such as the next request + * or threshold. + */ + memset(backend_csf->accum_buf, 0, user_buf_bytes); +} + +static void kbasep_hwcnt_backend_csf_accumulate_sample( + const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, + u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples) +{ + size_t block_idx; + const u32 *old_block = old_sample_buf; + const u32 *new_block = new_sample_buf; + u64 *acc_block = accum_buf; + const size_t values_per_block = phys_layout->values_per_block; + + /* Performance counter blocks for firmware are stored before blocks for hardware. + * We skip over the firmware's performance counter blocks (counter dumping is not + * supported for firmware blocks, only hardware ones). + */ + old_block += values_per_block * phys_layout->fw_block_cnt; + new_block += values_per_block * phys_layout->fw_block_cnt; + + for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt; + block_idx++) { + const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; + const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; + + if (new_enable_mask == 0) { + /* Hardware block was unavailable or we didn't turn on + * any counters. Do nothing. + */ + } else { + /* Hardware block was available and it had some counters + * enabled. We need to update the accumulation buffer. + */ + size_t ctr_idx; + + /* Unconditionally copy the headers.
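			 * (Editor's note, not part of the original patch:
			 * headers carry metadata such as the PRFCNT_EN mask
			 * rather than event counts, so they are copied, never
			 * summed. In the delta path below, plain u32
			 * arithmetic also makes a single counter wrap-around
			 * harmless: old = 0xFFFFFFF0 and new = 0x00000010
			 * give new - old = 0x20, the true number of events.)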
*/ + for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) { + acc_block[ctr_idx] = new_block[ctr_idx]; + } + + /* Accumulate counter samples + * + * When accumulating samples we need to take into + * account whether the counter sampling method involves + * clearing counters back to zero after each sample is + * taken. + * + * The intention for CSF was that all HW should use + * counters which wrap to zero when their maximum value + * is reached. This, combined with non-clearing + * sampling, enables multiple concurrent users to + * request samples without interfering with each other. + * + * However some early HW may not support wrapping + * counters, for these GPUs counters must be cleared on + * sample to avoid loss of data due to counters + * saturating at their maximum value. + */ + if (!clearing_samples) { + if (old_enable_mask == 0) { + /* Hardware block was previously + * unavailable. Accumulate the new + * counters only, as we know previous + * values are zeroes. + */ + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < values_per_block; ctr_idx++) { + acc_block[ctr_idx] += new_block[ctr_idx]; + } + } else { + /* Hardware block was previously + * available. Accumulate the delta + * between old and new counter values. + */ + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < values_per_block; ctr_idx++) { + acc_block[ctr_idx] += + new_block[ctr_idx] - old_block[ctr_idx]; + } + } + } else { + for (ctr_idx = phys_layout->headers_per_block; + ctr_idx < values_per_block; ctr_idx++) { + acc_block[ctr_idx] += new_block[ctr_idx]; + } + } + } + old_block += values_per_block; + new_block += values_per_block; + acc_block += values_per_block; + } + + WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) - + (values_per_block * phys_layout->fw_block_cnt)); + (void)dump_bytes; +} + +static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backend_csf *backend_csf, + u32 extract_index_to_start, + u32 insert_index_to_stop) +{ + u32 raw_idx; + unsigned long flags; + u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base; + const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt; + const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes; + bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; + u32 *old_sample_buf = backend_csf->old_sample_buf; + u32 *new_sample_buf = old_sample_buf; + + if (extract_index_to_start == insert_index_to_stop) + /* No samples to accumulate. Early out. */ + return; + + /* Sync all the buffers to CPU side before read the data. */ + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, extract_index_to_start, + insert_index_to_stop, true); + + /* Consider u32 wrap case, '!=' is used here instead of '<' operator */ + for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { + /* The logical "&" acts as a modulo operation since buf_count + * must be a power of two. 
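		 * (Editor's illustration, not part of the original patch:
		 * with ring_buf_cnt = 4 the mask is 3, so raw indexes
		 * 6, 7, 8, 9 map to buffers 2, 3, 0, 1; the mapping also
		 * survives u32 wrap-around, since 0xFFFFFFFF & 3 = 3 and the
		 * next raw index after the wrap, 0, maps to buffer 0.)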
+ */ + const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); + + new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; + + kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout, + buf_dump_bytes, backend_csf->accum_buf, + old_sample_buf, new_sample_buf, + clearing_samples); + + old_sample_buf = new_sample_buf; + } + + /* Save the newest buffer as the old buffer for next time. */ + memcpy(backend_csf->old_sample_buf, new_sample_buf, buf_dump_bytes); + + /* Reset the prfcnt_en header on each sample before releasing them. */ + for (raw_idx = extract_index_to_start; raw_idx != insert_index_to_stop; raw_idx++) { + const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); + u32 *sample = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; + + kbasep_hwcnt_backend_csf_zero_sample_prfcnt_en_header(backend_csf, sample); + } + + /* Sync zeroed buffers to avoid coherency issues on future use. */ + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, extract_index_to_start, + insert_index_to_stop, false); + + /* After consuming all samples between extract_idx and insert_idx, + * set the raw extract index to insert_idx so that the sample buffers + * can be released back to the ring buffer pool. + */ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + backend_csf->info->csf_if->set_extract_index(backend_csf->info->csf_if->ctx, + insert_index_to_stop); + /* Update the watchdog last seen index to check any new FW auto samples + * in next watchdog callback. + */ + backend_csf->watchdog_last_seen_insert_idx = insert_index_to_stop; + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); +} + +static void kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + struct kbase_hwcnt_backend_csf *backend_csf, + enum kbase_hwcnt_backend_csf_enable_state new_state) +{ + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + + if (backend_csf->enable_state != new_state) { + backend_csf->enable_state = new_state; + + wake_up(&backend_csf->enable_state_waitq); + } +} + +static void kbasep_hwcnt_backend_watchdog_timer_cb(void *info) +{ + struct kbase_hwcnt_backend_csf_info *csf_info = info; + struct kbase_hwcnt_backend_csf *backend_csf; + unsigned long flags; + + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); + + if (WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info))) { + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; + } + + backend_csf = csf_info->backend; + + /* Only do watchdog request when all conditions are met: */ + if (/* 1. Backend is enabled. */ + (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && + /* 2. FW is not in protected mode. */ + (!csf_info->fw_in_protected_mode) && + /* 3. dump state indicates no other dumping is in progress. */ + ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) || + (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED))) { + u32 extract_index; + u32 insert_index; + + /* Read the raw extract and insert indexes from the CSF interface. */ + csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, &insert_index); + + /* Do watchdog request if no new FW auto samples. */ + if (insert_index == backend_csf->watchdog_last_seen_insert_idx) { + /* Trigger the watchdog request. */ + csf_info->csf_if->dump_request(csf_info->csf_if->ctx); + + /* A watchdog dump is required, change the state to + * start the request process. 
+ */ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED; + } + } + + /* Must schedule another callback when in the transitional state because + * this function can be called for the first time before the performance + * counter enabled interrupt. + */ + if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) || + (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED)) { + /* Reschedule the timer for next watchdog callback. */ + csf_info->watchdog_if->modify(csf_info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + } + + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} + +/** + * kbasep_hwcnt_backend_csf_dump_worker() - HWC dump worker. + * @work: Work structure. + * + * To accumulate all available samples in the ring buffer when a request has + * been done. + * + */ +static void kbasep_hwcnt_backend_csf_dump_worker(struct work_struct *work) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf; + u32 insert_index_to_acc; + u32 extract_index; + u32 insert_index; + + WARN_ON(!work); + backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_dump_work); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); + + /* The backend was disabled or had an error while the worker was being + * launched. + */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(!completion_done(&backend_csf->dump_completed)); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return; + } + + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED); + + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING; + insert_index_to_acc = backend_csf->insert_index_to_accumulate; + + /* Read the raw extract and insert indexes from the CSF interface. */ + backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, + &insert_index); + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + + /* Accumulate up to the insert we grabbed at the prfcnt request + * interrupt. + */ + kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, + insert_index_to_acc); + + /* Copy to the user buffer so if a threshold interrupt fires + * between now and get(), the accumulations are untouched. + */ + kbasep_hwcnt_backend_csf_update_user_sample(backend_csf); + + /* Dump done, set state back to COMPLETED for next request. */ + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); + + /* The backend was disabled or had an error while we were accumulating. + */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(!completion_done(&backend_csf->dump_completed)); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return; + } + + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_ACCUMULATING); + + /* Our work here is done - set the wait object and unblock waiters. 
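	 * (Editor's note, not part of the original patch: the complete_all()
	 * below pairs with reinit_completion() in dump_request() and releases
	 * any caller blocked in wait_for_completion() in dump_wait().)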
*/ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + complete_all(&backend_csf->dump_completed); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); +} + +/** + * kbasep_hwcnt_backend_csf_threshold_worker() - Threshold worker. + * + * @work: Work structure. + * + * Called when a HWC threshold interrupt raised to consume all available samples + * in the ring buffer. + */ +static void kbasep_hwcnt_backend_csf_threshold_worker(struct work_struct *work) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf; + u32 extract_index; + u32 insert_index; + + WARN_ON(!work); + + backend_csf = container_of(work, struct kbase_hwcnt_backend_csf, hwc_threshold_work); + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* Assert the backend is not destroyed. */ + WARN_ON(backend_csf != backend_csf->info->backend); + + /* Read the raw extract and insert indexes from the CSF interface. */ + backend_csf->info->csf_if->get_indexes(backend_csf->info->csf_if->ctx, &extract_index, + &insert_index); + + /* The backend was disabled or had an error while the worker was being + * launched. + */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return; + } + + /* Early out if we are not in the IDLE state or COMPLETED state, as this + * means a concurrent dump is in progress and we don't want to + * interfere. + */ + if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && + (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED)) { + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return; + } + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + + /* Accumulate everything we possibly can. We grabbed the insert index + * immediately after we acquired the lock but before we checked whether + * a concurrent dump was triggered. This ensures that if a concurrent + * dump was triggered between releasing the lock and now, we know for a + * fact that our insert will not exceed the concurrent dump's + * insert_to_accumulate, so we don't risk accumulating too much data. + */ + kbasep_hwcnt_backend_csf_accumulate_samples(backend_csf, extract_index, insert_index); + + /* No need to wake up anything since it is not a user dump request. */ +} + +static void +kbase_hwcnt_backend_csf_submit_dump_worker(struct kbase_hwcnt_backend_csf_info *csf_info) +{ + u32 extract_index; + + WARN_ON(!csf_info); + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + WARN_ON(!kbasep_hwcnt_backend_csf_backend_exists(csf_info)); + WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED); + WARN_ON(csf_info->backend->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT); + + /* Save insert index now so that the dump worker only accumulates the + * HWC data associated with this request. Extract index is not stored + * as that needs to be checked when accumulating to prevent re-reading + * buffers that have already been read and returned to the GPU. + */ + csf_info->csf_if->get_indexes(csf_info->csf_if->ctx, &extract_index, + &csf_info->backend->insert_index_to_accumulate); + csf_info->backend->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_WORKER_LAUNCHED; + + /* Submit the accumulator task into the work queue. 
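	 * (Editor's note, not part of the original patch: hwc_dump_workq is
	 * allocated in kbasep_hwcnt_backend_csf_create() with max_active = 1,
	 * so this dump worker and the threshold worker are serialised and
	 * never touch accum_buf concurrently.)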
*/ + queue_work(csf_info->backend->hwc_dump_workq, &csf_info->backend->hwc_dump_work); +} + +static void +kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *backend_csf, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_backend_csf_if_enable *enable) +{ + enum kbase_hwcnt_physical_set phys_counter_set; + struct kbase_hwcnt_physical_enable_map phys_enable_map; + + kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); + + /* process the enable_map to guarantee the block header is enabled which + * is needed for delta calculation. + */ + kbasep_hwcnt_backend_csf_process_enable_map(&phys_enable_map); + + kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_csf->info->counter_set); + + /* Use processed enable_map to enable HWC in HW level. */ + enable->fe_bm = phys_enable_map.fe_bm; + enable->shader_bm = phys_enable_map.shader_bm; + enable->tiler_bm = phys_enable_map.tiler_bm; + enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm; + enable->counter_set = phys_counter_set; + enable->clk_enable_map = enable_map->clk_enable_map; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int +kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + struct kbase_hwcnt_backend_csf_if_enable enable; + int err; + + if (!backend_csf || !enable_map || (enable_map->metadata != backend_csf->info->metadata)) + return -EINVAL; + + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + + kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); + + /* enable_state should be DISABLED before we transfer it to enabled */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) + return -EIO; + + err = backend_csf->info->watchdog_if->enable(backend_csf->info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS, + kbasep_hwcnt_backend_watchdog_timer_cb, + backend_csf->info); + if (err) + return err; + + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + WARN_ON(!completion_done(&backend_csf->dump_completed)); + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED); + + backend_csf->info->csf_if->dump_enable(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, &enable); + + kbasep_hwcnt_backend_csf_cc_initial_sample(backend_csf, enable_map); + + return 0; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_csf_dump_enable(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + + if (!backend_csf) + return -EINVAL; + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + errcode = kbasep_hwcnt_backend_csf_dump_enable_nolock(backend, enable_map); + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return errcode; +} + +static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( + struct kbase_hwcnt_backend_csf *backend_csf, unsigned long *lock_flags) +{ + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + + while ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) || + 
(backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)) { + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, *lock_flags); + + wait_event(backend_csf->enable_state_waitq, + (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) && + (backend_csf->enable_state != + KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED)); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, lock_flags); + } +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + bool do_disable = false; + + WARN_ON(!backend_csf); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* Make sure we wait until any previous enable or disable have completed + * before doing anything. + */ + kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED || + backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { + /* If we are already disabled or in an unrecoverable error + * state, there is nothing for us to do. + */ + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return; + } + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + complete_all(&backend_csf->dump_completed); + /* Only disable if we were previously enabled - in all other + * cases the call to disable will have already been made. + */ + do_disable = true; + } + + WARN_ON(backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE); + WARN_ON(!completion_done(&backend_csf->dump_completed)); + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + + /* Deregister the timer and block until any timer callback has completed. + * We've transitioned out of the ENABLED state so we can guarantee it + * won't reschedule itself. + */ + backend_csf->info->watchdog_if->disable(backend_csf->info->watchdog_if->timer); + + /* Block until any async work has completed. We have transitioned out of + * the ENABLED state so we can guarantee no new work will concurrently + * be submitted. 
+ */ + flush_workqueue(backend_csf->hwc_dump_workq); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + if (do_disable) + backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); + + kbasep_hwcnt_backend_csf_wait_enable_transition_complete(backend_csf, &flags); + + switch (backend_csf->enable_state) { + case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); + break; + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR); + break; + default: + WARN_ON(true); + break; + } + + backend_csf->user_requested = false; + backend_csf->watchdog_last_seen_insert_idx = 0; + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + + /* After disable, zero the header of all buffers in the ring buffer back + * to 0 to prepare for the next enable. + */ + kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); + + /* Sync zeroed buffers to avoid coherency issues on future use. */ + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, 0, + backend_csf->info->ring_buf_cnt, false); + + /* Reset accumulator, old_sample_buf and user_sample to all-0 to prepare + * for next enable. + */ + kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_csf_dump_request(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + bool do_request = false; + bool watchdog_dumping = false; + + if (!backend_csf) + return -EINVAL; + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + + /* If we're transitioning to enabled there's nothing to accumulate, and + * the user dump buffer is already zeroed. We can just short circuit to + * the DUMP_COMPLETED state. + */ + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); + kbasep_hwcnt_backend_csf_cc_update(backend_csf); + backend_csf->user_requested = true; + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return 0; + } + + /* Otherwise, make sure we're already enabled. */ + if (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_ENABLED) { + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return -EIO; + } + + /* Make sure that this is either the first request since enable, or the + * previous user dump has completed, or a watchdog dump is in progress, + * so that we don't cut in midway through a user dump. + * If a user request comes while a watchdog dump is in progress, + * the user request takes ownership of the watchdog dump sample by + * changing the dump_state so the interrupt for the watchdog + * request can be processed instead of ignored. + */ + if ((backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE) && + (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) && + (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED)) { + /* HWC is disabled or another user dump is ongoing, + * or we're on fault. + */ + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + return -EIO; + } + + /* Reset the completion so dump_wait() has something to wait on. */ + reinit_completion(&backend_csf->dump_completed); + + if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) + watchdog_dumping = true; + + if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) && + !backend_csf->info->fw_in_protected_mode) { + /* Only do the request if we are fully enabled and not in + * protected mode. + */ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED; + do_request = true; + } else { + /* Skip the request and the wait for ack, and go straight to + * checking the insert and kicking off the worker to do the dump. + */ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; + } + + /* CSF firmware might enter protected mode now, but still call request. + * That is fine, as we changed state while holding the lock, so the + * protected mode enter function will query the insert and launch the + * dumping worker. + * At some point we will get the dump request ACK saying a dump is done, + * but we can ignore it if we are not in the REQUESTED state and process + * it in the next dumping worker round. + */ + + *dump_time_ns = kbasep_hwcnt_backend_csf_timestamp_ns(backend); + kbasep_hwcnt_backend_csf_cc_update(backend_csf); + backend_csf->user_requested = true; + + if (do_request) { + /* If a watchdog dump is in progress, there is no need to do + * another request, just update the dump_state and take + * ownership of the sample which the watchdog requested. + */ + if (!watchdog_dumping) + backend_csf->info->csf_if->dump_request(backend_csf->info->csf_if->ctx); + } else + kbase_hwcnt_backend_csf_submit_dump_worker(backend_csf->info); + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + + /* Modify the watchdog timer to delay the next regular check, since a + * dump was just requested. + */ + backend_csf->info->watchdog_if->modify(backend_csf->info->watchdog_if->timer, + HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS); + + return 0; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backend) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + int errcode; + + if (!backend_csf) + return -EINVAL; + + wait_for_completion(&backend_csf->dump_completed); + + backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); + /* Make sure the last dump actually succeeded when user_requested is + * set. + */ + if (backend_csf->user_requested && + ((backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED) || + (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED))) + errcode = 0; + else + errcode = -EIO; + + backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + + return errcode; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_csf_dump_clear(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + int errcode; + u64 ts; + + if (!backend_csf) + return -EINVAL; + + /* Request a dump so we can clear all current counters.
*/ + errcode = kbasep_hwcnt_backend_csf_dump_request(backend, &ts); + if (!errcode) + /* Wait for the manual dump or auto dump to be done and + * accumulator to be updated. + */ + errcode = kbasep_hwcnt_backend_csf_dump_wait(backend); + + return errcode; +} + +/* CSF backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; + int ret; + size_t clk; + + if (!backend_csf || !dst || !dst_enable_map || + (backend_csf->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Extract elapsed cycle count for each clock domain if enabled. */ + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) + { + if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) + continue; + + /* Reset the counter to zero if accumulation is off. */ + if (!accumulate) + dst->clk_cnt_buf[clk] = 0; + dst->clk_cnt_buf[clk] += backend_csf->cycle_count_elapsed[clk]; + } + + /* We just return the user buffer without checking the current state, + * as it is undefined to call this function without a prior succeeding + * call to dump_wait(). + */ + ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate); + + return ret; +} + +/** + * kbasep_hwcnt_backend_csf_destroy() - Destroy CSF backend. + * @backend_csf: Pointer to CSF backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. + * + */ +static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *backend_csf) +{ + if (!backend_csf) + return; + + destroy_workqueue(backend_csf->hwc_dump_workq); + + backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf); + + kfree(backend_csf->accum_buf); + backend_csf->accum_buf = NULL; + + kfree(backend_csf->old_sample_buf); + backend_csf->old_sample_buf = NULL; + + kfree(backend_csf->to_user_buf); + backend_csf->to_user_buf = NULL; + + kfree(backend_csf); +} + +/** + * kbasep_hwcnt_backend_csf_create() - Create a CSF backend instance. + * + * @csf_info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code.
+ */ +static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *csf_info, + struct kbase_hwcnt_backend_csf **out_backend) +{ + struct kbase_hwcnt_backend_csf *backend_csf = NULL; + int errcode = -ENOMEM; + + WARN_ON(!csf_info); + WARN_ON(!out_backend); + + backend_csf = kzalloc(sizeof(*backend_csf), GFP_KERNEL); + if (!backend_csf) + goto alloc_error; + + backend_csf->info = csf_info; + kbasep_hwcnt_backend_csf_init_layout(&csf_info->prfcnt_info, &backend_csf->phys_layout); + + backend_csf->accum_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); + if (!backend_csf->accum_buf) + goto err_alloc_acc_buf; + + backend_csf->old_sample_buf = kzalloc(csf_info->prfcnt_info.dump_bytes, GFP_KERNEL); + if (!backend_csf->old_sample_buf) + goto err_alloc_pre_sample_buf; + + backend_csf->to_user_buf = kzalloc(csf_info->metadata->dump_buf_bytes, GFP_KERNEL); + if (!backend_csf->to_user_buf) + goto err_alloc_user_sample_buf; + + errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt, + &backend_csf->ring_buf_cpu_base, + &backend_csf->ring_buf); + if (errcode) + goto err_ring_buf_alloc; + errcode = -ENOMEM; + + /* Zero all performance enable header to prepare for first enable. */ + kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(backend_csf); + + /* Sync zeroed buffers to avoid coherency issues on use. */ + backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf, 0, + backend_csf->info->ring_buf_cnt, false); + + init_completion(&backend_csf->dump_completed); + + init_waitqueue_head(&backend_csf->enable_state_waitq); + + /* Allocate a single threaded work queue for dump worker and threshold + * worker. + */ + backend_csf->hwc_dump_workq = + alloc_workqueue("mali_hwc_dump_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!backend_csf->hwc_dump_workq) + goto err_alloc_workqueue; + + INIT_WORK(&backend_csf->hwc_dump_work, kbasep_hwcnt_backend_csf_dump_worker); + INIT_WORK(&backend_csf->hwc_threshold_work, kbasep_hwcnt_backend_csf_threshold_worker); + + backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_DISABLED; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + complete_all(&backend_csf->dump_completed); + backend_csf->user_requested = false; + backend_csf->watchdog_last_seen_insert_idx = 0; + + *out_backend = backend_csf; + return 0; + +err_alloc_workqueue: + backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, + backend_csf->ring_buf); +err_ring_buf_alloc: + kfree(backend_csf->to_user_buf); + backend_csf->to_user_buf = NULL; +err_alloc_user_sample_buf: + kfree(backend_csf->old_sample_buf); + backend_csf->old_sample_buf = NULL; +err_alloc_pre_sample_buf: + kfree(backend_csf->accum_buf); + backend_csf->accum_buf = NULL; +err_alloc_acc_buf: + kfree(backend_csf); +alloc_error: + return errcode; +} + +/* CSF backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_csf_init(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf *backend_csf = NULL; + struct kbase_hwcnt_backend_csf_info *csf_info = (struct kbase_hwcnt_backend_csf_info *)info; + int errcode; + bool success = false; + + if (!info || !out_backend) + return -EINVAL; + + /* Create the backend. */ + errcode = kbasep_hwcnt_backend_csf_create(csf_info, &backend_csf); + if (errcode) + return errcode; + + /* If it was not created before, attach it to csf_info. 
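+ * Only one backend instance may be attached to the interface at a
+ * time; a second init() without an intervening term() fails below
+ * with -EBUSY.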
+ * Use spin lock to avoid concurrent initialization.
+ */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ if (csf_info->backend == NULL) {
+ csf_info->backend = backend_csf;
+ *out_backend = (struct kbase_hwcnt_backend *)backend_csf;
+ success = true;
+ if (csf_info->unrecoverable_error_happened)
+ backend_csf->enable_state = KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR;
+ }
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ /* Destroy the newly created backend if one had already been created
+ * before. Normally this won't happen if clients call init() properly.
+ */
+ if (!success) {
+ kbasep_hwcnt_backend_csf_destroy(backend_csf);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
+
+ if (!backend)
+ return;
+
+ kbasep_hwcnt_backend_csf_dump_disable(backend);
+
+ /* Set the backend in csf_info to NULL so we won't handle any external
+ * notifications anymore, since we are terminating.
+ */
+ backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
+ backend_csf->info->backend = NULL;
+ backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+
+ kbasep_hwcnt_backend_csf_destroy(backend_csf);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_destroy() - Destroy a CSF backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ *
+ */
+static void kbasep_hwcnt_backend_csf_info_destroy(const struct kbase_hwcnt_backend_csf_info *info)
+{
+ if (!info)
+ return;
+
+ /* The backend should be destroyed before the info object is destroyed. */
+ WARN_ON(info->backend != NULL);
+
+ /* The metadata should be destroyed before the info object is destroyed. */
+ WARN_ON(info->metadata != NULL);
+
+ kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_info_create() - Create a CSF backend info.
+ *
+ * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure
+ * used to create the backend interface.
+ * @ring_buf_cnt: The buffer count of the CSF hwcnt backend ring buffer.
+ * MUST be a power of 2.
+ * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used to create
+ * the backend interface.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
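+ *
+ * The counter set is fixed at build time: CONFIG_MALI_PRFCNT_SET_SECONDARY
+ * selects the secondary set, CONFIG_MALI_PRFCNT_SET_TERTIARY the tertiary
+ * set, and the primary set is used otherwise.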
+ */
+static int
+kbasep_hwcnt_backend_csf_info_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ const struct kbase_hwcnt_backend_csf_info **out_info)
+{
+ struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+ if (WARN_ON(!csf_if) || WARN_ON(!watchdog_if) || WARN_ON(!out_info) ||
+ WARN_ON(!is_power_of_2(ring_buf_cnt)))
+ return -EINVAL;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ *info = (struct kbase_hwcnt_backend_csf_info)
+ {
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+ .counter_set = KBASE_HWCNT_SET_SECONDARY,
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+ .counter_set = KBASE_HWCNT_SET_TERTIARY,
+#else
+ /* Default to primary */
+ .counter_set = KBASE_HWCNT_SET_PRIMARY,
+#endif
+ .backend = NULL, .csf_if = csf_if, .ring_buf_cnt = ring_buf_cnt,
+ .fw_in_protected_mode = false, .unrecoverable_error_happened = false,
+ .watchdog_if = watchdog_if,
+ };
+ *out_info = info;
+
+ return 0;
+}
+
+/* CSF backend implementation of kbase_hwcnt_backend_metadata_fn */
+static const struct kbase_hwcnt_metadata *
+kbasep_hwcnt_backend_csf_metadata(const struct kbase_hwcnt_backend_info *info)
+{
+ if (!info)
+ return NULL;
+
+ WARN_ON(!((const struct kbase_hwcnt_backend_csf_info *)info)->metadata);
+
+ return ((const struct kbase_hwcnt_backend_csf_info *)info)->metadata;
+}
+
+static void
+kbasep_hwcnt_backend_csf_handle_unrecoverable_error(struct kbase_hwcnt_backend_csf *backend_csf)
+{
+ bool do_disable = false;
+
+ backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+
+ /* We are already in or transitioning to the unrecoverable error state.
+ * Early out.
+ */
+ if ((backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) ||
+ (backend_csf->enable_state ==
+ KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER))
+ return;
+
+ /* If we are disabled, we know we have no pending workers, so skip the
+ * waiting state.
+ */
+ if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) {
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR);
+ return;
+ }
+
+ /* Trigger a disable only if we are not already transitioning to
+ * disabled; we don't want to disable twice if an unrecoverable error
+ * happens while we are disabling.
+ */
+ do_disable =
+ (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED);
+
+ kbasep_hwcnt_backend_csf_change_es_and_wake_waiters(
+ backend_csf, KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER);
+
+ /* Transition the dump to the IDLE state and unblock any waiters. The
+ * IDLE state signifies an error.
+ */
+ backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE;
+ complete_all(&backend_csf->dump_completed);
+
+ /* Now do the actual disable, unless a transition to disabled was
+ * already in flight; we don't want to disable twice if an
+ * unrecoverable error happens while we are disabling.
+ */ + if (do_disable) + backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); +} + +static void +kbasep_hwcnt_backend_csf_handle_recoverable_error(struct kbase_hwcnt_backend_csf *backend_csf) +{ + backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + + switch (backend_csf->enable_state) { + case KBASE_HWCNT_BACKEND_CSF_DISABLED: + case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: + /* Already disabled or disabling, or in an unrecoverable error. + * Nothing to be done to handle the error. + */ + return; + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: + /* A seemingly recoverable error that occurs while we are + * transitioning to enabled is probably unrecoverable. + */ + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(backend_csf); + return; + case KBASE_HWCNT_BACKEND_CSF_ENABLED: + /* Start transitioning to the disabled state. We can't wait for + * it as this recoverable error might be triggered from an + * interrupt. The wait will be done in the eventual call to + * disable(). + */ + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED); + /* Transition the dump to the IDLE state and unblock any + * waiters. The IDLE state signifies an error. + */ + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_IDLE; + complete_all(&backend_csf->dump_completed); + + backend_csf->info->csf_if->dump_disable(backend_csf->info->csf_if->ctx); + return; + } +} + +void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info = + (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + csf_info->fw_in_protected_mode = true; + + /* Call on_prfcnt_sample() to trigger collection of the protected mode + * entry auto-sample if there is currently a pending dump request. + */ + kbase_hwcnt_backend_csf_on_prfcnt_sample(iface); +} + +void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + csf_info->fw_in_protected_mode = false; +} + +void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf_info *csf_info; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); + csf_info->unrecoverable_error_happened = true; + /* Early out if the backend does not exist. 
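+ * (init() may not have run yet, or term() may already have detached
+ * it from the interface).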
*/ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; + } + + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); + + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} + +void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface) +{ + unsigned long flags; + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + csf_info->csf_if->lock(csf_info->csf_if->ctx, &flags); + csf_info->unrecoverable_error_happened = false; + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) { + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); + return; + } + backend_csf = csf_info->backend; + + if ((backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED) && + (backend_csf->enable_state != KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR)) { + /* Before a reset occurs, we must either have been disabled + * (else we lose data) or we should have encountered an + * unrecoverable error. Either way, we will have disabled the + * interface and waited for any workers that might have still + * been in flight. + * If not in these states, fire off one more disable to make + * sure everything is turned off before the power is pulled. + * We can't wait for this disable to complete, but it doesn't + * really matter, the power is being pulled. + */ + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); + } + + /* A reset is the only way to exit the unrecoverable error state */ + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED); + } + + csf_info->csf_if->unlock(csf_info->csf_if->ctx, flags); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + /* Skip the dump_work if it's a watchdog request. */ + if (backend_csf->dump_state == KBASE_HWCNT_BACKEND_CSF_DUMP_WATCHDOG_REQUESTED) { + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_COMPLETED; + return; + } + + /* If the current state is not REQUESTED, this HWC sample will be + * skipped and processed in next dump_request. + */ + if (backend_csf->dump_state != KBASE_HWCNT_BACKEND_CSF_DUMP_REQUESTED) + return; + backend_csf->dump_state = KBASE_HWCNT_BACKEND_CSF_DUMP_QUERYING_INSERT; + + kbase_hwcnt_backend_csf_submit_dump_worker(csf_info); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. 
*/ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) + /* Submit the threshold work into the work queue to consume the + * available samples. + */ + queue_work(backend_csf->hwc_dump_workq, &backend_csf->hwc_threshold_work); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + + /* Called when an overflow occurs. We treat this as a recoverable error, + * so we start transitioning to the disabled state. + * We could try and handle it while enabled, but in a real system we + * never expect an overflow to occur so there is no point implementing + * complex recovery code when we can just turn ourselves off instead for + * a while. + */ + kbasep_hwcnt_backend_csf_handle_recoverable_error(csf_info->backend); +} + +void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_ENABLED); + } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_ENABLED) { + /* Unexpected, but we are already in the right state so just + * ignore it. + */ + } else { + /* Unexpected state change, assume everything is broken until + * we reset. + */ + kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend); + } +} + +void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + struct kbase_hwcnt_backend_csf *backend_csf; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); + + /* Early out if the backend does not exist. */ + if (!kbasep_hwcnt_backend_csf_backend_exists(csf_info)) + return; + backend_csf = csf_info->backend; + + if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED) { + kbasep_hwcnt_backend_csf_change_es_and_wake_waiters( + backend_csf, KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER); + } else if (backend_csf->enable_state == KBASE_HWCNT_BACKEND_CSF_DISABLED) { + /* Unexpected, but we are already in the right state so just + * ignore it. + */ + } else { + /* Unexpected state change, assume everything is broken until + * we reset. 
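+ * (kbase_hwcnt_backend_csf_on_before_reset() is the only path back
+ * out of the unrecoverable error state).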
+ */
+ kbasep_hwcnt_backend_csf_handle_unrecoverable_error(csf_info->backend);
+ }
+}
+
+int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+ struct kbase_hwcnt_gpu_info gpu_info;
+
+ if (!iface)
+ return -EINVAL;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ WARN_ON(!csf_info->csf_if->get_prfcnt_info);
+
+ csf_info->csf_if->get_prfcnt_info(csf_info->csf_if->ctx, &csf_info->prfcnt_info);
+
+ /* The clock domain count should not exceed the maximum number of
+ * clock regulators.
+ */
+ if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
+ return -EIO;
+
+ gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
+ gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+ gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
+ gpu_info.prfcnt_values_per_block =
+ csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
+ return kbase_hwcnt_csf_metadata_create(&gpu_info, csf_info->counter_set,
+ &csf_info->metadata);
+}
+
+void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ if (!iface)
+ return;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+ if (csf_info->metadata) {
+ kbase_hwcnt_csf_metadata_destroy(csf_info->metadata);
+ csf_info->metadata = NULL;
+ }
+}
+
+int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt,
+ struct kbase_hwcnt_watchdog_interface *watchdog_if,
+ struct kbase_hwcnt_backend_interface *iface)
+{
+ int errcode;
+ const struct kbase_hwcnt_backend_csf_info *info = NULL;
+
+ if (!iface || !csf_if || !watchdog_if)
+ return -EINVAL;
+
+ /* The buffer count must be a power of 2 */
+ if (!is_power_of_2(ring_buf_cnt))
+ return -EINVAL;
+
+ errcode = kbasep_hwcnt_backend_csf_info_create(csf_if, ring_buf_cnt, watchdog_if, &info);
+ if (errcode)
+ return errcode;
+
+ iface->info = (struct kbase_hwcnt_backend_info *)info;
+ iface->metadata = kbasep_hwcnt_backend_csf_metadata;
+ iface->init = kbasep_hwcnt_backend_csf_init;
+ iface->term = kbasep_hwcnt_backend_csf_term;
+ iface->timestamp_ns = kbasep_hwcnt_backend_csf_timestamp_ns;
+ iface->dump_enable = kbasep_hwcnt_backend_csf_dump_enable;
+ iface->dump_enable_nolock = kbasep_hwcnt_backend_csf_dump_enable_nolock;
+ iface->dump_disable = kbasep_hwcnt_backend_csf_dump_disable;
+ iface->dump_clear = kbasep_hwcnt_backend_csf_dump_clear;
+ iface->dump_request = kbasep_hwcnt_backend_csf_dump_request;
+ iface->dump_wait = kbasep_hwcnt_backend_csf_dump_wait;
+ iface->dump_get = kbasep_hwcnt_backend_csf_dump_get;
+
+ return 0;
+}
+
+void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface)
+ return;
+
+ kbasep_hwcnt_backend_csf_info_destroy(
+ (const struct kbase_hwcnt_backend_csf_info *)iface->info);
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
new file mode 100644
index 0000000..9c5a5c9
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Concrete implementation of mali_kbase_hwcnt_backend interface for CSF + * backend. + */ + +#ifndef _KBASE_HWCNT_BACKEND_CSF_H_ +#define _KBASE_HWCNT_BACKEND_CSF_H_ + +#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" +#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" + +/** + * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend + * interface. + * @csf_if: Non-NULL pointer to a hwcnt backend CSF interface structure + * used to create backend interface. + * @ring_buf_cnt: The buffer count of CSF hwcnt backend, used when allocate ring + * buffer, MUST be power of 2. + * @watchdog_if: Non-NULL pointer to a hwcnt watchdog interface structure used + * to create backend interface. + * @iface: Non-NULL pointer to backend interface structure that is filled + * in on creation success. + * + * Calls to iface->dump_enable_nolock() require the CSF Scheduler IRQ lock. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_if *csf_if, u32 ring_buf_cnt, + struct kbase_hwcnt_watchdog_interface *watchdog_if, + struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_metadata_init() - Initialize the metadata for a CSF + * hardware counter backend. + * @iface: Non-NULL pointer to backend interface structure + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_metadata_term() - Terminate the metadata for a CSF + * hardware counter backend. + * @iface: Non-NULL pointer to backend interface structure. + */ +void kbase_hwcnt_backend_csf_metadata_term(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_destroy() - Destroy a CSF hardware counter backend + * interface. + * @iface: Pointer to interface to destroy. + * + * Can be safely called on an all-zeroed interface, or on an already destroyed + * interface. + */ +void kbase_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_protm_entered() - CSF HWC backend function to receive + * notification that protected mode + * has been entered. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_protm_entered(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_protm_exited() - CSF HWC backend function to receive + * notification that protected mode has + * been exited. + * @iface: Non-NULL pointer to HWC backend interface. 
+ */ +void kbase_hwcnt_backend_csf_protm_exited(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_unrecoverable_error() - CSF HWC backend function + * called when unrecoverable + * errors are detected. + * @iface: Non-NULL pointer to HWC backend interface. + * + * This should be called on encountering errors that can only be recovered from + * with reset, or that may put HWC logic in state that could result in hang. For + * example, on bus error, or when FW becomes unresponsive. + */ +void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_before_reset() - CSF HWC backend function to be + * called immediately before a + * reset. Takes us out of the + * unrecoverable error state, if we + * were in it. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample + * complete interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_sample(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_threshold() - CSF performance counter + * buffer reach threshold + * interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_threshold(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_overflow() - CSF performance counter buffer + * overflow interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_overflow(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_enable() - CSF performance counter enabled + * interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interface *iface); + +/** + * kbase_hwcnt_backend_csf_on_prfcnt_disable() - CSF performance counter + * disabled interrupt handler. + * @iface: Non-NULL pointer to HWC backend interface. + */ +void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface); + +#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */ diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h new file mode 100644 index 0000000..382a3ad --- /dev/null +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h @@ -0,0 +1,302 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +/* + * Virtual interface for CSF hardware counter backend. + */ + +#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_H_ +#define _KBASE_HWCNT_BACKEND_CSF_IF_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_backend_csf_if_ctx; + +struct kbase_hwcnt_backend_csf_if_ring_buf; + +/** + * struct kbase_hwcnt_backend_csf_if_enable - enable hardware counter collection + * structure. + * @fe_bm: Front End counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + * @counter_set: The performance counter set to enable. + * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle + * counter for a given clock domain. + */ +struct kbase_hwcnt_backend_csf_if_enable { + u32 fe_bm; + u32 shader_bm; + u32 tiler_bm; + u32 mmu_l2_bm; + u8 counter_set; + u64 clk_enable_map; +}; + +/** + * struct kbase_hwcnt_backend_csf_if_prfcnt_info - Performance counter + * information. + * @prfcnt_hw_size: Total length in bytes of all the hardware counters data. The hardware + * counters are sub-divided into 4 classes: front-end, shader, tiler, and + * memory system (l2 cache + MMU). + * @prfcnt_fw_size: Total length in bytes of all the firmware counters data. + * @dump_bytes: Bytes of GPU memory required to perform a performance + * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size. + * @prfcnt_block_size: Bytes of each performance counter block. + * @l2_count: The MMU L2 cache count. + * @core_mask: Shader core mask. + * @clk_cnt: Clock domain count in the system. + * @clearing_samples: Indicates whether counters are cleared after each sample + * is taken. + */ +struct kbase_hwcnt_backend_csf_if_prfcnt_info { + size_t prfcnt_hw_size; + size_t prfcnt_fw_size; + size_t dump_bytes; + size_t prfcnt_block_size; + size_t l2_count; + u64 core_mask; + u8 clk_cnt; + bool clearing_samples; +}; + +/** + * typedef kbase_hwcnt_backend_csf_if_assert_lock_held_fn - Assert that the + * backend spinlock is + * held. + * @ctx: Non-NULL pointer to a CSF context. + */ +typedef void +kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. + * + * @ctx: Non-NULL pointer to a CSF context. + * @flags: Pointer to the memory location that would store the previous + * interrupt state. + */ +typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. + * + * @ctx: Non-NULL pointer to a CSF context. + * @flags: Previously stored interrupt state when Scheduler interrupt + * spinlock was acquired. + */ +typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long flags); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance + * counter information. + * @ctx: Non-NULL pointer to a CSF context. + * @prfcnt_info: Non-NULL pointer to struct where performance counter + * information should be stored. + */ +typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); + +/** + * typedef kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn - Allocate a ring buffer + * for CSF interface. + * @ctx: Non-NULL pointer to a CSF context. 
+ * @buf_count: The buffer count in the ring buffer to be allocated, + * MUST be power of 2. + * @cpu_dump_base: Non-NULL pointer to where ring buffer CPU base address is + * stored when success. + * @ring_buf: Non-NULL pointer to where ring buffer is stored when success. + * + * A ring buffer is needed by the CSF interface to do manual HWC sample and + * automatic HWC samples, the buffer count in the ring buffer MUST be power + * of 2 to meet the hardware requirement. + * + * Return: 0 on success, else error code. + */ +typedef int +kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 buf_count, void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); + +/** + * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers + * memory. + * @ctx: Non-NULL pointer to a CSF context. + * @ring_buf: Non-NULL pointer to the ring buffer. + * @buf_index_first: The first buffer index in the ring buffer to be synced, + * inclusive. + * @buf_index_last: The last buffer index in the ring buffer to be synced, + * exclusive. + * @for_cpu: The direction of sync to be applied, set to true when CPU + * cache needs invalidating before reading the buffer, and set + * to false after CPU writes to flush these before this memory + * is overwritten by the GPU. + * + * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU + * are correctly observed. + */ +typedef void +kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + u32 buf_index_first, u32 buf_index_last, bool for_cpu); + +/** + * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for + * the CSF interface. + * + * @ctx: Non-NULL pointer to a CSF interface context. + * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. + */ +typedef void +kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); + +/** + * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current + * timestamp of the CSF + * interface. + * @ctx: Non-NULL pointer to a CSF interface context. + * + * Return: CSF interface timestamp in nanoseconds. + */ +typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware + * counter in CSF interface. + * @ctx: Non-NULL pointer to a CSF interface context. + * @ring_buf: Non-NULL pointer to the ring buffer which used to setup the HWC. + * @enable: Non-NULL pointer to the enable map of HWC. + * + * Requires lock to be taken before calling. + */ +typedef void +kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable); + +/** + * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter + * in CSF interface. + * @ctx: Non-NULL pointer to a CSF interface context. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. + * + * @ctx: Non-NULL pointer to the interface context. + * + * Requires lock to be taken before calling. 
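+ *
+ * In the firmware implementation later in this patch, a request is
+ * made by toggling the PRFCNT_SAMPLE request bit in GLB_REQ and
+ * ringing the doorbell; the PRFCNT_SAMPLE interrupt then signals
+ * completion.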
+ */ +typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and + * insert indexes of the + * ring buffer. + * + * @ctx: Non-NULL pointer to a CSF interface context. + * @extract_index: Non-NULL pointer where current extract index to be saved. + * @insert_index: Non-NULL pointer where current insert index to be saved. + * + * Requires lock to be taken before calling. + */ +typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 *extract_index, u32 *insert_index); + +/** + * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract + * index of the ring + * buffer. + * + * @ctx: Non-NULL pointer to a CSF interface context. + * @extract_index: New extract index to be set. + * + * Requires lock to be taken before calling. + */ +typedef void +kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u32 extract_index); + +/** + * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current + * GPU cycle count. + * @ctx: Non-NULL pointer to a CSF interface context. + * @cycle_counts: Non-NULL pointer to an array where cycle counts to be saved, + * the array size should be at least as big as the number of + * clock domains returned by get_prfcnt_info interface. + * @clk_enable_map: An array of bitfields, each bit specifies an enabled clock + * domain. + * + * Requires lock to be taken before calling. + */ +typedef void +kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + u64 *cycle_counts, u64 clk_enable_map); + +/** + * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual + * interface. + * @ctx: CSF interface context. + * @assert_lock_held: Function ptr to assert backend spinlock is held. + * @lock: Function ptr to acquire backend spinlock. + * @unlock: Function ptr to release backend spinlock. + * @get_prfcnt_info: Function ptr to get performance counter related + * information. + * @ring_buf_alloc: Function ptr to allocate ring buffer for CSF HWC. + * @ring_buf_sync: Function ptr to sync ring buffer to CPU. + * @ring_buf_free: Function ptr to free ring buffer for CSF HWC. + * @timestamp_ns: Function ptr to get the current CSF interface + * timestamp. + * @dump_enable: Function ptr to enable dumping. + * @dump_disable: Function ptr to disable dumping. + * @dump_request: Function ptr to request a dump. + * @get_indexes: Function ptr to get extract and insert indexes of the + * ring buffer. + * @set_extract_index: Function ptr to set extract index of ring buffer. + * @get_gpu_cycle_count: Function ptr to get the GPU cycle count. 
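+ *
+ * A minimal sketch of wiring up an implementation (hypothetical my_*
+ * callbacks shown for illustration; the in-tree implementation is the
+ * firmware interface in mali_kbase_hwcnt_backend_csf_if_fw.c):
+ *
+ *   struct kbase_hwcnt_backend_csf_if my_if = {
+ *           .ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)my_ctx,
+ *           .assert_lock_held = my_assert_lock_held,
+ *           .lock = my_lock,
+ *           .unlock = my_unlock,
+ *           .get_prfcnt_info = my_get_prfcnt_info,
+ *           .ring_buf_alloc = my_ring_buf_alloc,
+ *           .ring_buf_sync = my_ring_buf_sync,
+ *           .ring_buf_free = my_ring_buf_free,
+ *           .timestamp_ns = my_timestamp_ns,
+ *           .dump_enable = my_dump_enable,
+ *           .dump_disable = my_dump_disable,
+ *           .dump_request = my_dump_request,
+ *           .get_indexes = my_get_indexes,
+ *           .set_extract_index = my_set_extract_index,
+ *           .get_gpu_cycle_count = my_get_gpu_cycle_count,
+ *   };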
+ */ +struct kbase_hwcnt_backend_csf_if { + struct kbase_hwcnt_backend_csf_if_ctx *ctx; + kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held; + kbase_hwcnt_backend_csf_if_lock_fn *lock; + kbase_hwcnt_backend_csf_if_unlock_fn *unlock; + kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info; + kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc; + kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync; + kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free; + kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns; + kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable; + kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable; + kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request; + kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes; + kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index; + kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count; +}; + +#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */ diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c new file mode 100644 index 0000000..a3a0e02 --- /dev/null +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -0,0 +1,784 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * CSF GPU HWC backend firmware interface APIs. + */ + +#include <mali_kbase.h> +#include <gpu/mali_kbase_gpu_regmap.h> +#include <device/mali_kbase_device.h> +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" +#include <csf/mali_kbase_csf_registers.h> + +#include "csf/mali_kbase_csf_firmware.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h" +#include "mali_kbase_hwaccess_time.h" +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + +#include <linux/log2.h> +#include "mali_kbase_ccswe.h" + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include <backend/gpu/mali_kbase_model_dummy.h> +#endif /* CONFIG_MALI_NO_MALI */ + +/* Ring buffer virtual address start at 4GB */ +#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) + +/** + * struct kbase_hwcnt_backend_csf_if_fw_ring_buf - ring buffer for CSF interface + * used to save the manual and + * auto HWC samples from + * firmware. + * @gpu_dump_base: Starting GPU base address of the ring buffer. + * @cpu_dump_base: Starting CPU address for the mapping. + * @buf_count: Buffer count in the ring buffer, MUST be power of 2. + * @as_nr: Address space number for the memory mapping. + * @phys: Physical memory allocation used by the mapping. + * @num_pages: Size of the mapping, in memory pages. 
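+ *
+ * Because @buf_count is a power of two, a free-running buffer index
+ * can be reduced to a ring position with a simple mask, e.g.:
+ *
+ *   ring_idx = raw_idx & (buf_count - 1);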
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ring_buf {
+ u64 gpu_dump_base;
+ void *cpu_dump_base;
+ size_t buf_count;
+ u32 as_nr;
+ struct tagged_addr *phys;
+ size_t num_pages;
+};
+
+/**
+ * struct kbase_hwcnt_backend_csf_if_fw_ctx - Firmware context for the CSF
+ * interface, used to communicate
+ * with firmware.
+ * @kbdev: KBase device.
+ * @buf_bytes: The size in bytes for each buffer in the ring buffer.
+ * @clk_cnt: The number of clock domains in the system.
+ * The maximum is 64.
+ * @clk_enable_map: Bitmask of enabled clocks.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ */
+struct kbase_hwcnt_backend_csf_if_fw_ctx {
+ struct kbase_device *kbdev;
+ size_t buf_bytes;
+ u8 clk_cnt;
+ u64 clk_enable_map;
+ struct kbase_clk_rate_listener rate_listener;
+ struct kbase_ccswe ccswe_shader_cores;
+};
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_lock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock(kbdev, flags);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_unlock(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx;
+ struct kbase_device *kbdev;
+
+ WARN_ON(!ctx);
+
+ fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+ kbdev = fw_ctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_on_freq_change() - On freq change callback
+ *
+ * @rate_listener: Callback state
+ * @clk_index: Clock index
+ * @clk_rate_hz: Clock frequency (Hz)
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
+ u32 clk_index, u32 clk_rate_hz)
+{
+ struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = container_of(
+ rate_listener, struct kbase_hwcnt_backend_csf_if_fw_ctx, rate_listener);
+ u64 timestamp_ns;
+
+ if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
+ return;
+
+ timestamp_ns = ktime_get_raw_ns();
+ kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_cc_enable() - Enable cycle count tracking
+ *
+ * @fw_ctx: Non-NULL pointer to CSF firmware interface context.
+ * @clk_enable_map: Bitmask of clock domains to enable cycle counting for.
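+ *
+ * When the shader cores clock domain is enabled this subscribes a
+ * clock rate listener, so that shader core cycles can be estimated in
+ * software between rate changes; the top domain is instead read
+ * directly from hardware in get_gpu_cycle_count().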
+ */ +static void +kbasep_hwcnt_backend_csf_if_fw_cc_enable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx, + u64 clk_enable_map) +{ + struct kbase_device *kbdev = fw_ctx->kbdev; + + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + /* software estimation for non-top clock domains */ + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; + u32 cur_freq; + unsigned long flags; + u64 timestamp_ns; + + timestamp_ns = ktime_get_raw_ns(); + + spin_lock_irqsave(&rtm->lock, flags); + + cur_freq = (u32)clk_data->clock_val; + kbase_ccswe_reset(&fw_ctx->ccswe_shader_cores); + kbase_ccswe_freq_change(&fw_ctx->ccswe_shader_cores, timestamp_ns, cur_freq); + + kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &fw_ctx->rate_listener); + + spin_unlock_irqrestore(&rtm->lock, flags); + } + + fw_ctx->clk_enable_map = clk_enable_map; +} + +/** + * kbasep_hwcnt_backend_csf_if_fw_cc_disable() - Disable cycle count tracking + * + * @fw_ctx: Non-NULL pointer to CSF firmware interface context. + */ +static void +kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx) +{ + struct kbase_device *kbdev = fw_ctx->kbdev; + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + u64 clk_enable_map = fw_ctx->clk_enable_map; + + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) + kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener); +} + +static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) +{ +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ + .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS, + .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1, + .prfcnt_hw_size = + KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, + .prfcnt_fw_size = + KBASE_DUMMY_MODEL_MAX_FIRMWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE, + .dump_bytes = KBASE_DUMMY_MODEL_MAX_SAMPLE_SIZE, + .prfcnt_block_size = KBASE_DUMMY_MODEL_BLOCK_SIZE, + .clk_cnt = 1, + .clearing_samples = true, + }; + + fw_ctx->buf_bytes = prfcnt_info->dump_bytes; +#else + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx; + struct kbase_device *kbdev; + u32 prfcnt_size; + u32 prfcnt_hw_size; + u32 prfcnt_fw_size; + u32 prfcnt_block_size = + KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES; + + WARN_ON(!ctx); + WARN_ON(!prfcnt_info); + + fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + kbdev = fw_ctx->kbdev; + prfcnt_size = kbdev->csf.global_iface.prfcnt_size; + prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size); + prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size); + fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; + + /* Read the block size if the GPU has the register PRFCNT_FEATURES + * which was introduced in architecture version 11.x.7. 
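+ * Older GPUs fall back to the default initialised above:
+ * KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK counters of
+ * KBASE_HWCNT_VALUE_HW_BYTES bytes each.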
+ */ + if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >= + GPU_ID2_PRODUCT_TTUX) { + prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET( + kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES))) + << 8; + } + + *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ + .prfcnt_hw_size = prfcnt_hw_size, + .prfcnt_fw_size = prfcnt_fw_size, + .dump_bytes = fw_ctx->buf_bytes, + .prfcnt_block_size = prfcnt_block_size, + .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices, + .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask, + .clk_cnt = fw_ctx->clk_cnt, + .clearing_samples = true, + }; + + /* Block size must be multiple of counter size. */ + WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0); + /* Total size must be multiple of block size. */ + WARN_ON((prfcnt_info->dump_bytes % prfcnt_info->prfcnt_block_size) != 0); +#endif +} + +static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ring_buf **out_ring_buf) +{ + struct kbase_device *kbdev; + struct tagged_addr *phys; + struct page **page_list; + void *cpu_addr; + int ret; + int i; + size_t num_pages; + u64 flags; + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; + + pgprot_t cpu_map_prot = PAGE_KERNEL; + u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; + + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + WARN_ON(!ctx); + WARN_ON(!cpu_dump_base); + WARN_ON(!out_ring_buf); + + kbdev = fw_ctx->kbdev; + + /* The buffer count must be power of 2 */ + if (!is_power_of_2(buf_count)) + return -EINVAL; + + /* alignment failure */ + if (gpu_va_base & (2048 - 1)) + return -EINVAL; + + fw_ring_buf = kzalloc(sizeof(*fw_ring_buf), GFP_KERNEL); + if (!fw_ring_buf) + return -ENOMEM; + + num_pages = PFN_UP(fw_ctx->buf_bytes * buf_count); + phys = kmalloc_array(num_pages, sizeof(*phys), GFP_KERNEL); + if (!phys) + goto phys_alloc_error; + + page_list = kmalloc_array(num_pages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + goto page_list_alloc_error; + + /* Get physical page for the buffer */ + ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, + phys, false); + if (ret != num_pages) + goto phys_mem_pool_alloc_error; + + /* Get the CPU virtual address */ + for (i = 0; i < num_pages; i++) + page_list[i] = as_page(phys[i]); + + cpu_addr = vmap(page_list, num_pages, VM_MAP, cpu_map_prot); + if (!cpu_addr) + goto vmap_error; + + flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + + /* Update MMU table */ + ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, + num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, + mmu_sync_info); + if (ret) + goto mmu_insert_failed; + + kfree(page_list); + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + fw_ring_buf->gpu_dump_base = (uintptr_t)cpu_addr; +#else + fw_ring_buf->gpu_dump_base = gpu_va_base; +#endif /* CONFIG_MALI_NO_MALI */ + fw_ring_buf->cpu_dump_base = cpu_addr; + fw_ring_buf->phys = phys; + fw_ring_buf->num_pages = num_pages; + fw_ring_buf->buf_count = buf_count; + fw_ring_buf->as_nr = MCU_AS_NR; + + *cpu_dump_base = 
fw_ring_buf->cpu_dump_base; + *out_ring_buf = (struct kbase_hwcnt_backend_csf_if_ring_buf *)fw_ring_buf; + + return 0; + +mmu_insert_failed: + vunmap(cpu_addr); +vmap_error: + kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys, + false, false); +phys_mem_pool_alloc_error: + kfree(page_list); +page_list_alloc_error: + kfree(phys); +phys_alloc_error: + kfree(fw_ring_buf); + return -ENOMEM; +} + +static void +kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + u32 buf_index_first, u32 buf_index_last, bool for_cpu) +{ + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = + (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + size_t i; + size_t pg_first; + size_t pg_last; + u64 start_address; + u64 stop_address; + u32 ring_buf_index_first; + u32 ring_buf_index_last; + + WARN_ON(!ctx); + WARN_ON(!ring_buf); + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + /* When using the dummy backend syncing the ring buffer is unnecessary as + * the ring buffer is only accessed by the CPU. It may also cause data loss + * due to cache invalidation so return early. + */ + return; +#endif /* CONFIG_MALI_NO_MALI */ + + /* The index arguments for this function form an inclusive, exclusive + * range. + * However, when masking back to the available buffers we will make this + * inclusive at both ends so full flushes are not 0 -> 0. + */ + ring_buf_index_first = buf_index_first & (fw_ring_buf->buf_count - 1); + ring_buf_index_last = (buf_index_last - 1) & (fw_ring_buf->buf_count - 1); + + /* The start address is the offset of the first buffer. */ + start_address = fw_ctx->buf_bytes * ring_buf_index_first; + pg_first = start_address >> PAGE_SHIFT; + + /* The stop address is the last byte in the final buffer. */ + stop_address = (fw_ctx->buf_bytes * (ring_buf_index_last + 1)) - 1; + pg_last = stop_address >> PAGE_SHIFT; + + /* Check whether the buffer range wraps. */ + if (start_address > stop_address) { + /* sync the first part to the end of ring buffer. */ + for (i = pg_first; i < fw_ring_buf->num_pages; i++) { + struct page *pg = as_page(fw_ring_buf->phys[i]); + + if (for_cpu) { + kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), + PAGE_SIZE, DMA_BIDIRECTIONAL); + } else { + kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), + PAGE_SIZE, DMA_BIDIRECTIONAL); + } + } + + /* second part starts from page 0. 
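+ * For example, with buf_count = 4, a sync of buffers 3..4 (exclusive)
+ * masks to ring position 3 only, while a wrapped range such as 3..6
+ * masks to 3..1: pages for buffer 3 up to the end of the mapping are
+ * synced first, then the loop below restarts from page 0 to cover
+ * buffers 0 and 1.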
*/ + pg_first = 0; + } + + for (i = pg_first; i <= pg_last; i++) { + struct page *pg = as_page(fw_ring_buf->phys[i]); + + if (for_cpu) { + kbase_sync_single_for_cpu(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, + DMA_BIDIRECTIONAL); + } else { + kbase_sync_single_for_device(fw_ctx->kbdev, kbase_dma_addr(pg), PAGE_SIZE, + DMA_BIDIRECTIONAL); + } + } +} + +static u64 kbasep_hwcnt_backend_csf_if_fw_timestamp_ns(struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + CSTD_UNUSED(ctx); + return ktime_get_raw_ns(); +} + +static void +kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf) +{ + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = + (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + if (!fw_ring_buf) + return; + + if (fw_ring_buf->phys) { + u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; + + WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, + gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, + fw_ring_buf->num_pages, MCU_AS_NR)); + + vunmap(fw_ring_buf->cpu_dump_base); + + kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], + fw_ring_buf->num_pages, fw_ring_buf->phys, false, false); + + kfree(fw_ring_buf->phys); + + kfree(fw_ring_buf); + } +} + +static void +kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable) +{ + u32 prfcnt_config; + struct kbase_device *kbdev; + struct kbase_csf_global_iface *global_iface; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = + (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + + WARN_ON(!ctx); + WARN_ON(!ring_buf); + WARN_ON(!enable); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + kbdev = fw_ctx->kbdev; + global_iface = &kbdev->csf.global_iface; + + /* Configure */ + prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count); + prfcnt_config = GLB_PRFCNT_CONFIG_SET_SELECT_SET(prfcnt_config, enable->counter_set); + + /* Configure the ring buffer base address */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_JASID, fw_ring_buf->as_nr); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_LO, + fw_ring_buf->gpu_dump_base & U32_MAX); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_BASE_HI, + fw_ring_buf->gpu_dump_base >> 32); + + /* Set extract position to 0 */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_EXTRACT, 0); + + /* Configure the enable bitmap */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSF_EN, enable->fe_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm); + + /* Configure the HWC set and buffer size */ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config); + + kbdev->csf.hwcnt.enable_pending = true; + + /* Unmask the interrupts */ + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); + 
kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK, + GLB_ACK_IRQ_MASK_PRFCNT_ENABLE_MASK); + + /* Enable the HWC */ + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, + (1 << GLB_REQ_PRFCNT_ENABLE_SHIFT), + GLB_REQ_PRFCNT_ENABLE_MASK); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + prfcnt_config = kbase_csf_firmware_global_input_read(global_iface, GLB_PRFCNT_CONFIG); + + kbasep_hwcnt_backend_csf_if_fw_cc_enable(fw_ctx, enable->clk_enable_map); +} + +static void kbasep_hwcnt_backend_csf_if_fw_dump_disable(struct kbase_hwcnt_backend_csf_if_ctx *ctx) +{ + struct kbase_device *kbdev; + struct kbase_csf_global_iface *global_iface; + struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx = + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; + + WARN_ON(!ctx); + kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx); + + kbdev = fw_ctx->kbdev; + global_iface = &kbdev->csf.global_iface; + + /* Disable the HWC */ + kbdev->csf.hwcnt.enable_pending = true; + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, 0, GLB_REQ_PRFCNT_ENABLE_MASK); + kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR); + + /* mask the interrupts */ + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_SAMPLE_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_THRESHOLD_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_ACK_IRQ_MASK, 0, + GLB_ACK_IRQ_MASK_PRFCNT_OVERFLOW_MASK); + + /* In case we have a previous request in flight when the disable + * happens. 
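+	 *
+	 * Clearing request_pending below keeps a late PRFCNT_SAMPLE
+	 * acknowledge, raised for that in-flight request, from being taken as
+	 * the completion of a future client dump_request (a reading of how
+	 * the flag is used here, not a statement from the firmware spec).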
+	 */
+	kbdev->csf.hwcnt.request_pending = false;
+
+	kbasep_hwcnt_backend_csf_if_fw_cc_disable(fw_ctx);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_dump_request(struct kbase_hwcnt_backend_csf_if_ctx *ctx)
+{
+	u32 glb_req;
+	struct kbase_device *kbdev;
+	struct kbase_csf_global_iface *global_iface;
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	WARN_ON(!ctx);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	kbdev = fw_ctx->kbdev;
+	global_iface = &kbdev->csf.global_iface;
+
+	/* Trigger dumping. The sample request uses toggle semantics: the
+	 * PRFCNT_SAMPLE bit in GLB_REQ is flipped, and the firmware treats
+	 * the resulting mismatch with the corresponding GLB_ACK bit as a new
+	 * request.
+	 */
+	kbdev->csf.hwcnt.request_pending = true;
+	glb_req = kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
+	glb_req ^= GLB_REQ_PRFCNT_SAMPLE_MASK;
+	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_req,
+					     GLB_REQ_PRFCNT_SAMPLE_MASK);
+	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+static void kbasep_hwcnt_backend_csf_if_fw_get_indexes(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+						       u32 *extract_index, u32 *insert_index)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	WARN_ON(!ctx);
+	WARN_ON(!extract_index);
+	WARN_ON(!insert_index);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	*extract_index = kbase_csf_firmware_global_input_read(&fw_ctx->kbdev->csf.global_iface,
+							      GLB_PRFCNT_EXTRACT);
+	*insert_index = kbase_csf_firmware_global_output(&fw_ctx->kbdev->csf.global_iface,
+							 GLB_PRFCNT_INSERT);
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_set_extract_index(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+						 u32 extract_idx)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+
+	WARN_ON(!ctx);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	/* Set the raw extract index to release the buffer back to the ring
+	 * buffer.
+	 */
+	kbase_csf_firmware_global_input(&fw_ctx->kbdev->csf.global_iface, GLB_PRFCNT_EXTRACT,
+					extract_idx);
+}
+
+static void
+kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+						   u64 *cycle_counts, u64 clk_enable_map)
+{
+	struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx =
+		(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
+	u8 clk;
+	u64 timestamp_ns = ktime_get_raw_ns();
+
+	WARN_ON(!ctx);
+	WARN_ON(!cycle_counts);
+	kbasep_hwcnt_backend_csf_if_fw_assert_lock_held(ctx);
+
+	for (clk = 0; clk < fw_ctx->clk_cnt; clk++) {
+		if (!(clk_enable_map & (1ull << clk)))
+			continue;
+
+		if (clk == KBASE_CLOCK_DOMAIN_TOP) {
+			/* Read cycle count for top clock domain. */
+			kbase_backend_get_gpu_time_norequest(fw_ctx->kbdev, &cycle_counts[clk],
+							     NULL, NULL);
+		} else {
+			/* Estimate cycle count for non-top clock domain. */
+			cycle_counts[clk] =
+				kbase_ccswe_cycle_at(&fw_ctx->ccswe_shader_cores, timestamp_ns);
+		}
+	}
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_destroy() - Destroy a CSF FW interface context.
+ *
+ * @fw_ctx: Pointer to context to destroy.
+ */
+static void
+kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(struct kbase_hwcnt_backend_csf_if_fw_ctx *fw_ctx)
+{
+	if (!fw_ctx)
+		return;
+
+	kfree(fw_ctx);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_if_fw_ctx_create() - Create a CSF Firmware context.
+ *
+ * @kbdev:   Non-NULL pointer to kbase device.
+ * @out_ctx: Non-NULL pointer to where the created context is stored on
+ *           success.
+ *
+ * Return: 0 on success, else error code.
+ */ +static int +kbasep_hwcnt_backend_csf_if_fw_ctx_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_csf_if_fw_ctx **out_ctx) +{ + u8 clk; + int errcode = -ENOMEM; + struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; + + WARN_ON(!kbdev); + WARN_ON(!out_ctx); + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + goto error; + + ctx->kbdev = kbdev; + + /* Determine the number of available clock domains. */ + for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { + if (kbdev->pm.clk_rtm.clks[clk] == NULL) + break; + } + ctx->clk_cnt = clk; + + ctx->clk_enable_map = 0; + kbase_ccswe_init(&ctx->ccswe_shader_cores); + ctx->rate_listener.notify = kbasep_hwcnt_backend_csf_if_fw_on_freq_change; + + *out_ctx = ctx; + + return 0; +error: + kbasep_hwcnt_backend_csf_if_fw_ctx_destroy(ctx); + return errcode; +} + +void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw) +{ + if (!if_fw) + return; + + kbasep_hwcnt_backend_csf_if_fw_ctx_destroy( + (struct kbase_hwcnt_backend_csf_if_fw_ctx *)if_fw->ctx); + memset(if_fw, 0, sizeof(*if_fw)); +} + +int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_csf_if *if_fw) +{ + int errcode; + struct kbase_hwcnt_backend_csf_if_fw_ctx *ctx = NULL; + + if (!kbdev || !if_fw) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_csf_if_fw_ctx_create(kbdev, &ctx); + if (errcode) + return errcode; + + if_fw->ctx = (struct kbase_hwcnt_backend_csf_if_ctx *)ctx; + if_fw->assert_lock_held = kbasep_hwcnt_backend_csf_if_fw_assert_lock_held; + if_fw->lock = kbasep_hwcnt_backend_csf_if_fw_lock; + if_fw->unlock = kbasep_hwcnt_backend_csf_if_fw_unlock; + if_fw->get_prfcnt_info = kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info; + if_fw->ring_buf_alloc = kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc; + if_fw->ring_buf_sync = kbasep_hwcnt_backend_csf_if_fw_ring_buf_sync; + if_fw->ring_buf_free = kbasep_hwcnt_backend_csf_if_fw_ring_buf_free; + if_fw->timestamp_ns = kbasep_hwcnt_backend_csf_if_fw_timestamp_ns; + if_fw->dump_enable = kbasep_hwcnt_backend_csf_if_fw_dump_enable; + if_fw->dump_disable = kbasep_hwcnt_backend_csf_if_fw_dump_disable; + if_fw->dump_request = kbasep_hwcnt_backend_csf_if_fw_dump_request; + if_fw->get_gpu_cycle_count = kbasep_hwcnt_backend_csf_if_fw_get_gpu_cycle_count; + if_fw->get_indexes = kbasep_hwcnt_backend_csf_if_fw_get_indexes; + if_fw->set_extract_index = kbasep_hwcnt_backend_csf_if_fw_set_extract_index; + + return 0; +} diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h new file mode 100644 index 0000000..71d1506 --- /dev/null +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Concrete implementation of kbase_hwcnt_backend_csf_if interface for CSF FW + */ + +#ifndef _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ +#define _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ + +#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" + +/** + * kbase_hwcnt_backend_csf_if_fw_create() - Create a firmware CSF interface + * of hardware counter backend. + * @kbdev: Non-NULL pointer to Kbase device. + * @if_fw: Non-NULL pointer to backend interface structure that is filled in on + * creation success. + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_backend_csf_if_fw_create(struct kbase_device *kbdev, + struct kbase_hwcnt_backend_csf_if *if_fw); + +/** + * kbase_hwcnt_backend_csf_if_fw_destroy() - Destroy a firmware CSF interface of + * hardware counter backend. + * @if_fw: Pointer to a CSF interface to destroy. + */ +void kbase_hwcnt_backend_csf_if_fw_destroy(struct kbase_hwcnt_backend_csf_if *if_fw); + +#endif /* _KBASE_HWCNT_BACKEND_CSF_IF_FW_H_ */ diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c new file mode 100644 index 0000000..6ddd7ba --- /dev/null +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -0,0 +1,863 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h" +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" +#include "mali_kbase.h" +#include "backend/gpu/mali_kbase_pm_ca.h" +#include "mali_kbase_hwaccess_instr.h" +#include "mali_kbase_hwaccess_time.h" +#include "mali_kbase_ccswe.h" + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) +#include "backend/gpu/mali_kbase_model_dummy.h" +#endif /* CONFIG_MALI_NO_MALI */ +#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h" + +#include "backend/gpu/mali_kbase_pm_internal.h" + +/** + * struct kbase_hwcnt_backend_jm_info - Information used to create an instance + * of a JM hardware counter backend. + * @kbdev: KBase device. + * @counter_set: The performance counter set to use. + * @metadata: Hardware counter metadata. + * @dump_bytes: Bytes of GPU memory required to perform a + * hardware counter dump. + * @hwcnt_gpu_info: Hardware counter block information. + */ +struct kbase_hwcnt_backend_jm_info { + struct kbase_device *kbdev; + enum kbase_hwcnt_set counter_set; + const struct kbase_hwcnt_metadata *metadata; + size_t dump_bytes; + struct kbase_hwcnt_gpu_info hwcnt_gpu_info; +}; + +/** + * struct kbase_hwcnt_jm_physical_layout - HWC sample memory physical layout + * information. 
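+ *
+ * A dump sample is a flat array of blocks in the order: front end, tiler,
+ * memory system (MMU/L2), then shader cores (ordering as implied by the
+ * fields below and their use in this file). Each block holds
+ * @values_per_block 32-bit hardware values, beginning with
+ * @headers_per_block header values which include the block's enable mask.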
+ * @fe_cnt:             Front end block count.
+ * @tiler_cnt:          Tiler block count.
+ * @mmu_l2_cnt:         Memory system (MMU and L2 cache) block count.
+ * @shader_cnt:         Shader core block count.
+ * @block_cnt:          Total block count (sum of all other block counts).
+ * @shader_avail_mask:  Bitmap of all shader cores in the system.
+ * @enable_mask_offset: Offset (in array elements) of the enable mask within
+ *                      each block, from the beginning of the block.
+ * @headers_per_block:  Number of header values at the start of each block.
+ * @counters_per_block: Number of counter values in each block.
+ * @values_per_block:   Total number of values per block (headers plus
+ *                      counters).
+ */
+struct kbase_hwcnt_jm_physical_layout {
+	u8 fe_cnt;
+	u8 tiler_cnt;
+	u8 mmu_l2_cnt;
+	u8 shader_cnt;
+	u8 block_cnt;
+	u64 shader_avail_mask;
+	size_t enable_mask_offset;
+	size_t headers_per_block;
+	size_t counters_per_block;
+	size_t values_per_block;
+};
+
+/**
+ * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
+ * @info:                Info used to create the backend.
+ * @kctx:                KBase context used for GPU memory allocation and
+ *                       counter dumping.
+ * @gpu_dump_va:         GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va:         CPU mapping of gpu_dump_va.
+ * @vmap:                Dump buffer vmap.
+ * @to_user_buf:         HWC sample buffer for the user client, of size
+ *                       metadata.dump_buf_bytes.
+ * @enabled:             True if dumping has been enabled, else false.
+ * @pm_core_mask:        PM state synced shader core mask for the enabled
+ *                       dumping.
+ * @curr_config:         Current allocated hardware resources, used to
+ *                       correctly map the source raw dump buffer to the
+ *                       destination dump buffer.
+ * @clk_enable_map:      The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed: Cycle count elapsed for a given sample period.
+ *                       The top clock domain, index 0, is read directly from
+ *                       hardware; the other clock domains are estimated in
+ *                       software.
+ * @prev_cycle_count:    Previous cycle count, used to calculate the cycle
+ *                       count for a sample period.
+ * @rate_listener:       Clock rate listener callback state.
+ * @ccswe_shader_cores:  Shader cores cycle count software estimator.
+ * @phys_layout:         Physical memory layout information of the HWC sample
+ *                       buffer.
+ */
+struct kbase_hwcnt_backend_jm {
+	const struct kbase_hwcnt_backend_jm_info *info;
+	struct kbase_context *kctx;
+	u64 gpu_dump_va;
+	void *cpu_dump_va;
+	struct kbase_vmap_struct *vmap;
+	u64 *to_user_buf;
+	bool enabled;
+	u64 pm_core_mask;
+	struct kbase_hwcnt_curr_config curr_config;
+	u64 clk_enable_map;
+	u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
+	u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
+	struct kbase_clk_rate_listener rate_listener;
+	struct kbase_ccswe ccswe_shader_cores;
+	struct kbase_hwcnt_jm_physical_layout phys_layout;
+};
+
+/**
+ * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used
+ *                                           to create the hwcnt metadata.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @info:  Non-NULL pointer to data structure to be filled in.
+ *
+ * The initialised info struct will only be valid for use while kbdev is valid.
+ *
+ * Return: 0 on success, else error code.
+ */ +static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, + struct kbase_hwcnt_gpu_info *info) +{ + size_t clk; + + if (!kbdev || !info) + return -EINVAL; + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; + info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; +#else /* CONFIG_MALI_NO_MALI */ + { + const struct base_gpu_props *props = &kbdev->gpu_props.props; + const size_t l2_count = props->l2_props.num_l2_slices; + const size_t core_mask = props->coherency_info.group[0].core_mask; + + info->l2_count = l2_count; + info->core_mask = core_mask; + info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; + } +#endif /* CONFIG_MALI_NO_MALI */ + + /* Determine the number of available clock domains. */ + for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { + if (kbdev->pm.clk_rtm.clks[clk] == NULL) + break; + } + info->clk_cnt = clk; + + return 0; +} + +static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info, + struct kbase_hwcnt_jm_physical_layout *phys_layout) +{ + u8 shader_core_cnt; + + WARN_ON(!gpu_info); + WARN_ON(!phys_layout); + + shader_core_cnt = fls64(gpu_info->core_mask); + + *phys_layout = (struct kbase_hwcnt_jm_physical_layout){ + .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, + .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, + .mmu_l2_cnt = gpu_info->l2_count, + .shader_cnt = shader_core_cnt, + .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + + gpu_info->l2_count + shader_core_cnt, + .shader_avail_mask = gpu_info->core_mask, + .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .values_per_block = gpu_info->prfcnt_values_per_block, + .counters_per_block = + gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER, + }; +} + +static void +kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm) +{ + size_t block_idx; + const u32 *new_sample_buf = backend_jm->cpu_dump_va; + const u32 *new_block = new_sample_buf; + u64 *dst_buf = backend_jm->to_user_buf; + u64 *dst_block = dst_buf; + const size_t values_per_block = backend_jm->phys_layout.values_per_block; + const size_t dump_bytes = backend_jm->info->dump_bytes; + + for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) { + size_t ctr_idx; + + for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++) + dst_block[ctr_idx] = new_block[ctr_idx]; + + new_block += values_per_block; + dst_block += values_per_block; + } + + WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); + WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); +} + +/** + * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback + * + * @rate_listener: Callback state + * @clk_index: Clock index + * @clk_rate_hz: Clock frequency(hz) + */ +static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener, + u32 clk_index, u32 clk_rate_hz) +{ + struct kbase_hwcnt_backend_jm *backend_jm = + container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener); + u64 timestamp_ns; + + if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES) + return; + + timestamp_ns = ktime_get_raw_ns(); + kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz); +} + +/** + * 
kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking + * + * @backend_jm: Non-NULL pointer to backend. + * @enable_map: Non-NULL pointer to enable map specifying enabled counters. + * @timestamp_ns: Timestamp(ns) when HWCNT were enabled. + */ +static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm, + const struct kbase_hwcnt_enable_map *enable_map, + u64 timestamp_ns) +{ + struct kbase_device *kbdev = backend_jm->kctx->kbdev; + u64 clk_enable_map = enable_map->clk_enable_map; + u64 cycle_count; + + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + /* turn on the cycle counter */ + kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev); + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL); + + backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count; + } + + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + /* software estimation for non-top clock domains */ + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES]; + u32 cur_freq; + unsigned long flags; + + spin_lock_irqsave(&rtm->lock, flags); + + cur_freq = (u32)clk_data->clock_val; + kbase_ccswe_reset(&backend_jm->ccswe_shader_cores); + kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq); + + kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener); + + spin_unlock_irqrestore(&rtm->lock, flags); + + /* ccswe was reset. The estimated cycle is zero. */ + backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0; + } + + /* Keep clk_enable_map for dump_request. */ + backend_jm->clk_enable_map = clk_enable_map; +} + +/** + * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking + * + * @backend_jm: Non-NULL pointer to backend. + */ +static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm) +{ + struct kbase_device *kbdev = backend_jm->kctx->kbdev; + struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm; + u64 clk_enable_map = backend_jm->clk_enable_map; + + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) { + /* turn off the cycle counter */ + kbase_pm_release_gpu_cycle_counter(kbdev); + } + + if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) { + kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener); + } +} + +/** + * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with + * current config information. + * @kbdev: Non-NULL pointer to kbase device. + * @curr_config: Non-NULL pointer to return the current configuration of + * hardware allocated to the GPU. + * + * The current configuration information is used for architectures where the + * max_config interface is available from the Arbiter. In this case the current + * allocated hardware is not always the same, so the current config information + * is used to correctly map the current allocated resources to the memory layout + * that is copied to the user space. + * + * Return: 0 on success, else error code. 
+ */ +static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev, + struct kbase_hwcnt_curr_config *curr_config) +{ + if (WARN_ON(!kbdev) || WARN_ON(!curr_config)) + return -EINVAL; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices; + curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present; + return 0; +} + +/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */ +static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend) +{ + (void)backend; + return ktime_get_raw_ns(); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */ +static int +kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_context *kctx; + struct kbase_device *kbdev; + struct kbase_hwcnt_physical_enable_map phys_enable_map; + enum kbase_hwcnt_physical_set phys_counter_set; + struct kbase_instr_hwcnt_enable enable; + u64 timestamp_ns; + + if (!backend_jm || !enable_map || backend_jm->enabled || + (enable_map->metadata != backend_jm->info->metadata)) + return -EINVAL; + + kctx = backend_jm->kctx; + kbdev = backend_jm->kctx->kbdev; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map); + + kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set); + + enable.fe_bm = phys_enable_map.fe_bm; + enable.shader_bm = phys_enable_map.shader_bm; + enable.tiler_bm = phys_enable_map.tiler_bm; + enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm; + enable.counter_set = phys_counter_set; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + /* The dummy model needs the CPU mapping. */ + enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va; +#else + enable.dump_buffer = backend_jm->gpu_dump_va; +#endif /* CONFIG_MALI_NO_MALI */ + enable.dump_buffer_bytes = backend_jm->info->dump_bytes; + + timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + + /* Update the current configuration information. 
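+	 * This runs under kbdev->hwaccess_lock (asserted above), so the L2
+	 * slice count and shader_present mask are captured as one coherent
+	 * configuration for the dump that is about to be enabled.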
*/ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); + if (errcode) + goto error; + + errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable); + if (errcode) + goto error; + + backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); + + backend_jm->enabled = true; + + kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); + + return 0; +error: + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */ +static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + unsigned long flags; + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + + if (!backend_jm) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ +static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + + if (WARN_ON(!backend_jm) || !backend_jm->enabled) + return; + + kbasep_hwcnt_backend_jm_cc_disable(backend_jm); + + errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); + WARN_ON(errcode); + + backend_jm->enabled = false; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */ +static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_clear(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */ +static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + struct kbase_device *kbdev; + const struct kbase_hwcnt_metadata *metadata; + u64 current_cycle_count; + size_t clk; + int ret; + + if (!backend_jm || !backend_jm->enabled || !dump_time_ns) + return -EINVAL; + + kbdev = backend_jm->kctx->kbdev; + metadata = backend_jm->info->metadata; + + /* Disable pre-emption, to make the timestamp as accurate as possible */ + preempt_disable(); + { + *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); + ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { + if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk)) + continue; + + if (clk == KBASE_CLOCK_DOMAIN_TOP) { + /* Read cycle count for top clock domain. */ + kbase_backend_get_gpu_time_norequest(kbdev, ¤t_cycle_count, + NULL, NULL); + } else { + /* + * Estimate cycle count for non-top clock + * domain. + */ + current_cycle_count = kbase_ccswe_cycle_at( + &backend_jm->ccswe_shader_cores, *dump_time_ns); + } + backend_jm->cycle_count_elapsed[clk] = + current_cycle_count - backend_jm->prev_cycle_count[clk]; + + /* + * Keep the current cycle count for later calculation. 
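+				 *
+				 * E.g. (hypothetical numbers): prev = 1000,
+				 * current = 1500 gives cycle_count_elapsed =
+				 * 500 for this sample period; as the counts
+				 * are u64, the subtraction above also yields
+				 * the correct elapsed value across a counter
+				 * wrap.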
+ */ + backend_jm->prev_cycle_count[clk] = current_cycle_count; + } + } + preempt_enable(); + + return ret; +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ +static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + + if (!backend_jm || !backend_jm->enabled) + return -EINVAL; + + return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); +} + +/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ +static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + bool accumulate) +{ + struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; + size_t clk; +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + struct kbase_device *kbdev; + unsigned long flags; + int errcode; +#endif /* CONFIG_MALI_NO_MALI */ + + if (!backend_jm || !dst || !dst_enable_map || + (backend_jm->info->metadata != dst->metadata) || + (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + /* Invalidate the kernel buffer before reading from it. */ + kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU); + + /* Dump sample to the internal 64-bit user buffer. */ + kbasep_hwcnt_backend_jm_dump_sample(backend_jm); + + /* Extract elapsed cycle count for each clock domain if enabled. */ + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) + { + if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) + continue; + + /* Reset the counter to zero if accumulation is off. */ + if (!accumulate) + dst->clk_cnt_buf[clk] = 0; + dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk]; + } + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + kbdev = backend_jm->kctx->kbdev; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + + /* Update the current configuration information. */ + errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config); + + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (errcode) + return errcode; +#endif /* CONFIG_MALI_NO_MALI */ + return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, + backend_jm->pm_core_mask, &backend_jm->curr_config, + accumulate); +} + +/** + * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer. + * @info: Non-NULL pointer to JM backend info. + * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address + * is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_context *kctx, u64 *gpu_dump_va) +{ + struct kbase_va_region *reg; + u64 flags; + u64 nr_pages; + + /* Calls to this function are inherently asynchronous, with respect to + * MMU operations. + */ + const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + + WARN_ON(!info); + WARN_ON(!kctx); + WARN_ON(!gpu_dump_va); + + flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR | BASEP_MEM_PERMANENT_KERNEL_MAPPING | + BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU; + + nr_pages = PFN_UP(info->dump_bytes); + + reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info); + + if (!reg) + return -ENOMEM; + + return 0; +} + +/** + * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer. 
+ * @kctx: Non-NULL pointer to kbase context. + * @gpu_dump_va: GPU dump buffer virtual address. + */ +static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va) +{ + WARN_ON(!kctx); + if (gpu_dump_va) + kbase_mem_free(kctx, gpu_dump_va); +} + +/** + * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend. + * @backend: Pointer to JM backend to destroy. + * + * Can be safely called on a backend in any state of partial construction. + */ +static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend) +{ + if (!backend) + return; + + if (backend->kctx) { + struct kbase_context *kctx = backend->kctx; + struct kbase_device *kbdev = kctx->kbdev; + + if (backend->cpu_dump_va) + kbase_phy_alloc_mapping_put(kctx, backend->vmap); + + if (backend->gpu_dump_va) + kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va); + + kbasep_js_release_privileged_ctx(kbdev, kctx); + kbase_destroy_context(kctx); + } + + kfree(backend->to_user_buf); + + kfree(backend); +} + +/** + * kbasep_hwcnt_backend_jm_create() - Create a JM backend. + * @info: Non-NULL pointer to backend info. + * @out_backend: Non-NULL pointer to where backend is stored on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info, + struct kbase_hwcnt_backend_jm **out_backend) +{ + int errcode; + struct kbase_device *kbdev; + struct kbase_hwcnt_backend_jm *backend = NULL; + + WARN_ON(!info); + WARN_ON(!out_backend); + + kbdev = info->kbdev; + + backend = kzalloc(sizeof(*backend), GFP_KERNEL); + if (!backend) + goto alloc_error; + + backend->info = info; + kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout); + + backend->kctx = kbase_create_context(kbdev, true, + BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL); + if (!backend->kctx) + goto alloc_error; + + kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx); + + errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va); + if (errcode) + goto error; + + backend->cpu_dump_va = + kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap); + if (!backend->cpu_dump_va || !backend->vmap) + goto alloc_error; + + backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL); + if (!backend->to_user_buf) + goto alloc_error; + + kbase_ccswe_init(&backend->ccswe_shader_cores); + backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; + + *out_backend = backend; + return 0; + +alloc_error: + errcode = -ENOMEM; +error: + kbasep_hwcnt_backend_jm_destroy(backend); + return errcode; +} + +/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */ +static const struct kbase_hwcnt_metadata * +kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info) +{ + if (!info) + return NULL; + + return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata; +} + +/* JM backend implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode; + struct kbase_hwcnt_backend_jm *backend = NULL; + + if (!info || !out_backend) + return -EINVAL; + + errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info, + &backend); + if (errcode) + return errcode; + + *out_backend = (struct kbase_hwcnt_backend *)backend; + + return 0; +} + +/* JM backend implementation of 
kbase_hwcnt_backend_term_fn */
+static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
+{
+	if (!backend)
+		return;
+
+	kbasep_hwcnt_backend_jm_dump_disable(backend);
+	kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info.
+ * @info: Pointer to info to destroy.
+ *
+ * Can be safely called on a backend info in any state of partial construction.
+ */
+static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info)
+{
+	if (!info)
+		return;
+
+	kbase_hwcnt_jm_metadata_destroy(info->metadata);
+	kfree(info);
+}
+
+/**
+ * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info.
+ * @kbdev:    Non-NULL pointer to kbase device.
+ * @out_info: Non-NULL pointer to where info is stored on success.
+ *
+ * Return: 0 on success, else error code.
+ */
+static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev,
+					       const struct kbase_hwcnt_backend_jm_info **out_info)
+{
+	int errcode = -ENOMEM;
+	struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+	WARN_ON(!kbdev);
+	WARN_ON(!out_info);
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return errcode;
+
+	info->kbdev = kbdev;
+
+#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
+	info->counter_set = KBASE_HWCNT_SET_SECONDARY;
+#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
+	info->counter_set = KBASE_HWCNT_SET_TERTIARY;
+#else
+	/* Default to primary */
+	info->counter_set = KBASE_HWCNT_SET_PRIMARY;
+#endif
+
+	errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info);
+	if (errcode)
+		goto error;
+
+	errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set,
+						 &info->metadata, &info->dump_bytes);
+	if (errcode)
+		goto error;
+
+	*out_info = info;
+
+	return 0;
+error:
+	kbasep_hwcnt_backend_jm_info_destroy(info);
+	return errcode;
+}
+
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+				  struct kbase_hwcnt_backend_interface *iface)
+{
+	int errcode;
+	const struct kbase_hwcnt_backend_jm_info *info = NULL;
+
+	if (!kbdev || !iface)
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info);
+
+	if (errcode)
+		return errcode;
+
+	iface->info = (struct kbase_hwcnt_backend_info *)info;
+	iface->metadata = kbasep_hwcnt_backend_jm_metadata;
+	iface->init = kbasep_hwcnt_backend_jm_init;
+	iface->term = kbasep_hwcnt_backend_jm_term;
+	iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns;
+	iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable;
+	iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock;
+	iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable;
+	iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear;
+	iface->dump_request = kbasep_hwcnt_backend_jm_dump_request;
+	iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait;
+	iface->dump_get = kbasep_hwcnt_backend_jm_dump_get;
+
+	return 0;
+}
+
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+	if (!iface)
+		return;
+
+	kbasep_hwcnt_backend_jm_info_destroy(
+		(const struct kbase_hwcnt_backend_jm_info *)iface->info);
+	memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
new file mode 100644
index 0000000..4a6293c
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of mali_kbase_hwcnt_backend interface for JM
+ * backend.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_JM_H_
+#define _KBASE_HWCNT_BACKEND_JM_H_
+
+#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
+
+struct kbase_device;
+
+/**
+ * kbase_hwcnt_backend_jm_create() - Create a JM hardware counter backend
+ *                                   interface.
+ * @kbdev: Non-NULL pointer to kbase device.
+ * @iface: Non-NULL pointer to backend interface structure that is filled in
+ *         on creation success.
+ *
+ * Calls to iface->dump_enable_nolock() require kbdev->hwaccess_lock to be
+ * held.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
+				  struct kbase_hwcnt_backend_interface *iface);
+
+/**
+ * kbase_hwcnt_backend_jm_destroy() - Destroy a JM hardware counter backend
+ *                                    interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
+ */
+void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface);
+
+#endif /* _KBASE_HWCNT_BACKEND_JM_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
new file mode 100644
index 0000000..a8654ea
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+
+#include <hwcnt/mali_kbase_hwcnt_gpu.h>
+#include <hwcnt/mali_kbase_hwcnt_types.h>
+
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
+
+#if IS_ENABLED(CONFIG_MALI_IS_FPGA) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/* Backend watchdog timer interval in milliseconds: 18 seconds.
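+ *
+ * (A longer interval is used for FPGA builds, presumably because the much
+ * lower FPGA clock rates make the counters accumulate, and therefore
+ * saturate, far more slowly; the rationale is not stated in this patch.)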
+ */
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 18000;
+#else
+/* Backend watchdog timer interval in milliseconds: 1 second. */
+static const u32 hwcnt_backend_watchdog_timer_interval_ms = 1000;
+#endif /* IS_FPGA && !NO_MALI */
+
+/*
+ * IDLE_BUFFER_EMPTY            -> USER_DUMPING_BUFFER_EMPTY on dump_request.
+ * IDLE_BUFFER_EMPTY            -> TIMER_DUMPING after
+ *                                 hwcnt_backend_watchdog_timer_interval_ms
+ *                                 milliseconds, if no dump_request has been
+ *                                 called in the meantime.
+ * IDLE_BUFFER_FULL             -> USER_DUMPING_BUFFER_FULL on dump_request.
+ * IDLE_BUFFER_FULL             -> TIMER_DUMPING after
+ *                                 hwcnt_backend_watchdog_timer_interval_ms
+ *                                 milliseconds, if no dump_request has been
+ *                                 called in the meantime.
+ * IDLE_BUFFER_FULL             -> IDLE_BUFFER_EMPTY on dump_disable, upon discarding undumped
+ *                                 counter values since the last dump_get.
+ * IDLE_BUFFER_EMPTY            -> BUFFER_CLEARING on dump_clear, before calling job manager
+ *                                 backend dump_clear.
+ * IDLE_BUFFER_FULL             -> BUFFER_CLEARING on dump_clear, before calling job manager
+ *                                 backend dump_clear.
+ * USER_DUMPING_BUFFER_EMPTY    -> BUFFER_CLEARING on dump_clear, before calling job manager
+ *                                 backend dump_clear.
+ * USER_DUMPING_BUFFER_FULL     -> BUFFER_CLEARING on dump_clear, before calling job manager
+ *                                 backend dump_clear.
+ * BUFFER_CLEARING              -> IDLE_BUFFER_EMPTY on dump_clear, upon job manager backend
+ *                                 dump_clear completion.
+ * TIMER_DUMPING                -> IDLE_BUFFER_FULL on timer's callback completion.
+ * TIMER_DUMPING                -> TIMER_DUMPING_USER_CLEAR on dump_clear, notifies the callback
+ *                                 thread that there is no need for dumping the buffer
+ *                                 anymore, and that the client will proceed
+ *                                 clearing the buffer.
+ * TIMER_DUMPING_USER_CLEAR     -> IDLE_BUFFER_EMPTY on timer's callback completion, when a user
+ *                                 requested a dump_clear.
+ * TIMER_DUMPING                -> TIMER_DUMPING_USER_REQUESTED on dump_request, when a client
+ *                                 performs a dump request while the timer is dumping (the
+ *                                 timer will perform the dump and (once
+ *                                 completed) the client will retrieve the value
+ *                                 from the buffer).
+ * TIMER_DUMPING_USER_REQUESTED -> IDLE_BUFFER_EMPTY on dump_get, when a timer completed and the
+ *                                 user reads the periodic dump buffer.
+ * Any                          -> ERROR if the job manager backend returns an error
+ *                                 (of any kind).
+ * USER_DUMPING_BUFFER_EMPTY    -> IDLE_BUFFER_EMPTY on dump_get (performs get, ignores the
+ *                                 periodic dump buffer and returns).
+ * USER_DUMPING_BUFFER_FULL     -> IDLE_BUFFER_EMPTY on dump_get (performs get, accumulates with
+ *                                 periodic dump buffer and returns).
+ */
+
+/** enum backend_watchdog_state - State used to synchronize timer callbacks with the main thread.
+ * @HWCNT_JM_WD_ERROR: Received an error from the job manager backend calls.
+ * @HWCNT_JM_WD_IDLE_BUFFER_EMPTY: Initial state. Watchdog timer enabled, periodic dump buffer is
+ *                                 empty.
+ * @HWCNT_JM_WD_IDLE_BUFFER_FULL: Watchdog timer enabled, periodic dump buffer is full.
+ * @HWCNT_JM_WD_BUFFER_CLEARING: The client is performing a dump clear. A concurrent timer callback
+ *                               thread should just ignore and reschedule another callback in
+ *                               hwcnt_backend_watchdog_timer_interval_ms milliseconds.
+ * @HWCNT_JM_WD_TIMER_DUMPING: The timer ran out. The callback is performing a periodic dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: While the timer is performing a periodic dump, user
+ *                                            requested a dump.
+ * @HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR: While the timer is performing a dump, user requested a
+ *                                        dump_clear.
The timer has to complete the periodic dump + * and clear buffer (internal and job manager backend). + * @HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: From IDLE state, user requested a dump. The periodic + * dump buffer is empty. + * @HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: From IDLE state, user requested a dump. The periodic dump + * buffer is full. + * + * While the state machine is in HWCNT_JM_WD_TIMER_DUMPING*, only the timer callback thread is + * allowed to call the job manager backend layer. + */ +enum backend_watchdog_state { + HWCNT_JM_WD_ERROR, + HWCNT_JM_WD_IDLE_BUFFER_EMPTY, + HWCNT_JM_WD_IDLE_BUFFER_FULL, + HWCNT_JM_WD_BUFFER_CLEARING, + HWCNT_JM_WD_TIMER_DUMPING, + HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED, + HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR, + HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY, + HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL, +}; + +/** enum wd_init_state - State machine for initialization / termination of the backend resources + */ +enum wd_init_state { + HWCNT_JM_WD_INIT_START, + HWCNT_JM_WD_INIT_BACKEND = HWCNT_JM_WD_INIT_START, + HWCNT_JM_WD_INIT_ENABLE_MAP, + HWCNT_JM_WD_INIT_DUMP_BUFFER, + HWCNT_JM_WD_INIT_END +}; + +/** + * struct kbase_hwcnt_backend_jm_watchdog_info - Immutable information used to initialize an + * instance of the job manager watchdog backend. + * @jm_backend_iface: Hardware counter backend interface. This module extends + * this interface with a watchdog that performs regular + * dumps. The new interface this module provides complies + * with the old backend interface. + * @dump_watchdog_iface: Dump watchdog interface, used to periodically dump the + * hardware counter in case no reads are requested within + * a certain time, used to avoid hardware counter's buffer + * saturation. + */ +struct kbase_hwcnt_backend_jm_watchdog_info { + struct kbase_hwcnt_backend_interface *jm_backend_iface; + struct kbase_hwcnt_watchdog_interface *dump_watchdog_iface; +}; + +/** + * struct kbase_hwcnt_backend_jm_watchdog - An instance of the job manager watchdog backend. + * @info: Immutable information used to create the job manager watchdog backend. + * @jm_backend: Job manager's backend internal state. To be passed as argument during parent calls. + * @timeout_ms: Time period in milliseconds for hardware counters dumping. + * @wd_dump_buffer: Used to store periodic dumps done by a timer callback function. Contents are + * valid in state %HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED, + * %HWCNT_JM_WD_IDLE_BUFFER_FULL or %HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL. + * @wd_enable_map: Watchdog backend internal buffer mask, initialized during dump_enable copying + * the enable_map passed as argument. + * @wd_dump_timestamp: Holds the dumping timestamp for potential future client dump_request, filled + * during watchdog timer dumps. + * @watchdog_complete: Used for synchronization between watchdog dumper thread and client calls. + * @locked: Members protected from concurrent access by different threads. + * @locked.watchdog_lock: Lock used to access fields within this struct (that require mutual + * exclusion). + * @locked.is_enabled: If true then the wrapped job manager hardware counter backend and the + * watchdog timer are both enabled. If false then both are disabled (or soon + * will be). Races between enable and disable have undefined behavior. + * @locked.state: State used to synchronize timer callbacks with the main thread. 
+ */
+struct kbase_hwcnt_backend_jm_watchdog {
+	const struct kbase_hwcnt_backend_jm_watchdog_info *info;
+	struct kbase_hwcnt_backend *jm_backend;
+	u32 timeout_ms;
+	struct kbase_hwcnt_dump_buffer wd_dump_buffer;
+	struct kbase_hwcnt_enable_map wd_enable_map;
+	u64 wd_dump_timestamp;
+	struct completion watchdog_complete;
+	struct {
+		spinlock_t watchdog_lock;
+		bool is_enabled;
+		enum backend_watchdog_state state;
+	} locked;
+};
+
+/* Timer callback function. */
+static void kbasep_hwcnt_backend_jm_watchdog_timer_callback(void *backend)
+{
+	struct kbase_hwcnt_backend_jm_watchdog *wd_backend = backend;
+	unsigned long flags;
+	bool wd_accumulate;
+
+	spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+	if (!wd_backend->locked.is_enabled || wd_backend->locked.state == HWCNT_JM_WD_ERROR) {
+		spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+		return;
+	}
+
+	if (!(wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_EMPTY ||
+	      wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL)) {
+		/* Reset the timer. Calling modify on a disabled timer enables it. */
+		wd_backend->info->dump_watchdog_iface->modify(
+			wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms);
+		spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+		return;
+	}
+	/* Start performing the dump. */
+
+	/* If there has been a previous timeout, use an accumulating dump_get();
+	 * otherwise use a non-accumulating one to overwrite the buffer.
+	 */
+	wd_accumulate = (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL);
+
+	wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING;
+
+	spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+	if (wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend,
+							     &wd_backend->wd_dump_timestamp) ||
+	    wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend) ||
+	    wd_backend->info->jm_backend_iface->dump_get(
+		    wd_backend->jm_backend, &wd_backend->wd_dump_buffer, &wd_backend->wd_enable_map,
+		    wd_accumulate)) {
+		spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+		WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+			wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+			wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+		wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+		spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+		/* Unblock user if it's waiting. */
+		complete_all(&wd_backend->watchdog_complete);
+		return;
+	}
+
+	spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+	WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING &&
+		wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR &&
+		wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+	if (wd_backend->locked.state == HWCNT_JM_WD_TIMER_DUMPING) {
+		/* If there is no user request/clear, transition to
+		 * HWCNT_JM_WD_IDLE_BUFFER_FULL to indicate the timer dump is
+		 * done and the buffer is full. If the state changed to
+		 * HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED or
+		 * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR then the user will
+		 * transition the state machine to the next state.
+		 */
+		wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_FULL;
+	}
+	if (wd_backend->locked.state != HWCNT_JM_WD_ERROR && wd_backend->locked.is_enabled) {
+		/* Reset the timer to schedule another callback. Calling modify
+		 * on a disabled timer enables it.
+ */ + /*The spin lock needs to be held in case the client calls dump_enable*/ + wd_backend->info->dump_watchdog_iface->modify( + wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms); + } + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + + /* Unblock user if it's waiting. */ + complete_all(&wd_backend->watchdog_complete); +} + +/* helper methods, info structure creation and destruction*/ + +static struct kbase_hwcnt_backend_jm_watchdog_info * +kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interface *backend_iface, + struct kbase_hwcnt_watchdog_interface *watchdog_iface) +{ + struct kbase_hwcnt_backend_jm_watchdog_info *const info = + kmalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return NULL; + + *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface, + .dump_watchdog_iface = + watchdog_iface }; + + return info; +} + +/****** kbase_hwcnt_backend_interface implementation *******/ + +/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_metadata_fn */ +static const struct kbase_hwcnt_metadata * +kbasep_hwcnt_backend_jm_watchdog_metadata(const struct kbase_hwcnt_backend_info *info) +{ + const struct kbase_hwcnt_backend_jm_watchdog_info *wd_info = (void *)info; + + if (WARN_ON(!info)) + return NULL; + + return wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info); +} + +static void +kbasep_hwcnt_backend_jm_watchdog_term_partial(struct kbase_hwcnt_backend_jm_watchdog *wd_backend, + enum wd_init_state state) +{ + if (!wd_backend) + return; + + WARN_ON(state > HWCNT_JM_WD_INIT_END); + + while (state-- > HWCNT_JM_WD_INIT_START) { + switch (state) { + case HWCNT_JM_WD_INIT_BACKEND: + wd_backend->info->jm_backend_iface->term(wd_backend->jm_backend); + break; + case HWCNT_JM_WD_INIT_ENABLE_MAP: + kbase_hwcnt_enable_map_free(&wd_backend->wd_enable_map); + break; + case HWCNT_JM_WD_INIT_DUMP_BUFFER: + kbase_hwcnt_dump_buffer_free(&wd_backend->wd_dump_buffer); + break; + case HWCNT_JM_WD_INIT_END: + break; + } + } + + kfree(wd_backend); +} + +/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_term_fn + * Calling term does *not* destroy the interface + */ +static void kbasep_hwcnt_backend_jm_watchdog_term(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm_watchdog *wd_backend = + (struct kbase_hwcnt_backend_jm_watchdog *)backend; + + if (!backend) + return; + + /* disable timer thread to avoid concurrent access to shared resources */ + wd_backend->info->dump_watchdog_iface->disable( + wd_backend->info->dump_watchdog_iface->timer); + + kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, HWCNT_JM_WD_INIT_END); +} + +/* Job manager watchdog backend, implementation of kbase_hwcnt_backend_init_fn */ +static int kbasep_hwcnt_backend_jm_watchdog_init(const struct kbase_hwcnt_backend_info *info, + struct kbase_hwcnt_backend **out_backend) +{ + int errcode = 0; + struct kbase_hwcnt_backend_jm_watchdog *wd_backend = NULL; + struct kbase_hwcnt_backend_jm_watchdog_info *const wd_info = (void *)info; + const struct kbase_hwcnt_backend_info *jm_info; + const struct kbase_hwcnt_metadata *metadata; + enum wd_init_state state = HWCNT_JM_WD_INIT_START; + + if (WARN_ON(!info) || WARN_ON(!out_backend)) + return -EINVAL; + + jm_info = wd_info->jm_backend_iface->info; + metadata = wd_info->jm_backend_iface->metadata(wd_info->jm_backend_iface->info); + + wd_backend = kmalloc(sizeof(*wd_backend), GFP_KERNEL); + if (!wd_backend) { + *out_backend = 
NULL; + return -ENOMEM; + } + + *wd_backend = (struct kbase_hwcnt_backend_jm_watchdog){ + .info = wd_info, + .timeout_ms = hwcnt_backend_watchdog_timer_interval_ms, + .locked = { .state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY, .is_enabled = false } + }; + + while (state < HWCNT_JM_WD_INIT_END && !errcode) { + switch (state) { + case HWCNT_JM_WD_INIT_BACKEND: + errcode = wd_info->jm_backend_iface->init(jm_info, &wd_backend->jm_backend); + break; + case HWCNT_JM_WD_INIT_ENABLE_MAP: + errcode = + kbase_hwcnt_enable_map_alloc(metadata, &wd_backend->wd_enable_map); + break; + case HWCNT_JM_WD_INIT_DUMP_BUFFER: + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, + &wd_backend->wd_dump_buffer); + break; + case HWCNT_JM_WD_INIT_END: + break; + } + if (!errcode) + state++; + } + + if (errcode) { + kbasep_hwcnt_backend_jm_watchdog_term_partial(wd_backend, state); + *out_backend = NULL; + return errcode; + } + + WARN_ON(state != HWCNT_JM_WD_INIT_END); + + spin_lock_init(&wd_backend->locked.watchdog_lock); + init_completion(&wd_backend->watchdog_complete); + + *out_backend = (struct kbase_hwcnt_backend *)wd_backend; + return 0; +} + +/* Job manager watchdog backend, implementation of timestamp_ns */ +static u64 kbasep_hwcnt_backend_jm_watchdog_timestamp_ns(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + + return wd_backend->info->jm_backend_iface->timestamp_ns(wd_backend->jm_backend); +} + +static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( + struct kbase_hwcnt_backend_jm_watchdog *wd_backend, + const struct kbase_hwcnt_enable_map *enable_map, kbase_hwcnt_backend_dump_enable_fn enabler) +{ + int errcode = -EPERM; + unsigned long flags; + + if (WARN_ON(!wd_backend) || WARN_ON(!enable_map)) + return -EINVAL; + + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + + /* If the backend is already enabled return an error */ + if (wd_backend->locked.is_enabled) { + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + return -EPERM; + } + + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + + /*We copy the enable map into our watchdog backend copy, for future usage*/ + kbase_hwcnt_enable_map_copy(&wd_backend->wd_enable_map, enable_map); + + errcode = enabler(wd_backend->jm_backend, enable_map); + if (!errcode) { + /*Enable dump watchdog*/ + errcode = wd_backend->info->dump_watchdog_iface->enable( + wd_backend->info->dump_watchdog_iface->timer, wd_backend->timeout_ms, + kbasep_hwcnt_backend_jm_watchdog_timer_callback, wd_backend); + if (!errcode) { + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + WARN_ON(wd_backend->locked.is_enabled); + wd_backend->locked.is_enabled = true; + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + } else + /*Reverting the job manager backend back to disabled*/ + wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); + } + + return errcode; +} + +/* Job manager watchdog backend, implementation of dump_enable */ +static int +kbasep_hwcnt_backend_jm_watchdog_dump_enable(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + + return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( + wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable); +} + +/* Job manager watchdog backend, implementation of dump_enable_nolock */ +static int 
+kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend *backend, + const struct kbase_hwcnt_enable_map *enable_map) +{ + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + + return kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( + wd_backend, enable_map, wd_backend->info->jm_backend_iface->dump_enable_nolock); +} + +/* Job manager watchdog backend, implementation of dump_disable */ +static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend) +{ + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + unsigned long flags; + + if (WARN_ON(!backend)) + return; + + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + if (!wd_backend->locked.is_enabled) { + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + return; + } + + wd_backend->locked.is_enabled = false; + + /* Discard undumped counter values since the last dump_get. */ + if (wd_backend->locked.state == HWCNT_JM_WD_IDLE_BUFFER_FULL) + wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; + + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + + wd_backend->info->dump_watchdog_iface->disable( + wd_backend->info->dump_watchdog_iface->timer); + + wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); +} + +/* Job manager watchdog backend, implementation of dump_clear */ +static int kbasep_hwcnt_backend_jm_watchdog_dump_clear(struct kbase_hwcnt_backend *backend) +{ + int errcode = -EPERM; + bool clear_wd_wait_completion = false; + unsigned long flags; + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + + if (WARN_ON(!backend)) + return -EINVAL; + + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + if (!wd_backend->locked.is_enabled) { + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + return -EPERM; + } + + switch (wd_backend->locked.state) { + case HWCNT_JM_WD_IDLE_BUFFER_FULL: + case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: + case HWCNT_JM_WD_IDLE_BUFFER_EMPTY: + case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: + wd_backend->locked.state = HWCNT_JM_WD_BUFFER_CLEARING; + errcode = 0; + break; + case HWCNT_JM_WD_TIMER_DUMPING: + /* The timer asked for a dump request, when complete, the job manager backend + * buffer will be zero + */ + clear_wd_wait_completion = true; + /* This thread will have to wait for the callback to terminate and then call a + * dump_clear on the job manager backend. We change the state to + * HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR to notify the callback thread there is + * no more need to dump the buffer (since we will clear it right after anyway). + * We set up a wait queue to synchronize with the callback. 
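+	 *
+	 * A sketch of the resulting handshake (the callback side is implied by
+	 * the complete_all() in the timer callback above):
+	 *
+	 *   this thread:    state = TIMER_DUMPING_USER_CLEAR; reinit_completion();
+	 *   timer callback: sees USER_CLEAR, skips copying the dump, complete_all();
+	 *   this thread:    wait_for_completion(); dump_clear() on the JM backend.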
+ */ + reinit_completion(&wd_backend->watchdog_complete); + wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR; + errcode = 0; + break; + default: + errcode = -EPERM; + break; + } + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + + if (!errcode) { + if (clear_wd_wait_completion) { + /* Waiting for the callback to finish */ + wait_for_completion(&wd_backend->watchdog_complete); + } + + /* Clearing job manager backend buffer */ + errcode = wd_backend->info->jm_backend_iface->dump_clear(wd_backend->jm_backend); + + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + + WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_CLEAR && + wd_backend->locked.state != HWCNT_JM_WD_BUFFER_CLEARING && + wd_backend->locked.state != HWCNT_JM_WD_ERROR); + + WARN_ON(!wd_backend->locked.is_enabled); + + if (!errcode && wd_backend->locked.state != HWCNT_JM_WD_ERROR) { + /* Setting the internal buffer state to EMPTY */ + wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY; + /* Resetting the timer. Calling modify on a disabled timer + * enables it. + */ + wd_backend->info->dump_watchdog_iface->modify( + wd_backend->info->dump_watchdog_iface->timer, + wd_backend->timeout_ms); + } else { + wd_backend->locked.state = HWCNT_JM_WD_ERROR; + errcode = -EPERM; + } + + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + } + + return errcode; +} + +/* Job manager watchdog backend, implementation of dump_request */ +static int kbasep_hwcnt_backend_jm_watchdog_dump_request(struct kbase_hwcnt_backend *backend, + u64 *dump_time_ns) +{ + bool call_dump_request = false; + int errcode = 0; + unsigned long flags; + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + + if (WARN_ON(!backend) || WARN_ON(!dump_time_ns)) + return -EINVAL; + + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + + if (!wd_backend->locked.is_enabled) { + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + return -EPERM; + } + + switch (wd_backend->locked.state) { + case HWCNT_JM_WD_IDLE_BUFFER_EMPTY: + /* progressing the state to avoid callbacks running while calling the job manager + * backend + */ + wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY; + call_dump_request = true; + break; + case HWCNT_JM_WD_IDLE_BUFFER_FULL: + wd_backend->locked.state = HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL; + call_dump_request = true; + break; + case HWCNT_JM_WD_TIMER_DUMPING: + /* Retrieve timing information from previous dump_request */ + *dump_time_ns = wd_backend->wd_dump_timestamp; + /* On the next client call (dump_wait) the thread will have to wait for the + * callback to finish the dumping. + * We set up a wait queue to synchronize with the callback. + */ + reinit_completion(&wd_backend->watchdog_complete); + wd_backend->locked.state = HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED; + break; + default: + errcode = -EPERM; + break; + } + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + + if (call_dump_request) { + errcode = wd_backend->info->jm_backend_iface->dump_request(wd_backend->jm_backend, + dump_time_ns); + if (!errcode) { + /*resetting the timer. 
Calling modify on a disabled timer enables it*/ + wd_backend->info->dump_watchdog_iface->modify( + wd_backend->info->dump_watchdog_iface->timer, + wd_backend->timeout_ms); + } else { + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + WARN_ON(!wd_backend->locked.is_enabled); + wd_backend->locked.state = HWCNT_JM_WD_ERROR; + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + } + } + + return errcode; +} + +/* Job manager watchdog backend, implementation of dump_wait */ +static int kbasep_hwcnt_backend_jm_watchdog_dump_wait(struct kbase_hwcnt_backend *backend) +{ + int errcode = -EPERM; + bool wait_for_auto_dump = false, wait_for_user_dump = false; + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + unsigned long flags; + + if (WARN_ON(!backend)) + return -EINVAL; + + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + if (!wd_backend->locked.is_enabled) { + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + return -EPERM; + } + + switch (wd_backend->locked.state) { + case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED: + wait_for_auto_dump = true; + errcode = 0; + break; + case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY: + case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL: + wait_for_user_dump = true; + errcode = 0; + break; + default: + errcode = -EPERM; + break; + } + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + + if (wait_for_auto_dump) + wait_for_completion(&wd_backend->watchdog_complete); + else if (wait_for_user_dump) { + errcode = wd_backend->info->jm_backend_iface->dump_wait(wd_backend->jm_backend); + if (errcode) { + spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags); + WARN_ON(!wd_backend->locked.is_enabled); + wd_backend->locked.state = HWCNT_JM_WD_ERROR; + spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); + } + } + + return errcode; +} + +/* Job manager watchdog backend, implementation of dump_get */ +static int kbasep_hwcnt_backend_jm_watchdog_dump_get( + struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map, bool accumulate) +{ + bool call_dump_get = false; + struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; + unsigned long flags; + int errcode = 0; + + if (WARN_ON(!backend) || WARN_ON(!dump_buffer) || WARN_ON(!enable_map)) + return -EINVAL; + + /* The resultant contents of the dump buffer are only well defined if a prior + * call to dump_wait returned successfully, and a new dump has not yet been + * requested by a call to dump_request. 
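+	 *
+	 * In other words, the expected caller sequence for a user dump is (a
+	 * sketch of the contract, not something enforced here):
+	 *
+	 *   dump_request() -> dump_wait() -> dump_get()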
+ */
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ switch (wd_backend->locked.state) {
+ case HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED:
+ /* We assume dump_wait has been called and completed successfully. */
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+ /* Use the state to indicate that the buffer is now empty. */
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL:
+ /* Accumulate or copy the watchdog data into the user buffer first, so
+ * that dump_get can set the header correctly.
+ */
+ if (accumulate)
+ kbase_hwcnt_dump_buffer_accumulate(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+ else
+ kbase_hwcnt_dump_buffer_copy(dump_buffer, &wd_backend->wd_dump_buffer,
+ enable_map);
+
+ /* Accumulate backend data into the user buffer, on top of the watchdog
+ * data.
+ */
+ accumulate = true;
+ call_dump_get = true;
+ break;
+ case HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY:
+ call_dump_get = true;
+ break;
+ default:
+ errcode = -EPERM;
+ break;
+ }
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+
+ if (call_dump_get && !errcode) {
+ /* Dump the job manager backend into the user buffer, honouring the
+ * accumulate flag.
+ */
+ errcode = wd_backend->info->jm_backend_iface->dump_get(
+ wd_backend->jm_backend, dump_buffer, enable_map, accumulate);
+
+ spin_lock_irqsave(&wd_backend->locked.watchdog_lock, flags);
+
+ WARN_ON(wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_EMPTY &&
+ wd_backend->locked.state != HWCNT_JM_WD_USER_DUMPING_BUFFER_FULL &&
+ wd_backend->locked.state != HWCNT_JM_WD_TIMER_DUMPING_USER_REQUESTED);
+
+ if (!errcode)
+ wd_backend->locked.state = HWCNT_JM_WD_IDLE_BUFFER_EMPTY;
+ else
+ wd_backend->locked.state = HWCNT_JM_WD_ERROR;
+
+ spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
+ }
+
+ return errcode;
+}
+
+/* Exposed methods */
+
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface)
+{
+ struct kbase_hwcnt_backend_jm_watchdog_info *info = NULL;
+
+ if (WARN_ON(!backend_iface) || WARN_ON(!watchdog_iface) || WARN_ON(!out_iface))
+ return -EINVAL;
+
+ info = kbasep_hwcnt_backend_jm_watchdog_info_create(backend_iface, watchdog_iface);
+ if (!info)
+ return -ENOMEM;
+
+ /* Link the info table with the output iface, so the callbacks registered
+ * below can access the info object later on, and register the watchdog
+ * backend's methods on the output interface.
+ */
+ *out_iface = (struct kbase_hwcnt_backend_interface){
+ .info = (void *)info,
+ .metadata = kbasep_hwcnt_backend_jm_watchdog_metadata,
+ .init = kbasep_hwcnt_backend_jm_watchdog_init,
+ .term = kbasep_hwcnt_backend_jm_watchdog_term,
+ .timestamp_ns = kbasep_hwcnt_backend_jm_watchdog_timestamp_ns,
+ .dump_enable = kbasep_hwcnt_backend_jm_watchdog_dump_enable,
+ .dump_enable_nolock = kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock,
+ .dump_disable = kbasep_hwcnt_backend_jm_watchdog_dump_disable,
+ .dump_clear = kbasep_hwcnt_backend_jm_watchdog_dump_clear,
+ .dump_request = kbasep_hwcnt_backend_jm_watchdog_dump_request,
+ .dump_wait = kbasep_hwcnt_backend_jm_watchdog_dump_wait,
+ .dump_get = kbasep_hwcnt_backend_jm_watchdog_dump_get
+ };
+
+ return 0;
+}
+
+void 
kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface)
+{
+ if (!iface || !iface->info)
+ return;
+
+ kfree((struct kbase_hwcnt_backend_jm_watchdog_info *)iface->info);
+
+ /* Blank out the watchdog backend interface. */
+ memset(iface, 0, sizeof(*iface));
+}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
new file mode 100644
index 0000000..02a7952
--- /dev/null
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of the mali_kbase_hwcnt_backend interface for the
+ * job manager backend. This module sits between the hardware counter
+ * (hwcnt_accumulator) module (the interface consumer) and the job manager
+ * backend module (hwcnt_backend_jm), and buffers the dump requests made by
+ * the hwcnt_accumulator consumer. This module is NOT multi-thread safe:
+ * the caller must ensure the exposed methods are called by at most one
+ * thread at any time.
+ */
+
+#ifndef _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+#define _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_
+
+#include <hwcnt/backend/mali_kbase_hwcnt_backend.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_create() - Create a job manager hardware counter watchdog
+ * backend interface.
+ * @backend_iface: Non-NULL pointer to the backend interface structure that this module will
+ * extend.
+ * @watchdog_iface: Non-NULL pointer to a hardware counter watchdog interface.
+ * @out_iface: Non-NULL pointer to backend interface structure that is filled in
+ * on creation success.
+ *
+ * Calls to out_iface->dump_enable_nolock() require kbdev->hwaccess_lock held.
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_backend_jm_watchdog_create(struct kbase_hwcnt_backend_interface *backend_iface,
+ struct kbase_hwcnt_watchdog_interface *watchdog_iface,
+ struct kbase_hwcnt_backend_interface *out_iface);
+
+/**
+ * kbase_hwcnt_backend_jm_watchdog_destroy() - Destroy a job manager hardware counter watchdog
+ * backend interface.
+ * @iface: Pointer to interface to destroy.
+ *
+ * Can be safely called on an all-zeroed interface, or on an already destroyed
+ * interface.
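+ *
+ * A minimal create/use/destroy sequence, as a sketch (assumes jm_iface and
+ * wd_timer_iface have already been created by their respective modules, and
+ * elides error handling):
+ *
+ *   struct kbase_hwcnt_backend_interface wd_iface;
+ *
+ *   if (!kbase_hwcnt_backend_jm_watchdog_create(&jm_iface, &wd_timer_iface,
+ *                                               &wd_iface)) {
+ *           // wd_iface can now be used wherever a backend interface is
+ *           // expected, e.g. passed to kbase_hwcnt_context_init().
+ *           kbase_hwcnt_backend_jm_watchdog_destroy(&wd_iface);
+ *   }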
+ */ +void kbase_hwcnt_backend_jm_watchdog_destroy(struct kbase_hwcnt_backend_interface *iface); + +#endif /* _KBASE_HWCNT_BACKEND_JM_WATCHDOG_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c new file mode 100644 index 0000000..e724572 --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c @@ -0,0 +1,775 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Implementation of hardware counter context and accumulator APIs. + */ + +#include "hwcnt/mali_kbase_hwcnt_context.h" +#include "hwcnt/mali_kbase_hwcnt_accumulator.h" +#include "hwcnt/backend/mali_kbase_hwcnt_backend.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" + +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/slab.h> + +/** + * enum kbase_hwcnt_accum_state - Hardware counter accumulator states. + * @ACCUM_STATE_ERROR: Error state, where all accumulator operations fail. + * @ACCUM_STATE_DISABLED: Disabled state, where dumping is always disabled. + * @ACCUM_STATE_ENABLED: Enabled state, where dumping is enabled if there are + * any enabled counters. + */ +enum kbase_hwcnt_accum_state { ACCUM_STATE_ERROR, ACCUM_STATE_DISABLED, ACCUM_STATE_ENABLED }; + +/** + * struct kbase_hwcnt_accumulator - Hardware counter accumulator structure. + * @metadata: Pointer to immutable hwcnt metadata. + * @backend: Pointer to created counter backend. + * @state: The current state of the accumulator. + * - State transition from disabled->enabled or + * disabled->error requires state_lock. + * - State transition from enabled->disabled or + * enabled->error requires both accum_lock and + * state_lock. + * - Error state persists until next disable. + * @enable_map: The current set of enabled counters. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map_any_enabled. + * @enable_map_any_enabled: True if any counters in the map are enabled, else + * false. If true, and state is ACCUM_STATE_ENABLED, + * then the counter backend will be enabled. + * - Must only be modified while holding both + * accum_lock and state_lock. + * - Can be read while holding either lock. + * - Must stay in sync with enable_map. + * @scratch_map: Scratch enable map, used as temporary enable map + * storage during dumps. + * - Must only be read or modified while holding + * accum_lock. + * @accum_buf: Accumulation buffer, where dumps will be accumulated + * into on transition to a disable state. + * - Must only be read or modified while holding + * accum_lock. + * @accumulated: True if the accumulation buffer has been accumulated + * into and not subsequently read from yet, else false. 
+ * - Must only be read or modified while holding + * accum_lock. + * @ts_last_dump_ns: Timestamp (ns) of the end time of the most recent + * dump that was requested by the user. + * - Must only be read or modified while holding + * accum_lock. + */ +struct kbase_hwcnt_accumulator { + const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_backend *backend; + enum kbase_hwcnt_accum_state state; + struct kbase_hwcnt_enable_map enable_map; + bool enable_map_any_enabled; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool accumulated; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_context - Hardware counter context structure. + * @iface: Pointer to hardware counter backend interface. + * @state_lock: Spinlock protecting state. + * @disable_count: Disable count of the context. Initialised to 1. + * Decremented when the accumulator is acquired, and incremented + * on release. Incremented on calls to + * kbase_hwcnt_context_disable[_atomic], and decremented on + * calls to kbase_hwcnt_context_enable. + * - Must only be read or modified while holding state_lock. + * @accum_lock: Mutex protecting accumulator. + * @accum_inited: Flag to prevent concurrent accumulator initialisation and/or + * termination. Set to true before accumulator initialisation, + * and false after accumulator termination. + * - Must only be modified while holding both accum_lock and + * state_lock. + * - Can be read while holding either lock. + * @accum: Hardware counter accumulator structure. + * @wq: Centralized workqueue for users of hardware counters to + * submit async hardware counter related work. Never directly + * called, but it's expected that a lot of the functions in this + * API will end up called from the enqueued async work. + */ +struct kbase_hwcnt_context { + const struct kbase_hwcnt_backend_interface *iface; + spinlock_t state_lock; + size_t disable_count; + struct mutex accum_lock; + bool accum_inited; + struct kbase_hwcnt_accumulator accum; + struct workqueue_struct *wq; +}; + +int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx) +{ + struct kbase_hwcnt_context *hctx = NULL; + + if (!iface || !out_hctx) + return -EINVAL; + + hctx = kzalloc(sizeof(*hctx), GFP_KERNEL); + if (!hctx) + goto err_alloc_hctx; + + hctx->iface = iface; + spin_lock_init(&hctx->state_lock); + hctx->disable_count = 1; + mutex_init(&hctx->accum_lock); + hctx->accum_inited = false; + + hctx->wq = alloc_workqueue("mali_kbase_hwcnt", WQ_HIGHPRI | WQ_UNBOUND, 0); + if (!hctx->wq) + goto err_alloc_workqueue; + + *out_hctx = hctx; + + return 0; + +err_alloc_workqueue: + kfree(hctx); +err_alloc_hctx: + return -ENOMEM; +} + +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return; + + /* Make sure we didn't leak the accumulator */ + WARN_ON(hctx->accum_inited); + + /* We don't expect any work to be pending on this workqueue. + * Regardless, this will safely drain and complete the work. + */ + destroy_workqueue(hctx->wq); + kfree(hctx); +} + +/** + * kbasep_hwcnt_accumulator_term() - Terminate the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. 
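+ *
+ * Note: this is also called on the error path of
+ * kbasep_hwcnt_accumulator_init(), so it must tolerate a partially
+ * initialised accumulator (the context is zero-allocated, and the free/term
+ * helpers are expected to accept objects that were never allocated).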
+ */ +static void kbasep_hwcnt_accumulator_term(struct kbase_hwcnt_context *hctx) +{ + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + kbase_hwcnt_enable_map_free(&hctx->accum.scratch_map); + kbase_hwcnt_dump_buffer_free(&hctx->accum.accum_buf); + kbase_hwcnt_enable_map_free(&hctx->accum.enable_map); + hctx->iface->term(hctx->accum.backend); + memset(&hctx->accum, 0, sizeof(hctx->accum)); +} + +/** + * kbasep_hwcnt_accumulator_init() - Initialise the accumulator for the context. + * @hctx: Non-NULL pointer to hardware counter context. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_accumulator_init(struct kbase_hwcnt_context *hctx) +{ + int errcode; + + WARN_ON(!hctx); + WARN_ON(!hctx->accum_inited); + + errcode = hctx->iface->init(hctx->iface->info, &hctx->accum.backend); + if (errcode) + goto error; + + hctx->accum.metadata = hctx->iface->metadata(hctx->iface->info); + hctx->accum.state = ACCUM_STATE_ERROR; + + errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.enable_map); + if (errcode) + goto error; + + hctx->accum.enable_map_any_enabled = false; + + errcode = kbase_hwcnt_dump_buffer_alloc(hctx->accum.metadata, &hctx->accum.accum_buf); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc(hctx->accum.metadata, &hctx->accum.scratch_map); + if (errcode) + goto error; + + hctx->accum.accumulated = false; + + hctx->accum.ts_last_dump_ns = hctx->iface->timestamp_ns(hctx->accum.backend); + + return 0; + +error: + kbasep_hwcnt_accumulator_term(hctx); + return errcode; +} + +/** + * kbasep_hwcnt_accumulator_disable() - Transition the accumulator into the + * disabled state, from the enabled or + * error states. + * @hctx: Non-NULL pointer to hardware counter context. + * @accumulate: True if we should accumulate before disabling, else false. + */ +static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, bool accumulate) +{ + int errcode = 0; + bool backend_enabled = false; + struct kbase_hwcnt_accumulator *accum; + unsigned long flags; + u64 dump_time_ns; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->accum_lock); + WARN_ON(!hctx->accum_inited); + + accum = &hctx->accum; + + spin_lock_irqsave(&hctx->state_lock, flags); + + WARN_ON(hctx->disable_count != 0); + WARN_ON(hctx->accum.state == ACCUM_STATE_DISABLED); + + if ((hctx->accum.state == ACCUM_STATE_ENABLED) && (accum->enable_map_any_enabled)) + backend_enabled = true; + + if (!backend_enabled) + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Early out if the backend is not already enabled */ + if (!backend_enabled) + return; + + if (!accumulate) + goto disable; + + /* Try and accumulate before disabling */ + errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto disable; + + errcode = hctx->iface->dump_get(accum->backend, &accum->accum_buf, &accum->enable_map, + accum->accumulated); + if (errcode) + goto disable; + + accum->accumulated = true; + +disable: + hctx->iface->dump_disable(accum->backend); + + /* Regardless of any errors during the accumulate, put the accumulator + * in the disabled state. 
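+	 * The state write below is done under state_lock (accum_lock is already
+	 * held), matching the locking rules documented on
+	 * struct kbase_hwcnt_accumulator: enabled->disabled transitions require
+	 * both locks.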
+ */ + spin_lock_irqsave(&hctx->state_lock, flags); + + hctx->accum.state = ACCUM_STATE_DISABLED; + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} + +/** + * kbasep_hwcnt_accumulator_enable() - Transition the accumulator into the + * enabled state, from the disabled state. + * @hctx: Non-NULL pointer to hardware counter context. + */ +static void kbasep_hwcnt_accumulator_enable(struct kbase_hwcnt_context *hctx) +{ + int errcode = 0; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + lockdep_assert_held(&hctx->state_lock); + WARN_ON(!hctx->accum_inited); + WARN_ON(hctx->accum.state != ACCUM_STATE_DISABLED); + + accum = &hctx->accum; + + /* The backend only needs enabling if any counters are enabled */ + if (accum->enable_map_any_enabled) + errcode = hctx->iface->dump_enable_nolock(accum->backend, &accum->enable_map); + + if (!errcode) + accum->state = ACCUM_STATE_ENABLED; + else + accum->state = ACCUM_STATE_ERROR; +} + +/** + * kbasep_hwcnt_accumulator_dump() - Perform a dump with the most up-to-date + * values of enabled counters possible, and + * optionally update the set of enabled + * counters. + * @hctx: Non-NULL pointer to the hardware counter context + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * @new_map: Pointer to the new counter enable map. If non-NULL, must have + * the same metadata as the accumulator. If NULL, the set of + * enabled counters will be unchanged. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *ts_start_ns, + u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf, + const struct kbase_hwcnt_enable_map *new_map) +{ + int errcode = 0; + unsigned long flags; + enum kbase_hwcnt_accum_state state; + bool dump_requested = false; + bool dump_written = false; + bool cur_map_any_enabled; + struct kbase_hwcnt_enable_map *cur_map; + bool new_map_any_enabled = false; + u64 dump_time_ns; + struct kbase_hwcnt_accumulator *accum; + + WARN_ON(!hctx); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hctx->accum.metadata)); + WARN_ON(new_map && (new_map->metadata != hctx->accum.metadata)); + WARN_ON(!hctx->accum_inited); + lockdep_assert_held(&hctx->accum_lock); + + accum = &hctx->accum; + cur_map = &accum->scratch_map; + + /* Save out info about the current enable map */ + cur_map_any_enabled = accum->enable_map_any_enabled; + kbase_hwcnt_enable_map_copy(cur_map, &accum->enable_map); + + if (new_map) + new_map_any_enabled = kbase_hwcnt_enable_map_any_enabled(new_map); + + /* + * We're holding accum_lock, so the accumulator state might transition + * from disabled to enabled during this function (as enabling is lock + * free), but it will never disable (as disabling needs to hold the + * accum_lock), nor will it ever transition from enabled to error (as + * an enable while we're already enabled is impossible). + * + * If we're already disabled, we'll only look at the accumulation buffer + * rather than do a real dump, so a concurrent enable does not affect + * us. 
+ * + * If a concurrent enable fails, we might transition to the error + * state, but again, as we're only looking at the accumulation buffer, + * it's not an issue. + */ + spin_lock_irqsave(&hctx->state_lock, flags); + + state = accum->state; + + /* + * Update the new map now, such that if an enable occurs during this + * dump then that enable will set the new map. If we're already enabled, + * then we'll do it ourselves after the dump. + */ + if (new_map) { + kbase_hwcnt_enable_map_copy(&accum->enable_map, new_map); + accum->enable_map_any_enabled = new_map_any_enabled; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + /* Error state, so early out. No need to roll back any map updates */ + if (state == ACCUM_STATE_ERROR) + return -EIO; + + /* Initiate the dump if the backend is enabled. */ + if ((state == ACCUM_STATE_ENABLED) && cur_map_any_enabled) { + if (dump_buf) { + errcode = hctx->iface->dump_request(accum->backend, &dump_time_ns); + dump_requested = true; + } else { + dump_time_ns = hctx->iface->timestamp_ns(accum->backend); + errcode = hctx->iface->dump_clear(accum->backend); + } + + if (errcode) + goto error; + } else { + dump_time_ns = hctx->iface->timestamp_ns(accum->backend); + } + + /* Copy any accumulation into the dest buffer */ + if (accum->accumulated && dump_buf) { + kbase_hwcnt_dump_buffer_copy(dump_buf, &accum->accum_buf, cur_map); + dump_written = true; + } + + /* Wait for any requested dumps to complete */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_wait(accum->backend); + if (errcode) + goto error; + } + + /* If we're enabled and there's a new enable map, change the enabled set + * as soon after the dump has completed as possible. + */ + if ((state == ACCUM_STATE_ENABLED) && new_map) { + /* Backend is only enabled if there were any enabled counters */ + if (cur_map_any_enabled) + hctx->iface->dump_disable(accum->backend); + + /* (Re-)enable the backend if the new map has enabled counters. + * No need to acquire the spinlock, as concurrent enable while + * we're already enabled and holding accum_lock is impossible. + */ + if (new_map_any_enabled) { + errcode = hctx->iface->dump_enable(accum->backend, new_map); + if (errcode) + goto error; + } + } + + /* Copy, accumulate, or zero into the dest buffer to finish */ + if (dump_buf) { + /* If we dumped, copy or accumulate it into the destination */ + if (dump_requested) { + WARN_ON(state != ACCUM_STATE_ENABLED); + errcode = hctx->iface->dump_get(accum->backend, dump_buf, cur_map, + dump_written); + if (errcode) + goto error; + dump_written = true; + } + + /* If we've not written anything into the dump buffer so far, it + * means there was nothing to write. Zero any enabled counters. + */ + if (!dump_written) + kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map); + } + + /* Write out timestamps */ + *ts_start_ns = accum->ts_last_dump_ns; + *ts_end_ns = dump_time_ns; + + accum->accumulated = false; + accum->ts_last_dump_ns = dump_time_ns; + + return 0; +error: + /* An error was only physically possible if the backend was enabled */ + WARN_ON(state != ACCUM_STATE_ENABLED); + + /* Disable the backend, and transition to the error state */ + hctx->iface->dump_disable(accum->backend); + spin_lock_irqsave(&hctx->state_lock, flags); + + accum->state = ACCUM_STATE_ERROR; + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return errcode; +} + +/** + * kbasep_hwcnt_context_disable() - Increment the disable count of the context. 
+ * @hctx: Non-NULL pointer to hardware counter context.
+ * @accumulate: True if we should accumulate before disabling, else false.
+ */
+static void kbasep_hwcnt_context_disable(struct kbase_hwcnt_context *hctx, bool accumulate)
+{
+ unsigned long flags;
+
+ WARN_ON(!hctx);
+ lockdep_assert_held(&hctx->accum_lock);
+
+ if (!kbase_hwcnt_context_disable_atomic(hctx)) {
+ kbasep_hwcnt_accumulator_disable(hctx, accumulate);
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ /* Atomic disable failed and we're holding the mutex, so the current
+ * disable count must be 0.
+ */
+ WARN_ON(hctx->disable_count != 0);
+ hctx->disable_count++;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ }
+}
+
+int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx,
+ struct kbase_hwcnt_accumulator **accum)
+{
+ int errcode = 0;
+ unsigned long flags;
+
+ if (!hctx || !accum)
+ return -EINVAL;
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ if (!hctx->accum_inited)
+ /* Mark the accumulator as initialising now, to prevent concurrent init */
+ hctx->accum_inited = true;
+ else
+ /* Already have an accumulator, or one is already being initialised */
+ errcode = -EBUSY;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ if (errcode)
+ return errcode;
+
+ errcode = kbasep_hwcnt_accumulator_init(hctx);
+
+ if (errcode) {
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+
+ return errcode;
+ }
+
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ WARN_ON(hctx->disable_count == 0);
+ WARN_ON(hctx->accum.enable_map_any_enabled);
+
+ /* Decrement the disable count to allow the accumulator to be accessible
+ * now that it's fully constructed.
+ */
+ hctx->disable_count--;
+
+ /*
+ * Make sure the accumulator is initialised to the correct state.
+ * Regardless of initial state, counters don't need to be enabled via
+ * the backend, as the initial enable map has no enabled counters.
+ */
+ hctx->accum.state = (hctx->disable_count == 0) ? ACCUM_STATE_ENABLED : ACCUM_STATE_DISABLED;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+
+ *accum = &hctx->accum;
+
+ return 0;
+}
+
+void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum)
+{
+ unsigned long flags;
+ struct kbase_hwcnt_context *hctx;
+
+ if (!accum)
+ return;
+
+ hctx = container_of(accum, struct kbase_hwcnt_context, accum);
+
+ mutex_lock(&hctx->accum_lock);
+
+ /* Double release is a programming error */
+ WARN_ON(!hctx->accum_inited);
+
+ /* Disable the context to ensure the accumulator is inaccessible while
+ * we're destroying it. This performs the corresponding disable count
+ * increment to the decrement done during acquisition.
+ */
+ kbasep_hwcnt_context_disable(hctx, false);
+
+ mutex_unlock(&hctx->accum_lock);
+
+ kbasep_hwcnt_accumulator_term(hctx);
+
+ mutex_lock(&hctx->accum_lock);
+ spin_lock_irqsave(&hctx->state_lock, flags);
+
+ hctx->accum_inited = false;
+
+ spin_unlock_irqrestore(&hctx->state_lock, flags);
+ mutex_unlock(&hctx->accum_lock);
+}
+
+void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx)
+{
+ if (WARN_ON(!hctx))
+ return;
+
+ /* Try and atomically disable first, so we can avoid locking the mutex
+ * if we don't need to.
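+	 * Note that each successful disable (atomic or not) must eventually be
+	 * paired with exactly one kbase_hwcnt_context_enable() call by the
+	 * caller; see the kernel-doc in mali_kbase_hwcnt_context.h.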
+ */ + if (kbase_hwcnt_context_disable_atomic(hctx)) + return; + + mutex_lock(&hctx->accum_lock); + + kbasep_hwcnt_context_disable(hctx, true); + + mutex_unlock(&hctx->accum_lock); +} + +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + bool atomic_disabled = false; + + if (WARN_ON(!hctx)) + return false; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == SIZE_MAX)) { + /* + * If disable count is non-zero, we can just bump the disable + * count. + * + * Otherwise, we can't disable in an atomic context. + */ + if (hctx->disable_count != 0) { + hctx->disable_count++; + atomic_disabled = true; + } + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); + + return atomic_disabled; +} + +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx) +{ + unsigned long flags; + + if (WARN_ON(!hctx)) + return; + + spin_lock_irqsave(&hctx->state_lock, flags); + + if (!WARN_ON(hctx->disable_count == 0)) { + if (hctx->disable_count == 1) + kbasep_hwcnt_accumulator_enable(hctx); + + hctx->disable_count--; + } + + spin_unlock_irqrestore(&hctx->state_lock, flags); +} + +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx) +{ + if (!hctx) + return NULL; + + return hctx->iface->metadata(hctx->iface->info); +} + +bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work) +{ + if (WARN_ON(!hctx) || WARN_ON(!work)) + return false; + + return queue_work(hctx->wq, work); +} + +int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum, + const struct kbase_hwcnt_enable_map *new_map, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !new_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if ((new_map->metadata != hctx->accum.metadata) || + (dump_buf && (dump_buf->metadata != hctx->accum.metadata))) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, new_map); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} + +int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns, + u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_context *hctx; + + if (!accum || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + + if (dump_buf && (dump_buf->metadata != hctx->accum.metadata)) + return -EINVAL; + + mutex_lock(&hctx->accum_lock); + + errcode = kbasep_hwcnt_accumulator_dump(hctx, ts_start_ns, ts_end_ns, dump_buf, NULL); + + mutex_unlock(&hctx->accum_lock); + + return errcode; +} + +u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum) +{ + struct kbase_hwcnt_context *hctx; + + if (WARN_ON(!accum)) + return 0; + + hctx = container_of(accum, struct kbase_hwcnt_context, accum); + return hctx->iface->timestamp_ns(accum->backend); +} diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h new file mode 100644 index 0000000..069e020 --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_accumulator.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. 
All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Hardware counter accumulator API. + */ + +#ifndef _KBASE_HWCNT_ACCUMULATOR_H_ +#define _KBASE_HWCNT_ACCUMULATOR_H_ + +#include <linux/types.h> + +struct kbase_hwcnt_context; +struct kbase_hwcnt_accumulator; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_accumulator_acquire() - Acquire the hardware counter accumulator + * for a hardware counter context. + * @hctx: Non-NULL pointer to a hardware counter context. + * @accum: Non-NULL pointer to where the pointer to the created accumulator + * will be stored on success. + * + * There can exist at most one instance of the hardware counter accumulator per + * context at a time. + * + * If multiple clients need access to the hardware counters at the same time, + * then an abstraction built on top of the single instance to the hardware + * counter accumulator is required. + * + * No counters will be enabled with the returned accumulator. A subsequent call + * to kbase_hwcnt_accumulator_set_counters must be used to turn them on. + * + * There are four components to a hardware counter dump: + * - A set of enabled counters + * - A start time + * - An end time + * - A dump buffer containing the accumulated counter values for all enabled + * counters between the start and end times. + * + * For each dump, it is guaranteed that all enabled counters were active for the + * entirety of the period between the start and end times. + * + * It is also guaranteed that the start time of dump "n" is always equal to the + * end time of dump "n - 1". + * + * For all dumps, the values of any counters that were not enabled is undefined. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_accumulator_acquire(struct kbase_hwcnt_context *hctx, + struct kbase_hwcnt_accumulator **accum); + +/** + * kbase_hwcnt_accumulator_release() - Release a hardware counter accumulator. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * + * The accumulator must be released before the context the accumulator was + * created from is terminated. + */ +void kbase_hwcnt_accumulator_release(struct kbase_hwcnt_accumulator *accum); + +/** + * kbase_hwcnt_accumulator_set_counters() - Perform a dump of the currently + * enabled counters, and enable a new + * set of counters that will be used + * for subsequent dumps. + * @accum: Non-NULL pointer to the hardware counter accumulator. + * @new_map: Non-NULL pointer to the new counter enable map. Must have the + * same metadata as the accumulator. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. 
If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_set_counters(struct kbase_hwcnt_accumulator *accum,
+ const struct kbase_hwcnt_enable_map *new_map,
+ u64 *ts_start_ns, u64 *ts_end_ns,
+ struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_dump() - Perform a dump of the currently enabled
+ * counters.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will
+ * be written out to on success.
+ * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will
+ * be written out to on success.
+ * @dump_buf: Pointer to the buffer where the dump will be written out to on
+ * success. If non-NULL, must have the same metadata as the
+ * accumulator. If NULL, the dump will be discarded.
+ *
+ * If this function fails for some unexpected reason (i.e. anything other than
+ * invalid args), then the accumulator will be put into the error state until
+ * the parent context is next disabled.
+ *
+ * Return: 0 on success or error code.
+ */
+int kbase_hwcnt_accumulator_dump(struct kbase_hwcnt_accumulator *accum, u64 *ts_start_ns,
+ u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf);
+
+/**
+ * kbase_hwcnt_accumulator_timestamp_ns() - Get the current accumulator backend
+ * timestamp.
+ * @accum: Non-NULL pointer to the hardware counter accumulator.
+ *
+ * Return: Accumulator backend timestamp in nanoseconds.
+ */
+u64 kbase_hwcnt_accumulator_timestamp_ns(struct kbase_hwcnt_accumulator *accum);
+
+#endif /* _KBASE_HWCNT_ACCUMULATOR_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
new file mode 100644
index 0000000..89732a9
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_context.h
@@ -0,0 +1,148 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Hardware counter context API.
+ */
+
+#ifndef _KBASE_HWCNT_CONTEXT_H_
+#define _KBASE_HWCNT_CONTEXT_H_
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct kbase_hwcnt_backend_interface;
+struct kbase_hwcnt_context;
+
+/**
+ * kbase_hwcnt_context_init() - Initialise a hardware counter context.
+ * @iface: Non-NULL pointer to a hardware counter backend interface.
+ * @out_hctx: Non-NULL pointer to where the pointer to the created context will
+ * be stored on success.
+ *
+ * On creation, the disable count of the context will be 1, i.e. counters stay
+ * disabled until an accumulator is acquired.
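+ *
+ * A typical lifetime, as a sketch (iface is assumed to have been filled in
+ * by a backend creation function such as
+ * kbase_hwcnt_backend_jm_watchdog_create(); error handling elided):
+ *
+ *   struct kbase_hwcnt_context *hctx;
+ *
+ *   if (!kbase_hwcnt_context_init(&iface, &hctx)) {
+ *           // ...acquire/release accumulators, disable/enable counters...
+ *           kbase_hwcnt_context_term(hctx);
+ *   }
+ *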
+ * A hardware counter accumulator can be acquired using a created context. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_context_init(const struct kbase_hwcnt_backend_interface *iface, + struct kbase_hwcnt_context **out_hctx); + +/** + * kbase_hwcnt_context_term() - Terminate a hardware counter context. + * @hctx: Pointer to context to be terminated. + */ +void kbase_hwcnt_context_term(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_metadata() - Get the hardware counter metadata used by + * the context, so related counter data + * structures can be created. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata *kbase_hwcnt_context_metadata(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable() - Increment the disable count of the context. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * If a call to this function increments the disable count from 0 to 1, and + * an accumulator has been acquired, then a counter dump will be performed + * before counters are disabled via the backend interface. + * + * Subsequent dumps via the accumulator while counters are disabled will first + * return the accumulated dump, then will return dumps with zeroed counters. + * + * After this function call returns, it is guaranteed that counters will not be + * enabled via the backend interface. + */ +void kbase_hwcnt_context_disable(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_disable_atomic() - Increment the disable count of the + * context if possible in an atomic + * context. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * This function will only succeed if hardware counters are effectively already + * disabled, i.e. there is no accumulator, the disable count is already + * non-zero, or the accumulator has no counters set. + * + * After this function call returns true, it is guaranteed that counters will + * not be enabled via the backend interface. + * + * Return: True if the disable count was incremented, else False. + */ +bool kbase_hwcnt_context_disable_atomic(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_enable() - Decrement the disable count of the context. + * @hctx: Non-NULL pointer to the hardware counter context. + * + * If a call to this function decrements the disable count from 1 to 0, and + * an accumulator has been acquired, then counters will be re-enabled via the + * backend interface. + * + * If an accumulator has been acquired and enabling counters fails for some + * reason, the accumulator will be placed into an error state. + * + * It is only valid to call this function one time for each prior returned call + * to kbase_hwcnt_context_disable. + * + * The spinlock documented in the backend interface that was passed in to + * kbase_hwcnt_context_init() must be held before calling this function. + */ +void kbase_hwcnt_context_enable(struct kbase_hwcnt_context *hctx); + +/** + * kbase_hwcnt_context_queue_work() - Queue hardware counter related async + * work on a workqueue specialized for + * hardware counters. + * @hctx: Non-NULL pointer to the hardware counter context. + * @work: Non-NULL pointer to work to queue. + * + * Return: false if work was already on a queue, true otherwise. + * + * Performance counter related work is high priority, short running, and + * generally CPU locality is unimportant. 
There is no standard workqueue that + * can service this flavor of work. + * + * Rather than have each user of counters define their own workqueue, we have + * a centralized one in here that anybody using this hardware counter API + * should use. + * + * Before the context is destroyed, all work submitted must have been completed. + * Given that the work enqueued via this function is likely to be hardware + * counter related and will therefore use the context object, this is likely + * to be behavior that will occur naturally. + * + * Historical note: prior to this centralized workqueue, the system_highpri_wq + * was used. This was generally fine, except when a particularly long running, + * higher priority thread ended up scheduled on the enqueuing CPU core. Given + * that hardware counters requires tight integration with power management, + * this meant progress through the power management states could be stalled + * for however long that higher priority thread took. + */ +bool kbase_hwcnt_context_queue_work(struct kbase_hwcnt_context *hctx, struct work_struct *work); + +#endif /* _KBASE_HWCNT_CONTEXT_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c new file mode 100644 index 0000000..74916da --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c @@ -0,0 +1,738 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_types.h" + +#include <linux/err.h> + +/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements + */ +enum enable_map_idx { + EM_LO, + EM_HI, + EM_COUNT, +}; + +static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; + break; + case KBASE_HWCNT_SET_SECONDARY: + if (is_csf) + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; + else + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; + break; + case KBASE_HWCNT_SET_TERTIARY: + if (is_csf) + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; + else + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; + break; + default: + WARN_ON(true); + } +} + +static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; + break; + case KBASE_HWCNT_SET_SECONDARY: + case KBASE_HWCNT_SET_TERTIARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; + break; + default: + WARN_ON(true); + } +} + +static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; + break; + case KBASE_HWCNT_SET_SECONDARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; + break; + case KBASE_HWCNT_SET_TERTIARY: + if (is_csf) + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; + else + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; + break; + default: + WARN_ON(true); + } +} + +static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; + break; + case KBASE_HWCNT_SET_SECONDARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; + break; + case KBASE_HWCNT_SET_TERTIARY: + *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; + break; + default: + WARN_ON(true); + } +} + +/** + * kbasep_hwcnt_backend_gpu_metadata_create() - Create hardware counter metadata + * for the GPU. + * @gpu_info: Non-NULL pointer to hwcnt info for current GPU. + * @is_csf: true for CSF GPU, otherwise false. + * @counter_set: The performance counter set to use. + * @metadata: Non-NULL pointer to where created metadata is stored + * on success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, + const bool is_csf, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **metadata) +{ + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description group; + struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + size_t non_sc_block_count; + size_t sc_block_count; + + WARN_ON(!gpu_info); + WARN_ON(!metadata); + + /* Calculate number of block instances that aren't shader cores */ + non_sc_block_count = 2 + gpu_info->l2_count; + /* Calculate number of block instances that are shader cores */ + sc_block_count = fls64(gpu_info->core_mask); + + /* + * A system can have up to 64 shader cores, but the 64-bit + * availability mask can't physically represent that many cores as well + * as the other hardware blocks. 
+ * Error out if there are more blocks than our implementation can + * support. + */ + if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + return -EINVAL; + + /* One Front End block */ + kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); + blks[0].inst_cnt = 1; + blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + /* One Tiler block */ + kbasep_get_tiler_block_type(&blks[1].type, counter_set); + blks[1].inst_cnt = 1; + blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + /* l2_count memsys blks */ + kbasep_get_memsys_block_type(&blks[2].type, counter_set); + blks[2].inst_cnt = gpu_info->l2_count; + blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + /* + * There are as many shader cores in the system as there are bits set in + * the core mask. However, the dump buffer memory requirements need to + * take into account the fact that the core mask may be non-contiguous. + * + * For example, a system with a core mask of 0b1011 has the same dump + * buffer memory requirements as a system with 0b1111, but requires more + * memory than a system with 0b0111. However, core 2 of the system with + * 0b1011 doesn't physically exist, and the dump buffer memory that + * accounts for that core will never be written to when we do a counter + * dump. + * + * We find the core mask's last set bit to determine the memory + * requirements, and embed the core mask into the availability mask so + * we can determine later which shader cores physically exist. + */ + kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); + blks[3].inst_cnt = sc_block_count; + blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + + WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + + group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; + group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; + group.blks = blks; + + desc.grp_cnt = 1; + desc.grps = &group; + desc.clk_cnt = gpu_info->clk_cnt; + + /* The JM, Tiler, and L2s are always available, and are before cores */ + desc.avail_mask = (1ull << non_sc_block_count) - 1; + /* Embed the core mask directly in the availability mask */ + desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); + + return kbase_hwcnt_metadata_create(&desc, metadata); +} + +/** + * kbasep_hwcnt_backend_jm_dump_bytes() - Get the raw dump buffer size for the + * GPU. + * @gpu_info: Non-NULL pointer to hwcnt info for the GPU. + * + * Return: Size of buffer the GPU needs to perform a counter dump. 
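+ *
+ * Worked example (illustrative numbers only): with 2 L2 slices, a core mask
+ * of 0b1011 (so fls64() == 4) and 64 counter values per block, this is
+ * (2 + 2 + 4) * 64 * KBASE_HWCNT_VALUE_HW_BYTES, i.e. 8 blocks of 64 32-bit
+ * values = 2048 bytes.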
+ */
+static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_info *gpu_info)
+{
+	WARN_ON(!gpu_info);
+
+	return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
+	       gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
+}
+
+int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+				   enum kbase_hwcnt_set counter_set,
+				   const struct kbase_hwcnt_metadata **out_metadata,
+				   size_t *out_dump_bytes)
+{
+	int errcode;
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_bytes;
+
+	if (!gpu_info || !out_metadata || !out_dump_bytes)
+		return -EINVAL;
+
+	/*
+	 * For architectures where a max_config interface is available
+	 * from the arbiter, the v5 dump bytes and the v5 metadata are
+	 * based on the maximum possible allocation of HW to the GPU,
+	 * because they need to be prepared for the worst case, where all
+	 * the available L2 caches and shader cores are allocated.
+	 */
+	dump_bytes = kbasep_hwcnt_backend_jm_dump_bytes(gpu_info);
+	errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, false, counter_set, &metadata);
+	if (errcode)
+		return errcode;
+
+	/*
+	 * The physical dump size should be half of the dump abstraction size
+	 * in the metadata, since the physical HW uses 32 bits per value but
+	 * the metadata specifies 64 bits per value.
+	 */
+	WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes);
+
+	*out_metadata = metadata;
+	*out_dump_bytes = dump_bytes;
+
+	return 0;
+}
+
+void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+	if (!metadata)
+		return;
+
+	kbase_hwcnt_metadata_destroy(metadata);
+}
+
+int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
+				    enum kbase_hwcnt_set counter_set,
+				    const struct kbase_hwcnt_metadata **out_metadata)
+{
+	int errcode;
+	const struct kbase_hwcnt_metadata *metadata;
+
+	if (!gpu_info || !out_metadata)
+		return -EINVAL;
+
+	errcode = kbasep_hwcnt_backend_gpu_metadata_create(gpu_info, true, counter_set, &metadata);
+	if (errcode)
+		return errcode;
+
+	*out_metadata = metadata;
+
+	return 0;
+}
+
+void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+	if (!metadata)
+		return;
+
+	kbase_hwcnt_metadata_destroy(metadata);
+}
+
+static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
+{
+	bool is_shader = false;
+
+	/* Warn on unknown group type */
+	if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
+		return false;
+
+	if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
+	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
+	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 ||
+	    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED)
+		is_shader = true;
+
+	return is_shader;
+}
+
+static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
+{
+	bool is_l2_cache = false;
+
+	switch (grp_type) {
+	case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
+		if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
+		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
+		    blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
+			is_l2_cache = true;
+		break;
+	default:
+		/* Warn on unknown group type */
+		WARN_ON(true);
+	}
+
+	return is_l2_cache;
+}
+
+int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
+			    const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
+			    const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+	const u64 *dump_src = src;
+	size_t src_offset = 0;
+	u64 core_mask = pm_core_mask;
+
+	/* Variables to deal with the current configuration */
+	int l2_count = 0;
+
+	if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
+		return -EINVAL;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+	{
+		const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+		const size_t ctr_cnt =
+			kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+		const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
+		const bool is_shader_core = is_block_type_shader(
+			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
+		const bool is_l2_cache = is_block_type_l2_cache(
+			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+		const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
+			kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+		bool hw_res_available = true;
+
+		/*
+		 * If the L2 block count seen so far exceeds the number of L2
+		 * slices currently allocated, there is no HW allocated to that
+		 * block.
+		 */
+		if (is_l2_cache) {
+			l2_count++;
+			if (l2_count > curr_config->num_l2_slices)
+				hw_res_available = false;
+			else
+				hw_res_available = true;
+		}
+		/*
+		 * For the shader cores, the currently allocated shader_mask is
+		 * always a subset of the maximum shader_mask, so after skipping
+		 * any unavailable L2 blocks the available shader cores will
+		 * always have a matching set of block instances to accumulate
+		 * into.
+		 */
+		else
+			hw_res_available = true;
+
+		/*
+		 * Skip block if no values in the destination block are enabled.
+		 */
+		if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
+			u64 *dst_blk =
+				kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+			const u64 *src_blk = dump_src + src_offset;
+			bool blk_powered;
+
+			if (!is_shader_core) {
+				/* Under the current PM system, counters will
+				 * only be enabled after all non-shader-core
+				 * blocks are powered up.
+				 */
+				blk_powered = true;
+			} else {
+				/* Check the PM core mask to see if the shader
+				 * core is powered up.
+				 */
+				blk_powered = core_mask & 1;
+			}
+
+			if (blk_powered && !is_undefined && hw_res_available) {
+				/* Only powered and defined blocks have valid data. */
+				if (accumulate) {
+					kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
+										 hdr_cnt, ctr_cnt);
+				} else {
+					kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
+									   (hdr_cnt + ctr_cnt));
+				}
+			} else {
+				/* Even though the block might be undefined, the
+				 * user has enabled counter collection for it.
+				 * We should not propagate garbage data.
+ */ + if (accumulate) { + /* No-op to preserve existing values */ + } else { + /* src is garbage, so zero the dst */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, + (hdr_cnt + ctr_cnt)); + } + } + } + + /* Just increase the src_offset if the HW is available */ + if (hw_res_available) + src_offset += (hdr_cnt + ctr_cnt); + if (is_shader_core) + core_mask = core_mask >> 1; + } + + return 0; +} + +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) +{ + const struct kbase_hwcnt_metadata *metadata; + const u64 *dump_src = src; + size_t src_offset = 0; + size_t grp, blk, blk_inst; + + if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) + return -EINVAL; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); + const size_t ctr_cnt = + kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); + const bool is_undefined = kbase_hwcnt_is_block_type_undefined( + kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); + + /* + * Skip block if no values in the destination block are enabled. + */ + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { + u64 *dst_blk = + kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + const u64 *src_blk = dump_src + src_offset; + + if (!is_undefined) { + if (accumulate) { + kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, + hdr_cnt, ctr_cnt); + } else { + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, + (hdr_cnt + ctr_cnt)); + } + } else { + /* Even though the block might be undefined, the + * user has enabled counter collection for it. + * We should not propagate garbage data. + */ + if (accumulate) { + /* No-op to preserve existing values */ + } else { + /* src is garbage, so zero the dst */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, + (hdr_cnt + ctr_cnt)); + } + } + } + + src_offset += (hdr_cnt + ctr_cnt); + } + + return 0; +} + +/** + * kbasep_hwcnt_backend_gpu_block_map_from_physical() - Convert from a physical + * block enable map to a + * block enable map + * abstraction. + * @phys: Physical 32-bit block enable map + * @lo: Non-NULL pointer to where low 64 bits of block enable map abstraction + * will be stored. + * @hi: Non-NULL pointer to where high 64 bits of block enable map abstraction + * will be stored. 
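+ *
+ * Each physical bit n expands to the four abstraction bits [n*4, n*4+3].
+ * For example (hypothetical value): phys = 0x3 expands to *lo = 0xff and
+ * *hi = 0, since physical bits 0 and 1 enable abstraction bits 0 to 7.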
+ */ +static inline void kbasep_hwcnt_backend_gpu_block_map_from_physical(u32 phys, u64 *lo, u64 *hi) +{ + u64 dwords[2] = { 0, 0 }; + + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u16 packed = phys >> (16 * dword_idx); + u64 dword = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u64 mask = (packed >> (hword_bit)) & 0x1; + + dword |= mask << (dword_bit + 0); + dword |= mask << (dword_bit + 1); + dword |= mask << (dword_bit + 2); + dword |= mask << (dword_bit + 3); + } + dwords[dword_idx] = dword; + } + *lo = dwords[0]; + *hi = dwords[1]; +} + +void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + u64 fe_bm[EM_COUNT] = { 0 }; + u64 shader_bm[EM_COUNT] = { 0 }; + u64 tiler_bm[EM_COUNT] = { 0 }; + u64 mmu_l2_bm[EM_COUNT] = { 0 }; + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = src->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst); + + if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); + size_t map_idx; + + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; + + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: + /* Nothing to do in this case. 
*/ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + fe_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + shader_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + mmu_l2_bm[map_idx] |= blk_map[map_idx]; + break; + default: + WARN_ON(true); + } + } + } else { + WARN_ON(true); + } + } + + dst->fe_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fe_bm[EM_LO], fe_bm[EM_HI]); + dst->shader_bm = + kbase_hwcnt_backend_gpu_block_map_to_physical(shader_bm[EM_LO], shader_bm[EM_HI]); + dst->tiler_bm = + kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]); + dst->mmu_l2_bm = + kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]); +} + +void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src) +{ + switch (src) { + case KBASE_HWCNT_SET_PRIMARY: + *dst = KBASE_HWCNT_PHYSICAL_SET_PRIMARY; + break; + case KBASE_HWCNT_SET_SECONDARY: + *dst = KBASE_HWCNT_PHYSICAL_SET_SECONDARY; + break; + case KBASE_HWCNT_SET_TERTIARY: + *dst = KBASE_HWCNT_PHYSICAL_SET_TERTIARY; + break; + default: + WARN_ON(true); + } +} + +void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src) +{ + const struct kbase_hwcnt_metadata *metadata; + + u64 fe_bm[EM_COUNT] = { 0 }; + u64 shader_bm[EM_COUNT] = { 0 }; + u64 tiler_bm[EM_COUNT] = { 0 }; + u64 mmu_l2_bm[EM_COUNT] = { 0 }; + size_t grp, blk, blk_inst; + + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + metadata = dst->metadata; + + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO], + &shader_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO], + &tiler_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO], + &mmu_l2_bm[EM_HI]); + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + + if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); + size_t map_idx; + + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; + + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: + /* Nothing to do in this case. 
*/ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + blk_map[map_idx] = fe_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + blk_map[map_idx] = tiler_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + blk_map[map_idx] = shader_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + blk_map[map_idx] = mmu_l2_bm[map_idx]; + break; + default: + WARN_ON(true); + } + } + } else { + WARN_ON(true); + } + } +} + +void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata)) + return; + + metadata = buf->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); + u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst); + const u64 *blk_map = + kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); + + if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); + u64 prfcnt_bm[EM_COUNT] = { 0 }; + u32 prfcnt_en = 0; + size_t map_idx; + + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; + + prfcnt_bm[map_idx] = blk_map[map_idx]; + } + + prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO], + prfcnt_bm[EM_HI]); + + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; + } else { + WARN_ON(true); + } + } +} diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h new file mode 100644 index 0000000..a49c31e --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h @@ -0,0 +1,407 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _KBASE_HWCNT_GPU_H_ +#define _KBASE_HWCNT_GPU_H_ + +#include <linux/bug.h> +#include <linux/types.h> + +struct kbase_device; +struct kbase_hwcnt_metadata; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/* Hardware counter version 5 definitions, V5 is the only supported version. 
*/ +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 +#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 +#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ + (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) + +/* FrontEnd block count in V5 GPU hardware counter. */ +#define KBASE_HWCNT_V5_FE_BLOCK_COUNT 1 +/* Tiler block count in V5 GPU hardware counter. */ +#define KBASE_HWCNT_V5_TILER_BLOCK_COUNT 1 + +/* Index of the PRFCNT_EN header into a V5 counter block */ +#define KBASE_HWCNT_V5_PRFCNT_EN_HEADER 2 + +/* Number of bytes for each counter value in hardware. */ +#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32)) + +/** + * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to + * identify metadata groups. + * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. + */ +enum kbase_hwcnt_gpu_group_type { + KBASE_HWCNT_GPU_GROUP_TYPE_V5, +}; + +/** + * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, + * used to identify metadata blocks. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager + * or CSF HW). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: Secondary Front End block (Job + * manager or CSF HW). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: Tertiary Front End block (Job + * manager or CSF HW). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: Undefined Front End block + * (e.g. if a counter set that + * a block doesn't support is + * used). + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: Tiler block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: Undefined Tiler block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: Secondary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: Tertiary Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: Undefined Shader Core block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block. + */ +enum kbase_hwcnt_gpu_v5_block_type { + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED, +}; + +/** + * enum kbase_hwcnt_set - GPU hardware counter sets + * @KBASE_HWCNT_SET_PRIMARY: The Primary set of counters + * @KBASE_HWCNT_SET_SECONDARY: The Secondary set of counters + * @KBASE_HWCNT_SET_TERTIARY: The Tertiary set of counters + * @KBASE_HWCNT_SET_UNDEFINED: Undefined set of counters + */ +enum kbase_hwcnt_set { + KBASE_HWCNT_SET_PRIMARY, + KBASE_HWCNT_SET_SECONDARY, + KBASE_HWCNT_SET_TERTIARY, + KBASE_HWCNT_SET_UNDEFINED = 255, +}; + +/** + * struct kbase_hwcnt_physical_enable_map - Representation of enable map + * directly used by GPU. + * @fe_bm: Front end (JM/CSHW) counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. 
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ */
+struct kbase_hwcnt_physical_enable_map {
+	u32 fe_bm;
+	u32 shader_bm;
+	u32 tiler_bm;
+	u32 mmu_l2_bm;
+};
+
+/*
+ * Values for Hardware Counter SET_SELECT value.
+ * Directly passed to HW.
+ */
+enum kbase_hwcnt_physical_set {
+	KBASE_HWCNT_PHYSICAL_SET_PRIMARY = 0,
+	KBASE_HWCNT_PHYSICAL_SET_SECONDARY = 1,
+	KBASE_HWCNT_PHYSICAL_SET_TERTIARY = 2,
+};
+
+/**
+ * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
+ * @l2_count:                L2 cache count.
+ * @core_mask:               Shader core mask. May be sparse.
+ * @clk_cnt:                 Number of clock domains available.
+ * @prfcnt_values_per_block: Total entries (header + counters) of performance
+ *                           counter per block.
+ */
+struct kbase_hwcnt_gpu_info {
+	size_t l2_count;
+	u64 core_mask;
+	u8 clk_cnt;
+	size_t prfcnt_values_per_block;
+};
+
+/**
+ * struct kbase_hwcnt_curr_config - Current configuration of HW allocated to the
+ *                                  GPU.
+ * @num_l2_slices:  Current number of L2 slices allocated to the GPU.
+ * @shader_present: Current shader present bitmap that is allocated to the GPU.
+ *
+ * For architectures with the max_config interface available from the Arbiter,
+ * the resources currently allocated may change during runtime due to a
+ * re-partitioning (possible with a partition manager). Thus, the HWC needs to
+ * be prepared to report any possible set of counters. For this reason the
+ * memory layout in the userspace is based on the maximum possible allocation.
+ * On the other hand, each partition has just the view of its currently
+ * allocated resources. Therefore, it is necessary to correctly map the dumped
+ * HWC values from the registers into this maximum memory layout so that they
+ * can be exposed to the userspace side correctly.
+ *
+ * For the L2 caches the number alone is enough, since the allocated ones are
+ * accumulated into the first available L2 slots of the destination buffer.
+ *
+ * For the correct mapping of the shader cores it is necessary to skip all the
+ * L2 cache slots in the destination buffer that are not allocated. However,
+ * no logic is needed to map the shader core bitmap into the memory layout,
+ * because the allocated shader_present will always be a subset of the maximum
+ * shader_present. This is possible because:
+ * 1 - Partitions are made of slices and they are always ordered from the ones
+ *     with more shader cores to the ones with fewer.
+ * 2 - The shader cores in a slice are always contiguous.
+ * 3 - A partition can only have a contiguous set of slices allocated to it.
+ * For example, suppose 4 slices are available in total: 1 with 4 cores, 2
+ * with 3 cores and 1 with 2 cores. The maximum possible shader_present would
+ * be:
+ * 0x0011|0111|0111|1111 -> note the order and that the shader cores are
+ * contiguous in any slice.
+ * Supposing that a partition takes the two slices in the middle, the current
+ * config shader_present for this partition would be:
+ * 0x0111|0111 -> note that this is a subset of the maximum above and the
+ * slices are contiguous.
+ * Therefore, by directly copying any subset of the maximum possible
+ * shader_present the mapping is already achieved.
+ */
+struct kbase_hwcnt_curr_config {
+	size_t num_l2_slices;
+	u64 shader_present;
+};
+
+/**
+ * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined.
+ *
+ * @grp_type: Hardware counter group type.
+ * @blk_type: Hardware counter block type.
+ *
+ * Return: true if the block type is undefined, else false.
+ */ +static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type, + const uint64_t blk_type) +{ + /* Warn on unknown group type */ + if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) + return false; + + return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED); +} + +/** + * kbase_hwcnt_jm_metadata_create() - Create hardware counter metadata for the + * JM GPUs. + * @info: Non-NULL pointer to info struct. + * @counter_set: The performance counter set used. + * @out_metadata: Non-NULL pointer to where created metadata is stored on + * success. + * @out_dump_bytes: Non-NULL pointer to where the size of the GPU counter dump + * buffer is stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata, + size_t *out_dump_bytes); + +/** + * kbase_hwcnt_jm_metadata_destroy() - Destroy JM GPU hardware counter metadata. + * + * @metadata: Pointer to metadata to destroy. + */ +void kbase_hwcnt_jm_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_csf_metadata_create() - Create hardware counter metadata for the + * CSF GPUs. + * @info: Non-NULL pointer to info struct. + * @counter_set: The performance counter set used. + * @out_metadata: Non-NULL pointer to where created metadata is stored on + * success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_csf_metadata_create(const struct kbase_hwcnt_gpu_info *info, + enum kbase_hwcnt_set counter_set, + const struct kbase_hwcnt_metadata **out_metadata); + +/** + * kbase_hwcnt_csf_metadata_destroy() - Destroy CSF GPU hardware counter + * metadata. + * @metadata: Pointer to metadata to destroy. + */ +void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); + +/** + * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw + * dump buffer in src into the dump buffer + * abstraction in dst. + * @dst: Non-NULL pointer to destination dump buffer. + * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of destination dump + * buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @pm_core_mask: PM state synchronized shaders core mask with the dump. + * @curr_config: Current allocated hardware resources to correctly map the + * source raw dump buffer to the destination dump buffer. + * @accumulate: True if counters in source should be accumulated into + * destination, rather than copied. + * + * The dst and dst_enable_map MUST have been created from the same metadata as + * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get + * the length of src. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, + const u64 pm_core_mask, + const struct kbase_hwcnt_curr_config *curr_config, bool accumulate); + +/** + * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw + * dump buffer in src into the dump buffer + * abstraction in dst. + * @dst: Non-NULL pointer to destination dump buffer. 
+ * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of dst dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @accumulate: True if counters in src should be accumulated into + * destination, rather than copied. + * + * The dst and dst_enable_map MUST have been created from the same metadata as + * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get + * the length of src. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, + const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate); + +/** + * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block + * enable map abstraction to + * a physical block enable + * map. + * @lo: Low 64 bits of block enable map abstraction. + * @hi: High 64 bits of block enable map abstraction. + * + * The abstraction uses 128 bits to enable 128 block values, whereas the + * physical uses just 32 bits, as bit n enables values [n*4, n*4+3]. + * Therefore, this conversion is lossy. + * + * Return: 32-bit physical block enable map. + */ +static inline u32 kbase_hwcnt_backend_gpu_block_map_to_physical(u64 lo, u64 hi) +{ + u32 phys = 0; + u64 dwords[2] = { lo, hi }; + size_t dword_idx; + + for (dword_idx = 0; dword_idx < 2; dword_idx++) { + const u64 dword = dwords[dword_idx]; + u16 packed = 0; + + size_t hword_bit; + + for (hword_bit = 0; hword_bit < 16; hword_bit++) { + const size_t dword_bit = hword_bit * 4; + const u16 mask = ((dword >> (dword_bit + 0)) & 0x1) | + ((dword >> (dword_bit + 1)) & 0x1) | + ((dword >> (dword_bit + 2)) & 0x1) | + ((dword >> (dword_bit + 3)) & 0x1); + packed |= (mask << hword_bit); + } + phys |= ((u32)packed) << (16 * dword_idx); + } + return phys; +} + +/** + * kbase_hwcnt_gpu_enable_map_to_physical() - Convert an enable map abstraction + * into a physical enable map. + * @dst: Non-NULL pointer to destination physical enable map. + * @src: Non-NULL pointer to source enable map abstraction. + * + * The src must have been created from a metadata returned from a call to + * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. + * + * This is a lossy conversion, as the enable map abstraction has one bit per + * individual counter block value, but the physical enable map uses 1 bit for + * every 4 counters, shared over all instances of a block. + */ +void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_map *dst, + const struct kbase_hwcnt_enable_map *src); + +/** + * kbase_hwcnt_gpu_set_to_physical() - Map counter set selection to physical + * SET_SELECT value. + * + * @dst: Non-NULL pointer to destination physical SET_SELECT value. + * @src: Non-NULL pointer to source counter set selection. + */ +void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src); + +/** + * kbase_hwcnt_gpu_enable_map_from_physical() - Convert a physical enable map to + * an enable map abstraction. + * @dst: Non-NULL pointer to destination enable map abstraction. + * @src: Non-NULL pointer to source physical enable map. + * + * The dst must have been created from a metadata returned from a call to + * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. 
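+ *
+ * The granularity matches the conversion to physical form: each physical bit
+ * expands to a group of 4 abstraction bits, so a round trip through the
+ * physical form enables e.g. abstraction bits 4 to 7 whenever any one of
+ * them was set beforehand.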
+ * + * This is a lossy conversion, as the physical enable map can technically + * support counter blocks with 128 counters each, but no hardware actually uses + * more than 64, so the enable map abstraction has nowhere to store the enable + * information for the 64 non-existent counters. + */ +void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_physical_enable_map *src); + +/** + * kbase_hwcnt_gpu_patch_dump_headers() - Patch all the performance counter + * enable headers in a dump buffer to + * reflect the specified enable map. + * @buf: Non-NULL pointer to dump buffer to patch. + * @enable_map: Non-NULL pointer to enable map. + * + * The buf and enable_map must have been created from a metadata returned from + * a call to kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. + * + * This function should be used before handing off a dump buffer over the + * kernel-user boundary, to ensure the header is accurate for the enable map + * used by the user. + */ +void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, + const struct kbase_hwcnt_enable_map *enable_map); + +#endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c new file mode 100644 index 0000000..0cf2f94 --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#include "hwcnt/mali_kbase_hwcnt_gpu.h" +#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" + +#include <linux/bug.h> +#include <linux/err.h> +#include <linux/slab.h> + +int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, + const struct kbase_hwcnt_metadata *src_md) +{ + struct kbase_hwcnt_description desc; + struct kbase_hwcnt_group_description group; + struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; + size_t prfcnt_values_per_block; + size_t blk; + int err; + struct kbase_hwcnt_metadata_narrow *metadata_narrow; + + if (!dst_md_narrow || !src_md || !src_md->grp_metadata || + !src_md->grp_metadata[0].blk_metadata) + return -EINVAL; + + /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block + * count in the metadata. + */ + if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || + (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) + return -EINVAL; + + /* Get the values count in the first block. */ + prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); + + /* check all blocks should have same values count. 
*/ + for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); + if (val_cnt != prfcnt_values_per_block) + return -EINVAL; + } + + /* Only support 64 and 128 entries per block. */ + if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) + return -EINVAL; + + metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL); + if (!metadata_narrow) + return -ENOMEM; + + /* Narrow to 64 entries per block to keep API backward compatibility. */ + prfcnt_values_per_block = 64; + + for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { + size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk); + blks[blk] = (struct kbase_hwcnt_block_description){ + .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk), + .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk), + .hdr_cnt = blk_hdr_cnt, + .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt, + }; + } + + group = (struct kbase_hwcnt_group_description){ + .type = kbase_hwcnt_metadata_group_type(src_md, 0), + .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT, + .blks = blks, + }; + + desc = (struct kbase_hwcnt_description){ + .grp_cnt = kbase_hwcnt_metadata_group_count(src_md), + .avail_mask = src_md->avail_mask, + .clk_cnt = src_md->clk_cnt, + .grps = &group, + }; + + err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata); + if (!err) { + /* Narrow down the buffer size to half as the narrowed metadata + * only supports 32-bit but the created metadata uses 64-bit for + * block entry. + */ + metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1; + *dst_md_narrow = metadata_narrow; + } else { + kfree(metadata_narrow); + } + + return err; +} + +void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow) +{ + if (!md_narrow) + return; + + kbase_hwcnt_metadata_destroy(md_narrow->metadata); + kfree(md_narrow); +} + +int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, + struct kbase_hwcnt_dump_buffer_narrow *dump_buf) +{ + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; + u8 *buf; + + if (!md_narrow || !dump_buf) + return -EINVAL; + + dump_buf_bytes = md_narrow->dump_buf_bytes; + clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt; + + /* Make a single allocation for both dump_buf and clk_cnt_buf. 
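+	 * The u32 counter values occupy the first dump_buf_bytes of the
+	 * allocation; the u64 cycle counts follow immediately after.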
*/ + buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){ + .md_narrow = md_narrow, + .dump_buf = (u32 *)buf, + .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes), + }; + + return 0; +} + +void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow) +{ + if (!dump_buf_narrow) + return; + + kfree(dump_buf_narrow->dump_buf); + *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL, + .dump_buf = NULL, + .clk_cnt_buf = NULL }; +} + +int kbase_hwcnt_dump_buffer_narrow_array_alloc( + const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, + struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) +{ + struct kbase_hwcnt_dump_buffer_narrow *buffers; + size_t buf_idx; + unsigned int order; + unsigned long addr; + size_t dump_buf_bytes; + size_t clk_cnt_buf_bytes; + size_t total_dump_buf_size; + + if (!md_narrow || !dump_bufs) + return -EINVAL; + + dump_buf_bytes = md_narrow->dump_buf_bytes; + clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt; + + /* Allocate memory for the dump buffer struct array */ + buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); + if (!buffers) + return -ENOMEM; + + /* Allocate pages for the actual dump buffers, as they tend to be fairly + * large. + */ + order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); + addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + + if (!addr) { + kfree(buffers); + return -ENOMEM; + } + + *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){ + .page_addr = addr, + .page_order = order, + .buf_cnt = n, + .bufs = buffers, + }; + + total_dump_buf_size = dump_buf_bytes * n; + /* Set the buffer of each dump buf */ + for (buf_idx = 0; buf_idx < n; buf_idx++) { + const size_t dump_buf_offset = dump_buf_bytes * buf_idx; + const size_t clk_cnt_buf_offset = + total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx); + + buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){ + .md_narrow = md_narrow, + .dump_buf = (u32 *)(addr + dump_buf_offset), + .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset), + }; + } + + return 0; +} + +void kbase_hwcnt_dump_buffer_narrow_array_free( + struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) +{ + if (!dump_bufs) + return; + + kfree(dump_bufs->bufs); + free_pages(dump_bufs->page_addr, dump_bufs->page_order); + memset(dump_bufs, 0, sizeof(*dump_bufs)); +} + +void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, + const u64 *blk_em, size_t val_cnt) +{ + size_t val; + + for (val = 0; val < val_cnt; val++) { + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); + u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val]; + + dst_blk[val] = val_enabled ? 
src_val : 0; + } +} + +void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata_narrow *metadata_narrow; + size_t grp; + size_t clk; + + if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || + WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) || + WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) || + WARN_ON(src->metadata->grp_cnt != 1) || + WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != + src->metadata->grp_metadata[0].blk_cnt) || + WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != + KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) || + WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > + src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) + return; + + /* Don't use src metadata since src buffer is bigger than dst buffer. */ + metadata_narrow = dst_narrow->md_narrow; + + for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) { + size_t blk; + size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp); + + for (blk = 0; blk < blk_cnt; blk++) { + size_t blk_inst; + size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count( + metadata_narrow, grp, blk); + + for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) { + /* The narrowed down buffer is only 32-bit. */ + u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance( + dst_narrow, grp, blk, blk_inst); + const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( + src, grp, blk, blk_inst); + const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( + dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count( + metadata_narrow, grp, blk); + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( + val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / + KBASE_HWCNT_VALUE_BYTES)); + + kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk, + blk_em, val_cnt); + } + } + } + + for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) { + bool clk_enabled = + kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); + + dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; + } +} diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h new file mode 100644 index 0000000..afd236d --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ *
+ */
+
+#ifndef _KBASE_HWCNT_GPU_NARROW_H_
+#define _KBASE_HWCNT_GPU_NARROW_H_
+
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+#include <linux/types.h>
+
+struct kbase_device;
+struct kbase_hwcnt_metadata;
+struct kbase_hwcnt_enable_map;
+struct kbase_hwcnt_dump_buffer;
+
+/**
+ * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical
+ *                                      layout of narrow dump buffers.
+ *                                      For backward compatibility, the narrow
+ *                                      metadata only supports 64 counters per
+ *                                      block and 32 bits per block entry.
+ * @metadata:       Non-NULL pointer to the metadata before narrowing down to
+ *                  32-bit block entries; it has 64 counters per block and
+ *                  64 bits per value.
+ * @dump_buf_bytes: The dump buffer size in bytes after narrowing the 64-bit
+ *                  block entries down to 32-bit.
+ */
+struct kbase_hwcnt_metadata_narrow {
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t dump_buf_bytes;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer.
+ * @md_narrow:   Non-NULL pointer to narrow metadata used to identify, and to
+ *               describe the layout of the narrow dump buffer.
+ * @dump_buf:    Non-NULL pointer to an array of u32 values; the array size in
+ *               bytes is md_narrow->dump_buf_bytes.
+ * @clk_cnt_buf: A pointer to an array of u64 values holding the cycle count
+ *               elapsed for each clock domain.
+ */
+struct kbase_hwcnt_dump_buffer_narrow {
+	const struct kbase_hwcnt_metadata_narrow *md_narrow;
+	u32 *dump_buf;
+	u64 *clk_cnt_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump
+ *                                               buffer array.
+ * @page_addr:  Address of first allocated page. A single allocation is used
+ *              for all narrow dump buffers in the array.
+ * @page_order: The allocation order of the pages; the order is on a
+ *              logarithmic (base-2) scale.
+ * @buf_cnt:    The number of allocated dump buffers.
+ * @bufs:       Non-NULL pointer to the array of narrow dump buffer descriptors.
+ */
+struct kbase_hwcnt_dump_buffer_narrow_array {
+	unsigned long page_addr;
+	unsigned int page_order;
+	size_t buf_cnt;
+	struct kbase_hwcnt_dump_buffer_narrow *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from
+ *                                             narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ *
+ * Return: Number of hardware counter groups described by narrow metadata.
+ */
+static inline size_t
+kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow)
+{
+	return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group
+ *                                            from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp:       Index of the group in the narrow metadata.
+ *
+ * Return: Type of the group grp.
+ */
+static inline u64
+kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow,
+				       size_t grp)
+{
+	return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
+}
+
+/**
+ * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a
+ *                                             group from narrow metadata.
+ * @md_narrow: Non-NULL pointer to narrow metadata.
+ * @grp:       Index of the group in the narrow metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */ +static inline size_t +kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, + size_t grp) +{ + return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp); +} + +/** + * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of + * instances of a block + * from narrow metadata. + * @md_narrow: Non-NULL pointer to narrow metadata. + * @grp: Index of the group in the narrow metadata. + * @blk: Index of the block in the group. + * + * Return: Number of instances of block blk in group grp. + */ +static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count( + const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) +{ + return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk); +} + +/** + * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter + * headers from narrow + * metadata. + * @md_narrow: Non-NULL pointer to narrow metadata. + * @grp: Index of the group in the narrow metadata. + * @blk: Index of the block in the group. + * + * Return: Number of counter headers in each instance of block blk in group grp. + */ +static inline size_t +kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, + size_t grp, size_t blk) +{ + return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk); +} + +/** + * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of + * counters from narrow + * metadata. + * @md_narrow: Non-NULL pointer to narrow metadata. + * @grp: Index of the group in the narrow metadata. + * @blk: Index of the block in the group. + * + * Return: Number of counters in each instance of block blk in group grp. + */ +static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count( + const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) +{ + return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk); +} + +/** + * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values + * from narrow metadata. + * @md_narrow: Non-NULL pointer to narrow metadata. + * @grp: Index of the group in the narrow metadata. + * @blk: Index of the block in the group. + * + * Return: Number of headers plus counters in each instance of block blk + * in group grp. + */ +static inline size_t +kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, + size_t grp, size_t blk) +{ + return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) + + kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk); +} + +/** + * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a + * narrowed block instance's + * dump buffer. + * @buf: Non-NULL pointer to narrow dump buffer. + * @grp: Index of the group in the narrow metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: u32* to the dump buffer for the block instance. 
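+ * The metadata index and stride fields used here count values rather than
+ * bytes, so they can be applied unchanged to the u32 array; only the value
+ * width differs, which is why the narrow buffer is half the size of its
+ * 64-bit counterpart.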
+ */ +static inline u32 * +kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf, + size_t grp, size_t blk, size_t blk_inst) +{ + return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index + + buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + + (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * + blk_inst); +} + +/** + * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC + * entries per block truncated to + * 64 entries and block entry size + * narrowed down to 32-bit. + * + * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored + * on success. + * @src_md: Non-NULL pointer to the HWC metadata used as the source to + * create dst_md_narrow. + * + * For backward compatibility of the interface to user clients, a new metadata + * with entries per block truncated to 64 and block entry size narrowed down + * to 32-bit will be created for dst_md_narrow. + * The total entries per block in src_md must be 64 or 128, if it's other + * values, function returns error since it's not supported. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, + const struct kbase_hwcnt_metadata *src_md); + +/** + * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow + * metadata object. + * @md_narrow: Pointer to hardware counter narrow metadata. + */ +void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow); + +/** + * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer. + * @md_narrow: Non-NULL pointer to narrow metadata. + * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be + * initialised to undefined values, so must be used as a copy + * destination, or cleared before use. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, + struct kbase_hwcnt_dump_buffer_narrow *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer. + * @dump_buf: Dump buffer to be freed. + * + * Can be safely called on an all-zeroed narrow dump buffer structure, or on an + * already freed narrow dump buffer. + */ +void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow + * dump buffers. + * @md_narrow: Non-NULL pointer to narrow metadata. + * @n: Number of narrow dump buffers to allocate + * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array + * object to be initialised. + * + * A single zeroed contiguous page allocation will be used for all of the + * buffers inside the object, where: + * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow.dump_buf_bytes + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_narrow_array_alloc( + const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, + struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer + * array. + * @dump_bufs: Narrow Dump buffer array to be freed. + * + * Can be safely called on an all-zeroed narrow dump buffer array structure, or + * on an already freed narrow dump buffer array. 
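+ * (This is safe because kfree() ignores a NULL pointer and free_pages()
+ * ignores a zero address, so no separate "already freed" state is tracked.)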
+ */ +void kbase_hwcnt_dump_buffer_narrow_array_free( + struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block + * values from source to + * destination. + * @dst_blk: Non-NULL pointer to destination block obtained from a call to + * kbase_hwcnt_dump_buffer_narrow_block_instance. + * @src_blk: Non-NULL pointer to source block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + * + * After the copy, any disabled values in destination will be zero, the enabled + * values in destination will be saturated at U32_MAX if the corresponding + * source value is bigger than U32_MAX, or copy the value from source if the + * corresponding source value is less than or equal to U32_MAX. + */ +void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, + const u64 *blk_em, size_t val_cnt); + +/** + * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a + * narrow dump buffer. + * @dst_narrow: Non-NULL pointer to destination dump buffer. + * @src: Non-NULL pointer to source dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * After the operation, all non-enabled values (including padding bytes) will be + * zero. Slower than the non-strict variant. + * + * The enabled values in dst_narrow will be saturated at U32_MAX if the + * corresponding source value is bigger than U32_MAX, or copy the value from + * source if the corresponding source value is less than or equal to U32_MAX. + */ +void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c new file mode 100644 index 0000000..763eb31 --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + */ + +#include "hwcnt/mali_kbase_hwcnt_types.h" + +#include <linux/slab.h> + +int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, + const struct kbase_hwcnt_metadata **out_metadata) +{ + char *buf; + struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_group_metadata *grp_mds; + size_t grp; + size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ + size_t dump_buf_count; /* Number of u64 values (inc padding) */ + size_t avail_mask_bits; /* Number of availability mask bits */ + + size_t size; + size_t offset; + + if (!desc || !out_metadata) + return -EINVAL; + + /* The maximum number of clock domains is 64. */ + if (desc->clk_cnt > (sizeof(u64) * BITS_PER_BYTE)) + return -EINVAL; + + /* Calculate the bytes needed to tightly pack the metadata */ + + /* Top level metadata */ + size = 0; + size += sizeof(struct kbase_hwcnt_metadata); + + /* Group metadata */ + size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + /* Block metadata */ + for (grp = 0; grp < desc->grp_cnt; grp++) { + size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt; + } + + /* Single allocation for the entire metadata */ + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* Use the allocated memory for the metadata and its members */ + + /* Bump allocate the top level metadata */ + offset = 0; + metadata = (struct kbase_hwcnt_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_metadata); + + /* Bump allocate the group metadata */ + grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; + + enable_map_count = 0; + dump_buf_count = 0; + avail_mask_bits = 0; + + for (grp = 0; grp < desc->grp_cnt; grp++) { + size_t blk; + + const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp; + struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; + + size_t group_enable_map_count = 0; + size_t group_dump_buffer_count = 0; + size_t group_avail_mask_bits = 0; + + /* Bump allocate this group's block metadata */ + struct kbase_hwcnt_block_metadata *blk_mds = + (struct kbase_hwcnt_block_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt; + + /* Fill in each block in the group's information */ + for (blk = 0; blk < grp_desc->blk_cnt; blk++) { + const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; + const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; + + blk_md->type = blk_desc->type; + blk_md->inst_cnt = blk_desc->inst_cnt; + blk_md->hdr_cnt = blk_desc->hdr_cnt; + blk_md->ctr_cnt = blk_desc->ctr_cnt; + blk_md->enable_map_index = group_enable_map_count; + blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values); + blk_md->dump_buf_index = group_dump_buffer_count; + blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( + n_values, + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); + blk_md->avail_mask_index = group_avail_mask_bits; + + group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt; + group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt; + group_avail_mask_bits += blk_md->inst_cnt; + } + + /* Fill in the group's information */ + grp_md->type = grp_desc->type; + grp_md->blk_cnt = grp_desc->blk_cnt; + grp_md->blk_metadata = blk_mds; + grp_md->enable_map_index = enable_map_count; + grp_md->dump_buf_index = dump_buf_count; + 
grp_md->avail_mask_index = avail_mask_bits;
+
+		enable_map_count += group_enable_map_count;
+		dump_buf_count += group_dump_buffer_count;
+		avail_mask_bits += group_avail_mask_bits;
+	}
+
+	/* Fill in the top level metadata's information */
+	metadata->grp_cnt = desc->grp_cnt;
+	metadata->grp_metadata = grp_mds;
+	metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
+	metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
+	metadata->avail_mask = desc->avail_mask;
+	metadata->clk_cnt = desc->clk_cnt;
+
+	WARN_ON(size != offset);
+	/* Due to the block alignment, there should be exactly one enable map
+	 * bit per 8 bytes (i.e. per u64 value) in the dump buffer.
+	 */
+	WARN_ON(metadata->dump_buf_bytes !=
+		(metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+
+	*out_metadata = metadata;
+	return 0;
+}
+
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata)
+{
+	kfree(metadata);
+}
+
+int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata,
+				 struct kbase_hwcnt_enable_map *enable_map)
+{
+	u64 *enable_map_buf;
+
+	if (!metadata || !enable_map)
+		return -EINVAL;
+
+	if (metadata->enable_map_bytes > 0) {
+		enable_map_buf = kzalloc(metadata->enable_map_bytes, GFP_KERNEL);
+		if (!enable_map_buf)
+			return -ENOMEM;
+	} else {
+		enable_map_buf = NULL;
+	}
+
+	enable_map->metadata = metadata;
+	enable_map->hwcnt_enable_map = enable_map_buf;
+	return 0;
+}
+
+void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map)
+{
+	if (!enable_map)
+		return;
+
+	kfree(enable_map->hwcnt_enable_map);
+	enable_map->hwcnt_enable_map = NULL;
+	enable_map->metadata = NULL;
+}
+
+int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
+				  struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	size_t dump_buf_bytes;
+	size_t clk_cnt_buf_bytes;
+	u8 *buf;
+
+	if (!metadata || !dump_buf)
+		return -EINVAL;
+
+	dump_buf_bytes = metadata->dump_buf_bytes;
+	clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
+
+	/* Make a single allocation for both dump_buf and clk_cnt_buf. */
+	buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	dump_buf->metadata = metadata;
+	dump_buf->dump_buf = (u64 *)buf;
+	dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes);
+
+	return 0;
+}
+
+void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
+{
+	if (!dump_buf)
+		return;
+
+	kfree(dump_buf->dump_buf);
+	memset(dump_buf, 0, sizeof(*dump_buf));
+}
+
+int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
+					struct kbase_hwcnt_dump_buffer_array *dump_bufs)
+{
+	struct kbase_hwcnt_dump_buffer *buffers;
+	size_t buf_idx;
+	unsigned int order;
+	unsigned long addr;
+	size_t dump_buf_bytes;
+	size_t clk_cnt_buf_bytes;
+
+	if (!metadata || !dump_bufs)
+		return -EINVAL;
+
+	dump_buf_bytes = metadata->dump_buf_bytes;
+	clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
+
+	/* Allocate memory for the dump buffer struct array */
+	buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
+	if (!buffers)
+		return -ENOMEM;
+
+	/* Allocate pages for the actual dump buffers, as they tend to be fairly
+	 * large.
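+	 * The single allocation is laid out as all n dump buffers first,
+	 * followed by the n clk_cnt buffers (see the offsets computed below).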
+ */ + order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); + addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + + if (!addr) { + kfree(buffers); + return -ENOMEM; + } + + dump_bufs->page_addr = addr; + dump_bufs->page_order = order; + dump_bufs->buf_cnt = n; + dump_bufs->bufs = buffers; + + /* Set the buffer of each dump buf */ + for (buf_idx = 0; buf_idx < n; buf_idx++) { + const size_t dump_buf_offset = dump_buf_bytes * buf_idx; + const size_t clk_cnt_buf_offset = + (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); + + buffers[buf_idx].metadata = metadata; + buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset); + buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset); + } + + return 0; +} + +void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs) +{ + if (!dump_bufs) + return; + + kfree(dump_bufs->bufs); + free_pages(dump_bufs->page_addr, dump_bufs->page_order); + memset(dump_bufs, 0, sizeof(*dump_bufs)); +} + +void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + u64 *dst_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } + + memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); +} + +void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) +{ + if (WARN_ON(!dst)) + return; + + memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); + + memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); +} + +void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + const u64 *blk_em = + kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( + val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); + + if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) { + /* Block available, so only zero non-enabled values */ + kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt); + } else { + /* Block not available, so zero the entire thing */ + kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + } + } +} + +void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + 
size_t grp, blk, blk_inst; + size_t clk; + + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + u64 *dst_blk; + const u64 *src_blk; + size_t val_cnt; + + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + continue; + + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + + kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { + if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) + dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; + } +} + +void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t grp, blk, blk_inst; + size_t clk; + + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || + WARN_ON(dst->metadata != src->metadata) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) + { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + const u64 *src_blk = + kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + const u64 *blk_em = + kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + /* Align upwards to include padding bytes */ + val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( + val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); + + kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt); + } + + kbase_hwcnt_metadata_for_each_clock(metadata, clk) + { + bool clk_enabled = + kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); + + dst->clk_cnt_buf[clk] = clk_enabled ? 
src->clk_cnt_buf[clk] : 0;
+	}
+}
+
+void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
+					const struct kbase_hwcnt_dump_buffer *src,
+					const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+	size_t clk;
+
+	if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+	{
+		u64 *dst_blk;
+		const u64 *src_blk;
+		size_t hdr_cnt;
+		size_t ctr_cnt;
+
+		if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+			continue;
+
+		dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+		src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+		hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+		ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+
+		kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt);
+	}
+
+	kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+	{
+		if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+			dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+	}
+}
+
+void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst,
+					       const struct kbase_hwcnt_dump_buffer *src,
+					       const struct kbase_hwcnt_enable_map *dst_enable_map)
+{
+	const struct kbase_hwcnt_metadata *metadata;
+	size_t grp, blk, blk_inst;
+	size_t clk;
+
+	if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
+	    WARN_ON(dst->metadata != src->metadata) ||
+	    WARN_ON(dst->metadata != dst_enable_map->metadata))
+		return;
+
+	metadata = dst->metadata;
+
+	kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
+	{
+		u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+		const u64 *src_blk =
+			kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+		const u64 *blk_em =
+			kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
+		size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+		size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+		/* Align the total value count upwards to include padding
+		 * bytes, then remove the headers to get the padded counter
+		 * count.
+		 */
+		ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
+				  hdr_cnt + ctr_cnt,
+				  (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)) -
+			  hdr_cnt;
+
+		kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt,
+								ctr_cnt);
+	}
+
+	kbase_hwcnt_metadata_for_each_clock(metadata, clk)
+	{
+		if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
+			dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
+		else
+			dst->clk_cnt_buf[clk] = 0;
+	}
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
new file mode 100644
index 0000000..5c5ada4
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
@@ -0,0 +1,1231 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Hardware counter types. + * Contains structures for describing the physical layout of hardware counter + * dump buffers and enable maps within a system. + * + * Also contains helper functions for manipulation of these dump buffers and + * enable maps. + * + * Through use of these structures and functions, hardware counters can be + * enabled, copied, accumulated, and generally manipulated in a generic way, + * regardless of the physical counter dump layout. + * + * Terminology: + * + * Hardware Counter System: + * A collection of hardware counter groups, making a full hardware counter + * system. + * Hardware Counter Group: + * A group of Hardware Counter Blocks (e.g. a t62x might have more than one + * core group, so has one counter group per core group, where each group + * may have a different number and layout of counter blocks). + * Hardware Counter Block: + * A block of hardware counters (e.g. shader block, tiler block). + * Hardware Counter Block Instance: + * An instance of a Hardware Counter Block (e.g. an MP4 GPU might have + * 4 shader block instances). + * + * Block Header: + * A header value inside a counter block. Headers don't count anything, + * so it is only valid to copy or zero them. Headers are always the first + * values in the block. + * Block Counter: + * A counter value inside a counter block. Counters can be zeroed, copied, + * or accumulated. Counters are always immediately after the headers in the + * block. + * Block Value: + * A catch-all term for block headers and block counters. + * + * Enable Map: + * An array of u64 bitfields, where each bit either enables exactly one + * block value, or is unused (padding). + * Dump Buffer: + * An array of u64 values, where each u64 corresponds either to one block + * value, or is unused (padding). + * Availability Mask: + * A bitfield, where each bit corresponds to whether a block instance is + * physically available (e.g. an MP3 GPU may have a sparse core mask of + * 0b1011, meaning it only has 3 cores but for hardware counter dumps has the + * same dump buffer layout as an MP4 GPU with a core mask of 0b1111. In this + * case, the availability mask might be 0b1011111 (the exact layout will + * depend on the specific hardware architecture), with the 3 extra early bits + * corresponding to other block instances in the hardware counter system). + * Metadata: + * Structure describing the physical layout of the enable map and dump buffers + * for a specific hardware counter system. + * + */ + +#ifndef _KBASE_HWCNT_TYPES_H_ +#define _KBASE_HWCNT_TYPES_H_ + +#include <linux/bitops.h> +#include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/types.h> + +/* Number of bytes in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BYTES (sizeof(u64)) + +/* Number of bits in each bitfield */ +#define KBASE_HWCNT_BITFIELD_BITS (KBASE_HWCNT_BITFIELD_BYTES * BITS_PER_BYTE) + +/* Number of bytes for each counter value. + * Use 64-bit per counter in driver to avoid HW 32-bit register values + * overflow after a long time accumulation. 
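+ * For example, a counter incrementing once per cycle at 1 GHz would wrap a
+ * 32-bit value after roughly 4.3 seconds of accumulation.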
+ */ +#define KBASE_HWCNT_VALUE_BYTES (sizeof(u64)) + +/* Number of bits in an availability mask (i.e. max total number of block + * instances supported in a Hardware Counter System) + */ +#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) + +/* Minimum alignment of each block of hardware counters */ +#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) + +/** + * KBASE_HWCNT_ALIGN_UPWARDS() - Calculate next aligned value. + * @value: The value to align upwards. + * @alignment: The alignment boundary. + * + * Return: Input value if already aligned to the specified boundary, or next + * (incrementing upwards) aligned value. + */ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ + (value + ((alignment - (value % alignment)) % alignment)) + +/** + * struct kbase_hwcnt_block_description - Description of one or more identical, + * contiguous, Hardware Counter Blocks. + * @type: The arbitrary identifier used to identify the type of the block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 64-bit Block Headers in the block. + * @ctr_cnt: The number of 64-bit Block Counters in the block. + */ +struct kbase_hwcnt_block_description { + u64 type; + size_t inst_cnt; + size_t hdr_cnt; + size_t ctr_cnt; +}; + +/** + * struct kbase_hwcnt_group_description - Description of one or more identical, + * contiguous Hardware Counter Groups. + * @type: The arbitrary identifier used to identify the type of the group. + * @blk_cnt: The number of types of Hardware Counter Block in the group. + * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, + * describing each type of Hardware Counter Block in the group. + */ +struct kbase_hwcnt_group_description { + u64 type; + size_t blk_cnt; + const struct kbase_hwcnt_block_description *blks; +}; + +/** + * struct kbase_hwcnt_description - Description of a Hardware Counter System. + * @grp_cnt: The number of Hardware Counter Groups. + * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, + * describing each Hardware Counter Group in the system. + * @avail_mask: Flat Availability Mask for all block instances in the system. + * @clk_cnt: The number of clock domains in the system. The maximum is 64. + */ +struct kbase_hwcnt_description { + size_t grp_cnt; + const struct kbase_hwcnt_group_description *grps; + u64 avail_mask; + u8 clk_cnt; +}; + +/** + * struct kbase_hwcnt_block_metadata - Metadata describing the physical layout + * of a block in a Hardware Counter System's + * Dump Buffers and Enable Maps. + * @type: The arbitrary identifier used to identify the type of the + * block. + * @inst_cnt: The number of Instances of the block. + * @hdr_cnt: The number of 64-bit Block Headers in the block. + * @ctr_cnt: The number of 64-bit Block Counters in the block. + * @enable_map_index: Index in u64s into the parent's Enable Map where the + * Enable Map bitfields of the Block Instances described by + * this metadata start. + * @enable_map_stride: Stride in u64s between the Enable Maps of each of the + * Block Instances described by this metadata. + * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the + * Dump Buffers of the Block Instances described by this + * metadata start. + * @dump_buf_stride: Stride in u64s between the Dump Buffers of each of the + * Block Instances described by this metadata. 
+ * @avail_mask_index:  Index in bits into the parent's Availability Mask where
+ *                     the Availability Masks of the Block Instances described
+ *                     by this metadata start.
+ */
+struct kbase_hwcnt_block_metadata {
+	u64 type;
+	size_t inst_cnt;
+	size_t hdr_cnt;
+	size_t ctr_cnt;
+	size_t enable_map_index;
+	size_t enable_map_stride;
+	size_t dump_buf_index;
+	size_t dump_buf_stride;
+	size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
+ *                                     of a group of blocks in a Hardware
+ *                                     Counter System's Dump Buffers and Enable
+ *                                     Maps.
+ * @type:             The arbitrary identifier used to identify the type of the
+ *                    group.
+ * @blk_cnt:          The number of types of Hardware Counter Block in the
+ *                    group.
+ * @blk_metadata:     Non-NULL pointer to an array of blk_cnt block metadata,
+ *                    describing the physical layout of each type of Hardware
+ *                    Counter Block in the group.
+ * @enable_map_index: Index in u64s into the parent's Enable Map where the
+ *                    Enable Maps of the blocks within the group described by
+ *                    this metadata start.
+ * @dump_buf_index:   Index in u64s into the parent's Dump Buffer where the
+ *                    Dump Buffers of the blocks within the group described by
+ *                    this metadata start.
+ * @avail_mask_index: Index in bits into the parent's Availability Mask where
+ *                    the Availability Masks of the blocks within the group
+ *                    described by this metadata start.
+ */
+struct kbase_hwcnt_group_metadata {
+	u64 type;
+	size_t blk_cnt;
+	const struct kbase_hwcnt_block_metadata *blk_metadata;
+	size_t enable_map_index;
+	size_t dump_buf_index;
+	size_t avail_mask_index;
+};
+
+/**
+ * struct kbase_hwcnt_metadata - Metadata describing the memory layout
+ *                               of Dump Buffers and Enable Maps within a
+ *                               Hardware Counter System.
+ * @grp_cnt:          The number of Hardware Counter Groups.
+ * @grp_metadata:     Non-NULL pointer to an array of grp_cnt group metadata,
+ *                    describing the physical layout of each Hardware Counter
+ *                    Group in the system.
+ * @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
+ * @dump_buf_bytes:   The size in bytes of a Dump Buffer needed for the system.
+ * @avail_mask:       The Availability Mask for the system.
+ * @clk_cnt:          The number of clock domains in the system.
+ */
+struct kbase_hwcnt_metadata {
+	size_t grp_cnt;
+	const struct kbase_hwcnt_group_metadata *grp_metadata;
+	size_t enable_map_bytes;
+	size_t dump_buf_bytes;
+	u64 avail_mask;
+	u8 clk_cnt;
+};
+
+/**
+ * struct kbase_hwcnt_enable_map - Hardware Counter Enable Map. Array of u64
+ *                                 bitfields.
+ * @metadata:         Non-NULL pointer to metadata used to identify, and to
+ *                    describe the layout of the enable map.
+ * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an
+ *                    array of u64 bitfields, each bit of which enables one
+ *                    hardware counter.
+ * @clk_enable_map:   A u64 bitfield, each bit of which enables the cycle
+ *                    counter for a given clock domain.
+ */
+struct kbase_hwcnt_enable_map {
+	const struct kbase_hwcnt_metadata *metadata;
+	u64 *hwcnt_enable_map;
+	u64 clk_enable_map;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer - Hardware Counter Dump Buffer.
+ * @metadata:    Non-NULL pointer to metadata used to identify, and to describe
+ *               the layout of the Dump Buffer.
+ * @dump_buf:    Non-NULL pointer to an array of u64 values whose size in bytes
+ *               is metadata->dump_buf_bytes.
+ * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
+ *               for each clock domain.
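+ *
+ * The two buffers come from a single allocation:
+ * kbase_hwcnt_dump_buffer_alloc() places clk_cnt_buf immediately after the
+ * dump_buf region in memory.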
+ */
+struct kbase_hwcnt_dump_buffer {
+	const struct kbase_hwcnt_metadata *metadata;
+	u64 *dump_buf;
+	u64 *clk_cnt_buf;
+};
+
+/**
+ * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
+ * @page_addr:  Address of the allocated pages. A single allocation is used for
+ *              all Dump Buffers in the array.
+ * @page_order: The allocation order of the pages (log2 of the number of pages
+ *              allocated).
+ * @buf_cnt:    The number of allocated Dump Buffers.
+ * @bufs:       Non-NULL pointer to the array of Dump Buffers.
+ */
+struct kbase_hwcnt_dump_buffer_array {
+	unsigned long page_addr;
+	unsigned int page_order;
+	size_t buf_cnt;
+	struct kbase_hwcnt_dump_buffer *bufs;
+};
+
+/**
+ * kbase_hwcnt_metadata_create() - Create a hardware counter metadata object
+ *                                 from a description.
+ * @desc:     Non-NULL pointer to a hardware counter description.
+ * @metadata: Non-NULL pointer to where created metadata will be stored on
+ *            success.
+ *
+ * Return: 0 on success, else error code.
+ */
+int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
+				const struct kbase_hwcnt_metadata **metadata);
+
+/**
+ * kbase_hwcnt_metadata_destroy() - Destroy a hardware counter metadata object.
+ * @metadata: Pointer to hardware counter metadata
+ */
+void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
+
+/**
+ * kbase_hwcnt_metadata_group_count() - Get the number of groups.
+ * @metadata: Non-NULL pointer to metadata.
+ *
+ * Return: Number of hardware counter groups described by metadata.
+ */
+static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
+{
+	if (WARN_ON(!metadata))
+		return 0;
+
+	return metadata->grp_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ *
+ * Return: Type of the group grp.
+ */
+static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
+						  size_t grp)
+{
+	if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+		return 0;
+
+	return metadata->grp_metadata[grp].type;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ *
+ * Return: Number of blocks in group grp.
+ */
+static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
+						      size_t grp)
+{
+	if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+		return 0;
+
+	return metadata->grp_metadata[grp].blk_cnt;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Type of the block blk in group grp.
+ */
+static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
+						  size_t grp, size_t blk)
+{
+	if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
+	    WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+		return 0;
+
+	return metadata->grp_metadata[grp].blk_metadata[blk].type;
+}
+
+/**
+ * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
+ *                                               a block.
+ * @metadata: Non-NULL pointer to metadata.
+ * @grp:      Index of the group in the metadata.
+ * @blk:      Index of the block in the group.
+ *
+ * Return: Number of instances of block blk in group grp.
+ */ +static inline size_t +kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) +{ + if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || + WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + return 0; + + return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt; +} + +/** + * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter + * headers. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of counter headers in each instance of block blk in group grp. + */ +static inline size_t +kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) +{ + if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || + WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + return 0; + + return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt; +} + +/** + * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of counters in each instance of block blk in group grp. + */ +static inline size_t +kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) +{ + if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || + WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + return 0; + + return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt; +} + +/** + * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: enable map stride in each instance of block blk in group grp. + */ +static inline size_t +kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata, + size_t grp, size_t blk) +{ + if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || + WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + return 0; + + return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride; +} + +/** + * kbase_hwcnt_metadata_block_values_count() - Get the number of values. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: Number of headers plus counters in each instance of block blk + * in group grp. + */ +static inline size_t +kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) +{ + if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || + WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + return 0; + + return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) + + kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); +} + +/** + * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in + * the metadata. + * @md: Non-NULL pointer to metadata. + * @grp: size_t variable used as group iterator. + * @blk: size_t variable used as block iterator. + * @blk_inst: size_t variable used as block instance iterator. + * + * Iteration order is group, then block, then block instance (i.e. linearly + * through memory). 
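+ *
+ * Example (an illustrative sketch, assuming md is valid metadata and buf is a
+ * dump buffer created from it):
+ *
+ *	size_t grp, blk, blk_inst;
+ *
+ *	kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst)
+ *	{
+ *		u64 *blk_buf = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
+ *		size_t val_cnt = kbase_hwcnt_metadata_block_values_count(md, grp, blk);
+ *
+ *		kbase_hwcnt_dump_buffer_block_zero(blk_buf, val_cnt);
+ *	}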
+ */ +#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ + for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ + for ((blk_inst) = 0; \ + (blk_inst) < \ + kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \ + (blk_inst)++) + +/** + * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail + * mask corresponding to the block. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * + * Return: The bit index into the avail mask for the block. + */ +static inline size_t +kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk) +{ + if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || + WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + return 0; + + return metadata->grp_metadata[grp].avail_mask_index + + metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; +} + +/** + * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is + * available. + * @metadata: Non-NULL pointer to metadata. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if the block instance is available, else false. + */ +static inline bool +kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp, + size_t blk, size_t blk_inst) +{ + size_t bit; + u64 mask; + + if (WARN_ON(!metadata)) + return false; + + bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst; + mask = 1ull << bit; + + return (metadata->avail_mask & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_alloc() - Allocate an enable map. + * @metadata: Non-NULL pointer to metadata describing the system. + * @enable_map: Non-NULL pointer to enable map to be initialised. Will be + * initialised to all zeroes (i.e. all counters disabled). + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_enable_map_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_free() - Free an enable map. + * @enable_map: Enable map to be freed. + * + * Can be safely called on an all-zeroed enable map structure, or on an already + * freed enable map. + */ +void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); + +/** + * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block + * instance's enable map. + * @map: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: u64* to the bitfield(s) used as the enable map for the + * block instance. 
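+ *
+ * Example (an illustrative sketch): enable only the first counter of each
+ * block instance, leaving everything else disabled:
+ *
+ *	size_t grp, blk, blk_inst;
+ *
+ *	kbase_hwcnt_enable_map_disable_all(map);
+ *	kbase_hwcnt_metadata_for_each_block(map->metadata, grp, blk, blk_inst)
+ *	{
+ *		u64 *em = kbase_hwcnt_enable_map_block_instance(map, grp, blk, blk_inst);
+ *		size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(map->metadata, grp, blk);
+ *
+ *		kbase_hwcnt_enable_map_block_enable_value(em, hdr_cnt);
+ *	}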
+ */ +static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, + size_t grp, size_t blk, size_t blk_inst) +{ + if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map)) + return NULL; + + if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) || + WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) || + WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) + return map->hwcnt_enable_map; + + return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index + + map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index + + (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst); +} + +/** + * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required + * to have at minimum one bit per value. + * @val_cnt: Number of values. + * + * Return: Number of required bitfields. + */ +static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) +{ + return (val_cnt + KBASE_HWCNT_BITFIELD_BITS - 1) / KBASE_HWCNT_BITFIELD_BITS; +} + +/** + * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + */ +static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst, + size_t grp, size_t blk, size_t blk_inst) +{ + size_t val_cnt; + size_t bitfld_cnt; + u64 *const block_enable_map = + kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + + if (WARN_ON(!dst)) + return; + + val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); + bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + + memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); +} + +/** + * kbase_hwcnt_enable_map_disable_all() - Disable all values in the enable map. + * @dst: Non-NULL pointer to enable map to zero. + */ +static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_map *dst) +{ + if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) + return; + + if (dst->hwcnt_enable_map != NULL) + memset(dst->hwcnt_enable_map, 0, dst->metadata->enable_map_bytes); + + dst->clk_enable_map = 0; +} + +/** + * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. + * @dst: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + */ +static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst, + size_t grp, size_t blk, size_t blk_inst) +{ + size_t val_cnt; + size_t bitfld_cnt; + u64 *const block_enable_map = + kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + size_t bitfld_idx; + + if (WARN_ON(!dst)) + return; + + val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); + bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + block_enable_map[bitfld_idx] = block_enable_map_mask; + } +} + +/** + * kbase_hwcnt_enable_map_enable_all() - Enable all values in an enable + * map. 
+ * @dst: Non-NULL pointer to enable map. + */ +static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst) +{ + size_t grp, blk, blk_inst; + + if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) + return; + + kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) + kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst); + + dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; +} + +/** + * kbase_hwcnt_enable_map_copy() - Copy an enable map to another. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_copy(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || + WARN_ON(dst->metadata != src->metadata)) + return; + + if (dst->hwcnt_enable_map != NULL) { + if (WARN_ON(!src->hwcnt_enable_map)) + return; + + memcpy(dst->hwcnt_enable_map, src->hwcnt_enable_map, + dst->metadata->enable_map_bytes); + } + + dst->clk_enable_map = src->clk_enable_map; +} + +/** + * kbase_hwcnt_enable_map_union() - Union dst and src enable maps into dst. + * @dst: Non-NULL pointer to destination enable map. + * @src: Non-NULL pointer to source enable map. + * + * The dst and src MUST have been created from the same metadata. + */ +static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_map *src) +{ + if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst->metadata) || + WARN_ON(dst->metadata != src->metadata)) + return; + + if (dst->hwcnt_enable_map != NULL) { + size_t i; + size_t const bitfld_count = + dst->metadata->enable_map_bytes / KBASE_HWCNT_BITFIELD_BYTES; + + if (WARN_ON(!src->hwcnt_enable_map)) + return; + + for (i = 0; i < bitfld_count; i++) + dst->hwcnt_enable_map[i] |= src->hwcnt_enable_map[i]; + } + + dst->clk_enable_map |= src->clk_enable_map; +} + +/** + * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block + * instance are enabled. + * @enable_map: Non-NULL pointer to enable map. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: true if any values in the block are enabled, else false. + */ +static inline bool +kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp, + size_t blk, size_t blk_inst) +{ + bool any_enabled = false; + size_t val_cnt; + size_t bitfld_cnt; + const u64 *const block_enable_map = + kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); + size_t bitfld_idx; + + if (WARN_ON(!enable_map)) + return false; + + val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk); + bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); + + for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { + const u64 remaining_values = val_cnt - (bitfld_idx * KBASE_HWCNT_BITFIELD_BITS); + u64 block_enable_map_mask = U64_MAX; + + if (remaining_values < KBASE_HWCNT_BITFIELD_BITS) + block_enable_map_mask = (1ull << remaining_values) - 1; + + any_enabled = any_enabled || (block_enable_map[bitfld_idx] & block_enable_map_mask); + } + + return any_enabled; +} + +/** + * kbase_hwcnt_enable_map_any_enabled() - Check if any values are enabled. + * @enable_map: Non-NULL pointer to enable map. 
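+ *
+ * Both the block enable bits and the clock domain enable bits are checked, so
+ * a map whose only enabled bit is a cycle counter bit for one clock domain
+ * still counts as enabled.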
+ * + * Return: true if any values are enabled, else false. + */ +static inline bool +kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map) +{ + size_t grp, blk, blk_inst; + u64 clk_enable_map_mask; + + if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata)) + return false; + + clk_enable_map_mask = (1ull << enable_map->metadata->clk_cnt) - 1; + + if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask)) + return true; + + kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst) + { + if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) + return true; + } + + return false; +} + +/** + * kbase_hwcnt_enable_map_block_value_enabled() - Check if a value in a block + * instance is enabled. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to check in the block instance. + * + * Return: true if the value was enabled, else false. + */ +static inline bool kbase_hwcnt_enable_map_block_value_enabled(const u64 *bitfld, size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + return (bitfld[idx] & mask) != 0; +} + +/** + * kbase_hwcnt_enable_map_block_enable_value() - Enable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to enable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_enable_value(u64 *bitfld, size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] |= mask; +} + +/** + * kbase_hwcnt_enable_map_block_disable_value() - Disable a value in a block + * instance. + * @bitfld: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_idx: Index of the value to disable in the block instance. + */ +static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_t val_idx) +{ + const size_t idx = val_idx / KBASE_HWCNT_BITFIELD_BITS; + const size_t bit = val_idx % KBASE_HWCNT_BITFIELD_BITS; + const u64 mask = 1ull << bit; + + bitfld[idx] &= ~mask; +} + +/** + * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. + * @metadata: Non-NULL pointer to metadata describing the system. + * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be + * initialised to undefined values, so must be used as a copy dest, + * or cleared before use. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_free() - Free a dump buffer. + * @dump_buf: Dump buffer to be freed. + * + * Can be safely called on an all-zeroed dump buffer structure, or on an already + * freed dump buffer. + */ +void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. + * @metadata: Non-NULL pointer to metadata describing the system. + * @n: Number of dump buffers to allocate + * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. 
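+ *
+ * Example (an illustrative sketch, error handling elided):
+ *
+ *	struct kbase_hwcnt_dump_buffer_array arr;
+ *
+ *	if (!kbase_hwcnt_dump_buffer_array_alloc(metadata, 8, &arr)) {
+ *		kbase_hwcnt_dump_buffer_zero_strict(&arr.bufs[0]);
+ *		kbase_hwcnt_dump_buffer_array_free(&arr);
+ *	}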
+ * + * A single zeroed contiguous page allocation will be used for all of the + * buffers inside the array, where: + * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, + struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. + * @dump_bufs: Dump buffer array to be freed. + * + * Can be safely called on an all-zeroed dump buffer array structure, or on an + * already freed dump buffer array. + */ +void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs); + +/** + * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block + * instance's dump buffer. + * @buf: Non-NULL pointer to dump buffer. + * @grp: Index of the group in the metadata. + * @blk: Index of the block in the group. + * @blk_inst: Index of the block instance in the block. + * + * Return: u64* to the dump buffer for the block instance. + */ +static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf, + size_t grp, size_t blk, size_t blk_inst) +{ + if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) + return NULL; + + if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) || + WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) || + WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) + return buf->dump_buf; + + return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index + + buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + + (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst); +} + +/** + * kbase_hwcnt_dump_buffer_zero() - Zero all enabled values in dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero() - Zero all values in a block. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero(u64 *dst_blk, size_t val_cnt) +{ + if (WARN_ON(!dst_blk)) + return; + + memset(dst_blk, 0, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_zero_strict() - Zero all values in dst. + * After the operation, all values + * (including padding bytes) will be + * zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dump buffer. + */ +void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst); + +/** + * kbase_hwcnt_dump_buffer_zero_non_enabled() - Zero all non-enabled values in + * dst (including padding bytes and + * unavailable blocks). + * After the operation, all enabled + * values will be unchanged. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst and dst_enable_map MUST have been created from the same metadata. 
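+ *
+ * Block instances that are not available are zeroed in their entirety,
+ * regardless of which of their values are enabled.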
+ */ +void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_zero_non_enabled() - Zero all non-enabled + * values in a block. + * After the operation, all + * enabled values will be + * unchanged. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_zero_non_enabled(u64 *dst_blk, const u64 *blk_em, + size_t val_cnt) +{ + size_t val; + + if (WARN_ON(!dst_blk)) + return; + + for (val = 0; val < val_cnt; val++) { + if (!kbase_hwcnt_enable_map_block_value_enabled(blk_em, val)) + dst_blk[val] = 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_copy() - Copy all enabled values from src to dst. + * After the operation, all non-enabled values + * will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy() - Copy all block values from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @val_cnt: Number of values in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_copy(u64 *dst_blk, const u64 *src_blk, + size_t val_cnt) +{ + if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) + return; + + /* Copy all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + memcpy(dst_blk, src_blk, (val_cnt * KBASE_HWCNT_VALUE_BYTES)); +} + +/** + * kbase_hwcnt_dump_buffer_copy_strict() - Copy all enabled values from src to + * dst. + * After the operation, all non-enabled + * values (including padding bytes) will + * be zero. + * Slower than the non-strict variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_copy_strict() - Copy all enabled block values + * from src to dst. + * After the operation, all + * non-enabled values will be + * zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @val_cnt: Number of values in the block. + * + * After the copy, any disabled values in dst will be zero. 
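+ *
+ * Example (an illustrative sketch): with val_cnt = 4, blk_em enabling only
+ * values 0 and 2, and src_blk = {10, 20, 30, 40}, dst_blk ends up as
+ * {10, 0, 30, 0}.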
+ */ +static inline void kbase_hwcnt_dump_buffer_block_copy_strict(u64 *dst_blk, const u64 *src_blk, + const u64 *blk_em, size_t val_cnt) +{ + size_t val; + + if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) + return; + + for (val = 0; val < val_cnt; val++) { + bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); + + dst_blk[val] = val_enabled ? src_blk[val] : 0; + } +} + +/** + * kbase_hwcnt_dump_buffer_accumulate() - Copy all enabled headers and + * accumulate all enabled counters from + * src to dst. + * After the operation, all non-enabled + * values will be undefined. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate() - Copy all block headers and + * accumulate all block counters + * from src to dst. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. + */ +static inline void kbase_hwcnt_dump_buffer_block_accumulate(u64 *dst_blk, const u64 *src_blk, + size_t hdr_cnt, size_t ctr_cnt) +{ + size_t ctr; + + if (WARN_ON(!dst_blk) || WARN_ON(!src_blk)) + return; + + /* Copy all the headers in the block instance. + * Values of non-enabled headers are undefined. + */ + memcpy(dst_blk, src_blk, hdr_cnt * KBASE_HWCNT_VALUE_BYTES); + + /* Accumulate all the counters in the block instance. + * Values of non-enabled counters are undefined. + */ + for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) + dst_blk[ctr] += src_blk[ctr]; +} + +/** + * kbase_hwcnt_dump_buffer_accumulate_strict() - Copy all enabled headers and + * accumulate all enabled counters + * from src to dst. + * After the operation, all + * non-enabled values (including + * padding bytes) will be zero. + * Slower than the non-strict + * variant. + * @dst: Non-NULL pointer to dst dump buffer. + * @src: Non-NULL pointer to src dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * + * The dst, src, and dst_enable_map MUST have been created from the same + * metadata. + */ +void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_dump_buffer *src, + const struct kbase_hwcnt_enable_map *dst_enable_map); + +/** + * kbase_hwcnt_dump_buffer_block_accumulate_strict() - Copy all enabled block + * headers and accumulate + * all block counters from + * src to dst. + * After the operation, all + * non-enabled values will + * be zero. + * @dst_blk: Non-NULL pointer to dst block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @src_blk: Non-NULL pointer to src block obtained from a call to + * kbase_hwcnt_dump_buffer_block_instance. + * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to + * kbase_hwcnt_enable_map_block_instance. + * @hdr_cnt: Number of headers in the block. + * @ctr_cnt: Number of counters in the block. 
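+ *
+ * Example (an illustrative sketch): with hdr_cnt = 1, ctr_cnt = 2, all values
+ * enabled, src_blk = {7, 10, 20} and dst_blk = {3, 1, 2}, dst_blk ends up as
+ * {7, 11, 22}: the header is copied over, while the counters are summed.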
+ */
+static inline void kbase_hwcnt_dump_buffer_block_accumulate_strict(u64 *dst_blk, const u64 *src_blk,
+								    const u64 *blk_em,
+								    size_t hdr_cnt, size_t ctr_cnt)
+{
+	size_t ctr;
+
+	if (WARN_ON(!dst_blk) || WARN_ON(!src_blk))
+		return;
+
+	kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, hdr_cnt);
+
+	for (ctr = hdr_cnt; ctr < ctr_cnt + hdr_cnt; ctr++) {
+		bool ctr_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, ctr);
+
+		if (ctr_enabled)
+			dst_blk[ctr] += src_blk[ctr];
+		else
+			dst_blk[ctr] = 0;
+	}
+}
+
+/**
+ * kbase_hwcnt_metadata_for_each_clock() - Iterate over each clock domain in the
+ *                                         metadata.
+ * @md:  Non-NULL pointer to metadata.
+ * @clk: size_t variable used as clock iterator.
+ */
+#define kbase_hwcnt_metadata_for_each_clock(md, clk) for ((clk) = 0; (clk) < (md)->clk_cnt; (clk)++)
+
+/**
+ * kbase_hwcnt_clk_enable_map_enabled() - Check if the given index is enabled
+ *                                        in clk_enable_map.
+ * @clk_enable_map: An enable map for clock domains.
+ * @index:          Index of the enable map for clock domain.
+ *
+ * Return: true if the index of the clock domain is enabled, else false.
+ */
+static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, const size_t index)
+{
+	if (WARN_ON(index >= 64))
+		return false;
+	if (clk_enable_map & (1ull << index))
+		return true;
+	return false;
+}
+
+#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
new file mode 100644
index 0000000..d618764
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
+#include "hwcnt/mali_kbase_hwcnt_accumulator.h"
+#include "hwcnt/mali_kbase_hwcnt_context.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
+#include <linux/mutex.h>
+#include <linux/slab.h>
+
+/**
+ * struct kbase_hwcnt_virtualizer - Hardware counter virtualizer structure.
+ * @hctx:              Hardware counter context being virtualized.
+ * @dump_threshold_ns: Minimum time between dumps requested by different
+ *                     clients: if a client requests a dump within this period
+ *                     of the previous one, no new accumulator dump is
+ *                     performed and the already accumulated values are used
+ *                     instead. If 0, this rate limiting is disabled.
+ * @metadata:          Hardware counter metadata.
+ * @lock:              Lock acquired at all entrypoints, to protect mutable
+ *                     state.
+ * @client_count:      Current number of virtualizer clients.
+ * @clients:           List of virtualizer clients.
+ * @accum:             Hardware counter accumulator. NULL if no clients.
+ * @scratch_map:       Enable map used as scratch space during counter changes.
+ * @scratch_buf:       Dump buffer used as scratch space during dumps.
+ * @ts_last_dump_ns: End time of most recent dump across all clients. + */ +struct kbase_hwcnt_virtualizer { + struct kbase_hwcnt_context *hctx; + u64 dump_threshold_ns; + const struct kbase_hwcnt_metadata *metadata; + struct mutex lock; + size_t client_count; + struct list_head clients; + struct kbase_hwcnt_accumulator *accum; + struct kbase_hwcnt_enable_map scratch_map; + struct kbase_hwcnt_dump_buffer scratch_buf; + u64 ts_last_dump_ns; +}; + +/** + * struct kbase_hwcnt_virtualizer_client - Virtualizer client structure. + * @node: List node used for virtualizer client list. + * @hvirt: Hardware counter virtualizer. + * @enable_map: Enable map with client's current enabled counters. + * @accum_buf: Dump buffer with client's current accumulated counters. + * @has_accum: True if accum_buf contains any accumulated counters. + * @ts_start_ns: Counter collection start time of current dump. + */ +struct kbase_hwcnt_virtualizer_client { + struct list_head node; + struct kbase_hwcnt_virtualizer *hvirt; + struct kbase_hwcnt_enable_map enable_map; + struct kbase_hwcnt_dump_buffer accum_buf; + bool has_accum; + u64 ts_start_ns; +}; + +const struct kbase_hwcnt_metadata * +kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return NULL; + + return hvirt->metadata; +} + +/** + * kbasep_hwcnt_virtualizer_client_free - Free a virtualizer client's memory. + * @hvcli: Pointer to virtualizer client. + * + * Will safely free a client in any partial state of construction. + */ +static void kbasep_hwcnt_virtualizer_client_free(struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + kbase_hwcnt_dump_buffer_free(&hvcli->accum_buf); + kbase_hwcnt_enable_map_free(&hvcli->enable_map); + kfree(hvcli); +} + +/** + * kbasep_hwcnt_virtualizer_client_alloc - Allocate memory for a virtualizer + * client. + * @metadata: Non-NULL pointer to counter metadata. + * @out_hvcli: Non-NULL pointer to where created client will be stored on + * success. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_client_alloc(const struct kbase_hwcnt_metadata *metadata, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli = NULL; + + WARN_ON(!metadata); + WARN_ON(!out_hvcli); + + hvcli = kzalloc(sizeof(*hvcli), GFP_KERNEL); + if (!hvcli) + return -ENOMEM; + + errcode = kbase_hwcnt_enable_map_alloc(metadata, &hvcli->enable_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc(metadata, &hvcli->accum_buf); + if (errcode) + goto error; + + *out_hvcli = hvcli; + return 0; +error: + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_accumulate - Accumulate a dump buffer into a + * client's accumulation buffer. + * @hvcli: Non-NULL pointer to virtualizer client. + * @dump_buf: Non-NULL pointer to dump buffer to accumulate from. 
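+ *
+ * If the client has no accumulation yet, the dump buffer is copied rather
+ * than accumulated, and has_accum is set so that later dumps accumulate on
+ * top of it.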
+ */ +static void +kbasep_hwcnt_virtualizer_client_accumulate(struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_dump_buffer *dump_buf) +{ + WARN_ON(!hvcli); + WARN_ON(!dump_buf); + lockdep_assert_held(&hvcli->hvirt->lock); + + if (hvcli->has_accum) { + /* If already some accumulation, accumulate */ + kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } else { + /* If no accumulation, copy */ + kbase_hwcnt_dump_buffer_copy(&hvcli->accum_buf, dump_buf, &hvcli->enable_map); + } + hvcli->has_accum = true; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_term - Terminate the hardware counter + * accumulator after final client + * removal. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Will safely terminate the accumulator in any partial state of initialisation. + */ +static void kbasep_hwcnt_virtualizer_accumulator_term(struct kbase_hwcnt_virtualizer *hvirt) +{ + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + + kbase_hwcnt_dump_buffer_free(&hvirt->scratch_buf); + kbase_hwcnt_enable_map_free(&hvirt->scratch_map); + kbase_hwcnt_accumulator_release(hvirt->accum); + hvirt->accum = NULL; +} + +/** + * kbasep_hwcnt_virtualizer_accumulator_init - Initialise the hardware counter + * accumulator before first client + * addition. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: 0 on success, else error code. + */ +static int kbasep_hwcnt_virtualizer_accumulator_init(struct kbase_hwcnt_virtualizer *hvirt) +{ + int errcode; + + WARN_ON(!hvirt); + lockdep_assert_held(&hvirt->lock); + WARN_ON(hvirt->client_count); + WARN_ON(hvirt->accum); + + errcode = kbase_hwcnt_accumulator_acquire(hvirt->hctx, &hvirt->accum); + if (errcode) + goto error; + + errcode = kbase_hwcnt_enable_map_alloc(hvirt->metadata, &hvirt->scratch_map); + if (errcode) + goto error; + + errcode = kbase_hwcnt_dump_buffer_alloc(hvirt->metadata, &hvirt->scratch_buf); + if (errcode) + goto error; + + return 0; +error: + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_add - Add a newly allocated client to the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to add. + * @enable_map: Non-NULL pointer to client's initial enable map. + * + * Return: 0 on success, else error code. 
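+ *
+ * For the first client this also initialises the accumulator and passes the
+ * enable map through as-is; for later clients the union of all clients'
+ * enable maps is applied, and the resulting dump is accumulated into the
+ * existing clients' accumulation buffers.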
+ */ +static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + lockdep_assert_held(&hvirt->lock); + + if (hvirt->client_count == 0) + /* First client added, so initialise the accumulator */ + errcode = kbasep_hwcnt_virtualizer_accumulator_init(hvirt); + if (errcode) + return errcode; + + hvirt->client_count += 1; + + if (hvirt->client_count == 1) { + /* First client, so just pass the enable map onwards as is */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, + &ts_start_ns, &ts_end_ns, NULL); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); + list_for_each_entry (pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into only existing clients' accumulation bufs */ + if (!errcode) + list_for_each_entry (pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate(pos, + &hvirt->scratch_buf); + } + if (errcode) + goto error; + + list_add(&hvcli->node, &hvirt->clients); + hvcli->hvirt = hvirt; + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + hvcli->has_accum = false; + hvcli->ts_start_ns = ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = ts_end_ns; + + return 0; +error: + hvirt->client_count -= 1; + if (hvirt->client_count == 0) + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_remove - Remove a client from the + * virtualizer. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client to remove. 
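+ *
+ * If this was the last client, the accumulator is terminated; otherwise the
+ * counters are re-set to the union of the remaining clients' enable maps,
+ * and the resulting dump is accumulated into the remaining clients.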
+ */ +static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli) +{ + int errcode = 0; + u64 ts_start_ns; + u64 ts_end_ns; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + lockdep_assert_held(&hvirt->lock); + + list_del(&hvcli->node); + hvirt->client_count -= 1; + + if (hvirt->client_count == 0) { + /* Last client removed, so terminate the accumulator */ + kbasep_hwcnt_virtualizer_accumulator_term(hvirt); + } else { + struct kbase_hwcnt_virtualizer_client *pos; + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); + list_for_each_entry (pos, &hvirt->clients, node) + kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, + &ts_start_ns, &ts_end_ns, + &hvirt->scratch_buf); + /* Accumulate into remaining clients' accumulation bufs */ + if (!errcode) { + list_for_each_entry (pos, &hvirt->clients, node) + kbasep_hwcnt_virtualizer_client_accumulate(pos, + &hvirt->scratch_buf); + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = ts_end_ns; + } + } + WARN_ON(errcode); +} + +/** + * kbasep_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, + * and enable a new set of + * counters that will be used for + * subsequent dumps. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. 
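+ *
+ * The counters are reprogrammed with the union of the new enable map and all
+ * other clients' enable maps, so no other client loses counters it enabled.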
+ */ +static int kbasep_hwcnt_virtualizer_client_set_counters( + struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!enable_map); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(enable_map->metadata != hvirt->metadata); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Make the scratch enable map the union of all enable maps */ + kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); + list_for_each_entry (pos, &hvirt->clients, node) + /* Ignore the enable map of the selected client */ + if (pos != hvcli) + kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); + + /* Set the counters with the new union enable map */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, + ts_start_ns, ts_end_ns, &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry (pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, + &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + + return errcode; +} + +int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !enable_map || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if ((enable_map->metadata != hvirt->metadata) || + (dump_buf && (dump_buf->metadata != hvirt->metadata))) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
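+ * On success, the client's enable map and timestamps are still
+ * updated below, exactly as on the full path.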
+ */ + errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, enable_map, + ts_start_ns, ts_end_ns, dump_buf); + + if (!errcode) { + /* Update the selected client's enable map */ + kbase_hwcnt_enable_map_copy(&hvcli->enable_map, enable_map); + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_set_counters( + hvirt, hvcli, enable_map, ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *hvirt, + struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *pos; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + /* Perform the dump */ + errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, + &hvirt->scratch_buf); + if (errcode) + return errcode; + + /* Accumulate into all accumulation bufs except the selected client's */ + list_for_each_entry (pos, &hvirt->clients, node) + if (pos != hvcli) + kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); + + /* Finally, write into the dump buf */ + if (dump_buf) { + const struct kbase_hwcnt_dump_buffer *src = &hvirt->scratch_buf; + + if (hvcli->has_accum) { + kbase_hwcnt_dump_buffer_accumulate(&hvcli->accum_buf, src, + &hvcli->enable_map); + src = &hvcli->accum_buf; + } + kbase_hwcnt_dump_buffer_copy(dump_buf, src, &hvcli->enable_map); + } + hvcli->has_accum = false; + + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + + return errcode; +} + +/** + * kbasep_hwcnt_virtualizer_client_dump_rate_limited - Perform a dump of the + * client's currently enabled counters + * if it hasn't been rate limited, + * otherwise return the client's most + * recent accumulation. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. 
If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +static int kbasep_hwcnt_virtualizer_client_dump_rate_limited( + struct kbase_hwcnt_virtualizer *hvirt, struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, struct kbase_hwcnt_dump_buffer *dump_buf) +{ + bool rate_limited = true; + + WARN_ON(!hvirt); + WARN_ON(!hvcli); + WARN_ON(!ts_start_ns); + WARN_ON(!ts_end_ns); + WARN_ON(dump_buf && (dump_buf->metadata != hvirt->metadata)); + lockdep_assert_held(&hvirt->lock); + + if (hvirt->dump_threshold_ns == 0) { + /* Threshold == 0, so rate limiting disabled */ + rate_limited = false; + } else if (hvirt->ts_last_dump_ns == hvcli->ts_start_ns) { + /* Last dump was performed by this client, and dumps from an + * individual client are never rate limited + */ + rate_limited = false; + } else { + const u64 ts_ns = kbase_hwcnt_accumulator_timestamp_ns(hvirt->accum); + const u64 time_since_last_dump_ns = ts_ns - hvirt->ts_last_dump_ns; + + /* Dump period equals or exceeds the threshold */ + if (time_since_last_dump_ns >= hvirt->dump_threshold_ns) + rate_limited = false; + } + + if (!rate_limited) + return kbasep_hwcnt_virtualizer_client_dump(hvirt, hvcli, ts_start_ns, ts_end_ns, + dump_buf); + + /* If we've gotten this far, the client must have something accumulated + * otherwise it is a logic error + */ + WARN_ON(!hvcli->has_accum); + + if (dump_buf) + kbase_hwcnt_dump_buffer_copy(dump_buf, &hvcli->accum_buf, &hvcli->enable_map); + hvcli->has_accum = false; + + *ts_start_ns = hvcli->ts_start_ns; + *ts_end_ns = hvirt->ts_last_dump_ns; + hvcli->ts_start_ns = hvirt->ts_last_dump_ns; + + return 0; +} + +int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf) +{ + int errcode; + struct kbase_hwcnt_virtualizer *hvirt; + + if (!hvcli || !ts_start_ns || !ts_end_ns) + return -EINVAL; + + hvirt = hvcli->hvirt; + + if (dump_buf && (dump_buf->metadata != hvirt->metadata)) + return -EINVAL; + + mutex_lock(&hvirt->lock); + + if ((hvirt->client_count == 1) && (!hvcli->has_accum)) { + /* + * If there's only one client with no prior accumulation, we can + * completely skip the virtualize and just pass through the call + * to the accumulator, saving a fair few copies and + * accumulations. 
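+ * On success, the timestamps are still fixed up below, exactly as
+ * on the full path.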
+ */ + errcode = kbase_hwcnt_accumulator_dump(hvirt->accum, ts_start_ns, ts_end_ns, + dump_buf); + + if (!errcode) { + /* Fix up the timestamps */ + *ts_start_ns = hvcli->ts_start_ns; + hvcli->ts_start_ns = *ts_end_ns; + + /* Store the most recent dump time for rate limiting */ + hvirt->ts_last_dump_ns = *ts_end_ns; + } + } else { + /* Otherwise, do the full virtualize */ + errcode = kbasep_hwcnt_virtualizer_client_dump_rate_limited( + hvirt, hvcli, ts_start_ns, ts_end_ns, dump_buf); + } + + mutex_unlock(&hvirt->lock); + + return errcode; +} + +int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli) +{ + int errcode; + struct kbase_hwcnt_virtualizer_client *hvcli; + + if (!hvirt || !enable_map || !out_hvcli || (enable_map->metadata != hvirt->metadata)) + return -EINVAL; + + errcode = kbasep_hwcnt_virtualizer_client_alloc(hvirt->metadata, &hvcli); + if (errcode) + return errcode; + + mutex_lock(&hvirt->lock); + + errcode = kbasep_hwcnt_virtualizer_client_add(hvirt, hvcli, enable_map); + + mutex_unlock(&hvirt->lock); + + if (errcode) { + kbasep_hwcnt_virtualizer_client_free(hvcli); + return errcode; + } + + *out_hvcli = hvcli; + return 0; +} + +void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli) +{ + if (!hvcli) + return; + + mutex_lock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_remove(hvcli->hvirt, hvcli); + + mutex_unlock(&hvcli->hvirt->lock); + + kbasep_hwcnt_virtualizer_client_free(hvcli); +} + +int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, + struct kbase_hwcnt_virtualizer **out_hvirt) +{ + struct kbase_hwcnt_virtualizer *virt; + const struct kbase_hwcnt_metadata *metadata; + + if (!hctx || !out_hvirt) + return -EINVAL; + + metadata = kbase_hwcnt_context_metadata(hctx); + if (!metadata) + return -EINVAL; + + virt = kzalloc(sizeof(*virt), GFP_KERNEL); + if (!virt) + return -ENOMEM; + + virt->hctx = hctx; + virt->dump_threshold_ns = dump_threshold_ns; + virt->metadata = metadata; + + mutex_init(&virt->lock); + INIT_LIST_HEAD(&virt->clients); + + *out_hvirt = virt; + return 0; +} + +void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt) +{ + if (!hvirt) + return; + + /* Non-zero client count implies client leak */ + if (WARN_ON(hvirt->client_count != 0)) { + struct kbase_hwcnt_virtualizer_client *pos, *n; + + list_for_each_entry_safe (pos, n, &hvirt->clients, node) + kbase_hwcnt_virtualizer_client_destroy(pos); + } + + WARN_ON(hvirt->client_count != 0); + WARN_ON(hvirt->accum); + + kfree(hvirt); +} + +bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, + struct work_struct *work) +{ + if (WARN_ON(!hvirt) || WARN_ON(!work)) + return false; + + return kbase_hwcnt_context_queue_work(hvirt->hctx, work); +} diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h new file mode 100644 index 0000000..485ba74 --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. 
+ * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/* + * Hardware counter virtualizer API. + * + * Virtualizes a hardware counter context, so multiple clients can access + * a single hardware counter resource as though each was the exclusive user. + */ + +#ifndef _KBASE_HWCNT_VIRTUALIZER_H_ +#define _KBASE_HWCNT_VIRTUALIZER_H_ + +#include <linux/types.h> +#include <linux/workqueue.h> + +struct kbase_hwcnt_context; +struct kbase_hwcnt_virtualizer; +struct kbase_hwcnt_virtualizer_client; +struct kbase_hwcnt_enable_map; +struct kbase_hwcnt_dump_buffer; + +/** + * kbase_hwcnt_virtualizer_init - Initialise a hardware counter virtualizer. + * @hctx: Non-NULL pointer to the hardware counter context to + * virtualize. + * @dump_threshold_ns: Minimum threshold period for dumps between different + * clients where a new accumulator dump will not be + * performed, and instead accumulated values will be used. + * If 0, rate limiting will be disabled. + * @out_hvirt: Non-NULL pointer to where the pointer to the created + * virtualizer will be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_init(struct kbase_hwcnt_context *hctx, u64 dump_threshold_ns, + struct kbase_hwcnt_virtualizer **out_hvirt); + +/** + * kbase_hwcnt_virtualizer_term - Terminate a hardware counter virtualizer. + * @hvirt: Pointer to virtualizer to be terminated. + */ +void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_metadata - Get the hardware counter metadata used by + * the virtualizer, so related counter data + * structures can be created. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * + * Return: Non-NULL pointer to metadata, or NULL on error. + */ +const struct kbase_hwcnt_metadata * +kbase_hwcnt_virtualizer_metadata(struct kbase_hwcnt_virtualizer *hvirt); + +/** + * kbase_hwcnt_virtualizer_client_create - Create a new virtualizer client. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @enable_map: Non-NULL pointer to the enable map for the client. Must have the + * same metadata as the virtualizer. + * @out_hvcli: Non-NULL pointer to where the pointer to the created client will + * be stored on success. + * + * Return: 0 on success, else error code. + */ +int kbase_hwcnt_virtualizer_client_create(struct kbase_hwcnt_virtualizer *hvirt, + const struct kbase_hwcnt_enable_map *enable_map, + struct kbase_hwcnt_virtualizer_client **out_hvcli); + +/** + * kbase_hwcnt_virtualizer_client_destroy() - Destroy a virtualizer client. + * @hvcli: Pointer to the hardware counter client. 
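+ *
+ * If hvcli is NULL, this function does nothing.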
+ */ +void kbase_hwcnt_virtualizer_client_destroy(struct kbase_hwcnt_virtualizer_client *hvcli); + +/** + * kbase_hwcnt_virtualizer_client_set_counters - Perform a dump of the client's + * currently enabled counters, and + * enable a new set of counters + * that will be used for + * subsequent dumps. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @enable_map: Non-NULL pointer to the new counter enable map for the client. + * Must have the same metadata as the virtualizer. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_set_counters(struct kbase_hwcnt_virtualizer_client *hvcli, + const struct kbase_hwcnt_enable_map *enable_map, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_virtualizer_client_dump - Perform a dump of the client's + * currently enabled counters. + * @hvcli: Non-NULL pointer to the virtualizer client. + * @ts_start_ns: Non-NULL pointer where the start timestamp of the dump will + * be written out to on success. + * @ts_end_ns: Non-NULL pointer where the end timestamp of the dump will + * be written out to on success. + * @dump_buf: Pointer to the buffer where the dump will be written out to on + * success. If non-NULL, must have the same metadata as the + * accumulator. If NULL, the dump will be discarded. + * + * Return: 0 on success or error code. + */ +int kbase_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer_client *hvcli, + u64 *ts_start_ns, u64 *ts_end_ns, + struct kbase_hwcnt_dump_buffer *dump_buf); + +/** + * kbase_hwcnt_virtualizer_queue_work() - Queue hardware counter related async + * work on a workqueue specialized for + * hardware counters. + * @hvirt: Non-NULL pointer to the hardware counter virtualizer. + * @work: Non-NULL pointer to work to queue. + * + * Return: false if work was already on a queue, true otherwise. + * + * This is a convenience function that directly calls the underlying + * kbase_hwcnt_context's kbase_hwcnt_context_queue_work. + */ +bool kbase_hwcnt_virtualizer_queue_work(struct kbase_hwcnt_virtualizer *hvirt, + struct work_struct *work); + +#endif /* _KBASE_HWCNT_VIRTUALIZER_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h new file mode 100644 index 0000000..501c008 --- /dev/null +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Virtual interface for the hardware counter watchdog.
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_H_
+
+#include <linux/types.h>
+
+/*
+ * Opaque structure of information used to create a watchdog timer interface.
+ */
+struct kbase_hwcnt_watchdog_info;
+
+/**
+ * typedef kbase_hwcnt_watchdog_callback_fn - Callback function called when the watchdog
+ *                                            timer expires
+ *
+ * @user_data: Pointer to the callback user data.
+ */
+typedef void kbase_hwcnt_watchdog_callback_fn(void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_enable_fn - Enable the watchdog timer
+ *
+ * @timer:     Non-NULL pointer to a watchdog timer interface context
+ * @period_ms: Period in milliseconds of the watchdog timer
+ * @callback:  Non-NULL pointer to a watchdog callback function
+ * @user_data: Pointer to the user data, passed to the callback when the watchdog timer expires
+ *
+ * Return: 0 if the watchdog timer was enabled successfully, error code otherwise.
+ */
+typedef int kbase_hwcnt_watchdog_enable_fn(const struct kbase_hwcnt_watchdog_info *timer,
+					   u32 period_ms,
+					   kbase_hwcnt_watchdog_callback_fn *callback,
+					   void *user_data);
+
+/**
+ * typedef kbase_hwcnt_watchdog_disable_fn - Disable the watchdog timer
+ *
+ * @timer: Non-NULL pointer to a watchdog timer interface context
+ */
+typedef void kbase_hwcnt_watchdog_disable_fn(const struct kbase_hwcnt_watchdog_info *timer);
+
+/**
+ * typedef kbase_hwcnt_watchdog_modify_fn - Modify the watchdog timer's timeout
+ *
+ * @timer:    Non-NULL pointer to a watchdog timer interface context
+ * @delay_ms: New expiry of the watchdog timer, in milliseconds from now
+ */
+typedef void kbase_hwcnt_watchdog_modify_fn(const struct kbase_hwcnt_watchdog_info *timer,
+					    u32 delay_ms);
+
+/**
+ * struct kbase_hwcnt_watchdog_interface - Hardware counter watchdog virtual interface.
+ *
+ * @timer:   Immutable watchdog timer info
+ * @enable:  Function ptr to enable the watchdog
+ * @disable: Function ptr to disable the watchdog
+ * @modify:  Function ptr to modify the watchdog timeout
+ */
+struct kbase_hwcnt_watchdog_interface {
+	const struct kbase_hwcnt_watchdog_info *timer;
+	kbase_hwcnt_watchdog_enable_fn *enable;
+	kbase_hwcnt_watchdog_disable_fn *disable;
+	kbase_hwcnt_watchdog_modify_fn *modify;
+};
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
new file mode 100644
index 0000000..4caa832
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + */ + +#include "mali_kbase.h" +#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" +#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h" + +#include <linux/workqueue.h> +#include <linux/slab.h> + +/** + * struct kbase_hwcnt_watchdog_if_timer_info - Timer information for watchdog + * interface. + * + * @workq: Single threaded work queue in which to execute callbacks. + * @dwork: Worker to execute callback function. + * @timer_enabled: True if watchdog timer enabled, otherwise false + * @callback: Watchdog callback function + * @user_data: Pointer to user data passed as argument to the callback + * function + */ +struct kbase_hwcnt_watchdog_if_timer_info { + struct workqueue_struct *workq; + struct delayed_work dwork; + bool timer_enabled; + kbase_hwcnt_watchdog_callback_fn *callback; + void *user_data; +}; + +/** + * kbasep_hwcnt_watchdog_callback() - Watchdog callback + * + * @work: Work structure + * + * Function to be called in a work queue after watchdog timer has expired. + */ +static void kbasep_hwcnt_watchdog_callback(struct work_struct *const work) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const info = + container_of(work, struct kbase_hwcnt_watchdog_if_timer_info, dwork.work); + + if (info->callback) + info->callback(info->user_data); +} + +static int kbasep_hwcnt_watchdog_if_timer_enable( + const struct kbase_hwcnt_watchdog_info *const timer, u32 const period_ms, + kbase_hwcnt_watchdog_callback_fn *const callback, void *const user_data) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; + + if (WARN_ON(!timer) || WARN_ON(!callback) || WARN_ON(timer_info->timer_enabled)) + return -EINVAL; + + timer_info->callback = callback; + timer_info->user_data = user_data; + + queue_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(period_ms)); + timer_info->timer_enabled = true; + + return 0; +} + +static void +kbasep_hwcnt_watchdog_if_timer_disable(const struct kbase_hwcnt_watchdog_info *const timer) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; + + if (WARN_ON(!timer)) + return; + + if (!timer_info->timer_enabled) + return; + + cancel_delayed_work_sync(&timer_info->dwork); + timer_info->timer_enabled = false; +} + +static void +kbasep_hwcnt_watchdog_if_timer_modify(const struct kbase_hwcnt_watchdog_info *const timer, + u32 const delay_ms) +{ + struct kbase_hwcnt_watchdog_if_timer_info *const timer_info = (void *)timer; + + if (WARN_ON(!timer) || WARN_ON(!timer_info->timer_enabled)) + return; + + mod_delayed_work(timer_info->workq, &timer_info->dwork, msecs_to_jiffies(delay_ms)); +} + +void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *const watchdog_if) +{ + struct kbase_hwcnt_watchdog_if_timer_info *timer_info; + + if (WARN_ON(!watchdog_if)) + return; + + timer_info = (void *)watchdog_if->timer; + + if (WARN_ON(!timer_info)) + return; + + destroy_workqueue(timer_info->workq); + kfree(timer_info); + + *watchdog_if = (struct kbase_hwcnt_watchdog_interface){ + .timer = NULL, .enable = NULL, .disable = NULL, .modify = NULL + }; +} + +int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *const watchdog_if) +{ + struct kbase_hwcnt_watchdog_if_timer_info *timer_info; + + if (WARN_ON(!watchdog_if)) + return -EINVAL; + + timer_info = kmalloc(sizeof(*timer_info), GFP_KERNEL); + if (!timer_info) + return -ENOMEM; + + *timer_info = (struct kbase_hwcnt_watchdog_if_timer_info){ .timer_enabled = false }; + + INIT_DELAYED_WORK(&timer_info->dwork, 
kbasep_hwcnt_watchdog_callback);
+
+	*watchdog_if = (struct kbase_hwcnt_watchdog_interface){
+		.timer = (void *)timer_info,
+		.enable = kbasep_hwcnt_watchdog_if_timer_enable,
+		.disable = kbasep_hwcnt_watchdog_if_timer_disable,
+		.modify = kbasep_hwcnt_watchdog_if_timer_modify,
+	};
+
+	timer_info->workq = alloc_workqueue("mali_hwc_watchdog_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+	if (timer_info->workq)
+		return 0;
+
+	kfree(timer_info);
+	return -ENOMEM;
+}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
new file mode 100644
index 0000000..a545ad3
--- /dev/null
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/*
+ * Concrete implementation of kbase_hwcnt_watchdog_interface for the HWC backend
+ */
+
+#ifndef _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+#define _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_
+
+struct kbase_hwcnt_watchdog_interface;
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_create() - Create a watchdog interface for the hardware
+ *                                          counter backend.
+ *
+ * @watchdog_if: Non-NULL pointer to the watchdog interface that is filled in on creation success
+ *
+ * Return: 0 on success, error otherwise.
+ */
+int kbase_hwcnt_watchdog_if_timer_create(struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+/**
+ * kbase_hwcnt_watchdog_if_timer_destroy() - Destroy a watchdog interface for the hardware
+ *                                           counter backend.
+ *
+ * @watchdog_if: Pointer to the watchdog interface to destroy
+ */
+void kbase_hwcnt_watchdog_if_timer_destroy(struct kbase_hwcnt_watchdog_interface *watchdog_if);
+
+#endif /* _KBASE_HWCNT_WATCHDOG_IF_TIMER_H_ */
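
For reference, a minimal sketch of how a caller might drive the watchdog timer interface defined above. The callback body, the period values, and the names example_wdog_cb/example_wdog_usage are illustrative assumptions; only the create/enable/modify/disable/destroy calls and the callback signature come from the interface itself.

#include <linux/printk.h>

#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
#include "hwcnt/mali_kbase_hwcnt_watchdog_if_timer.h"

/* Hypothetical callback: a real user would trigger a periodic dump here. */
static void example_wdog_cb(void *user_data)
{
	pr_info("hwcnt watchdog expired (user_data=%p)\n", user_data);
}

static int example_wdog_usage(void)
{
	struct kbase_hwcnt_watchdog_interface wdog;
	int errcode;

	errcode = kbase_hwcnt_watchdog_if_timer_create(&wdog);
	if (errcode)
		return errcode;

	/* Arm the timer; 1000 ms is an illustrative period. */
	errcode = wdog.enable(wdog.timer, 1000, example_wdog_cb, NULL);
	if (!errcode) {
		/* Push the expiry out to 2000 ms from now. */
		wdog.modify(wdog.timer, 2000);
		/* Cancel any pending expiry before tearing down. */
		wdog.disable(wdog.timer);
	}

	kbase_hwcnt_watchdog_if_timer_destroy(&wdog);
	return errcode;
}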