author    Jörg Wagner <jorwag@google.com>  2023-12-14 09:44:26 +0000
committer Jörg Wagner <jorwag@google.com>  2023-12-14 09:44:26 +0000
commit    049a542207ed694271316782397b78b2e202086a
tree      105e9378d4d5062dc72109fdd4a77c915bd9425d  /mali_kbase/hwcnt
parent    e61eb93296e9f940b32d4ad4b0c3a5557cbeaf17

Update KMD to r47p0
Provenance: ipdelivery@ad01e50d640910a99224382bb227e6d4de627657
Change-Id: I19ac9bce34a5c5a319c1b4a388e8b037b3dfe6e7
Diffstat (limited to 'mali_kbase/hwcnt')
-rw-r--r--  mali_kbase/hwcnt/Kbuild | 1
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h | 16
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c | 397
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h | 26
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h | 102
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c | 60
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c | 221
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c | 19
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt.c | 36
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c | 782
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h | 137
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c | 298
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h | 330
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.c | 362
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_types.h | 631
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c | 18
16 files changed, 1824 insertions, 1612 deletions
diff --git a/mali_kbase/hwcnt/Kbuild b/mali_kbase/hwcnt/Kbuild
index 8c8775f..d24d8ef 100644
--- a/mali_kbase/hwcnt/Kbuild
+++ b/mali_kbase/hwcnt/Kbuild
@@ -21,7 +21,6 @@
mali_kbase-y += \
hwcnt/mali_kbase_hwcnt.o \
hwcnt/mali_kbase_hwcnt_gpu.o \
- hwcnt/mali_kbase_hwcnt_gpu_narrow.o \
hwcnt/mali_kbase_hwcnt_types.o \
hwcnt/mali_kbase_hwcnt_virtualizer.o \
hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
index 6cfa6f5..cc3ba98 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -123,11 +123,21 @@ kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend,
* typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with
* the backend.
* @backend: Non-NULL pointer to backend.
+ * @dump_buffer: Pointer to an accumulated dump buffer to update or NULL.
+ * @enable_map: Pointer to enable map specifying enabled counters. Must be NULL if @dump_buffer is NULL.
*
* If the backend is already disabled, does nothing.
- * Any undumped counter values since the last dump get will be lost.
+ *
+ * Any undumped counter values since the last dump get will be lost. However, undumped block state
+ * can be retained by the backend.
+ *
+ * @dump_buffer and @enable_map give the backend an opportunity to update an existing accumulated
+ * buffer with state information, and for the caller to take ownership of it. In particular, the
+ * caller can use this when they require such information whilst the counter dumps are disabled.
*/
-typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend);
+typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map);
/**
* typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped
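To make the new contract above concrete: disabling with a dump buffer flushes the backend's
accumulated state out to the caller on the way down, while passing NULL keeps the old
"just disable" behaviour. The self-contained sketch below models only that pattern; the toy_*
names and the fixed-size accumulator are invented for illustration and are not kbase symbols.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy model of "disable counter dumping, optionally handing the
 * accumulated (undumped) state over to the caller while doing so".
 */
struct toy_backend {
    uint64_t accum[4];
    int enabled;
};

static void toy_dump_disable(struct toy_backend *b, uint64_t *dump_buffer, size_t n)
{
    if (!b->enabled)
        return;                           /* already disabled: do nothing */

    if (dump_buffer) {
        /* Caller wants the undumped state now, while disabling. */
        memcpy(dump_buffer, b->accum, n * sizeof(*dump_buffer));
        /* Ownership transferred: clear our copy. */
        memset(b->accum, 0, sizeof(b->accum));
    }

    /* Without a buffer, undumped counter values are simply lost. */
    b->enabled = 0;
}

int main(void)
{
    struct toy_backend b = { .accum = { 1, 2, 3, 4 }, .enabled = 1 };
    uint64_t out[4] = { 0 };

    toy_dump_disable(&b, out, 4);         /* disable and take the state */
    printf("%llu\n", (unsigned long long)out[2]);   /* prints 3 */
    return 0;
}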
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index 27acfc6..d7911ae 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -44,6 +44,9 @@
#define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000)
#endif /* IS_FPGA && !NO_MALI */
+/* Used to check for a sample in which all counters in the block are disabled */
+#define HWCNT_BLOCK_EMPTY_SAMPLE (2)
+
/**
* enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
*
@@ -172,15 +175,16 @@ struct kbase_hwcnt_backend_csf_info {
/**
* struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
- * information.
- * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are
- * sub-categorized into 4 classes: front-end, tiler, memory system, and shader.
- * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
+ * information, as defined by the spec.
* @fe_cnt: Front end block count.
* @tiler_cnt: Tiler block count.
* @mmu_l2_cnt: Memory system (MMU and L2 cache) block count.
* @shader_cnt: Shader Core block count.
- * @fw_block_cnt: Total number of firmware counters blocks.
+ * @fw_block_cnt: Total number of firmware counter blocks, with a single
+ * global FW block and a block per CSG.
+ * @hw_block_cnt: Total number of hardware counter blocks. The hw counters blocks are
+ * sub-categorized into 4 classes: front-end, tiler, memory system, and shader.
+ * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
* @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt).
* @shader_avail_mask: Bitmap of all shader cores in the system.
* @enable_mask_offset: Offset in array elements of enable mask in each block
@@ -190,12 +194,12 @@ struct kbase_hwcnt_backend_csf_info {
* @values_per_block: For any block, the number of counters in total (header + payload).
*/
struct kbase_hwcnt_csf_physical_layout {
- u8 hw_block_cnt;
u8 fe_cnt;
u8 tiler_cnt;
u8 mmu_l2_cnt;
u8 shader_cnt;
u8 fw_block_cnt;
+ u8 hw_block_cnt;
u8 block_cnt;
u64 shader_avail_mask;
size_t enable_mask_offset;
@@ -220,6 +224,13 @@ struct kbase_hwcnt_csf_physical_layout {
* @old_sample_buf: HWC sample buffer to save the previous values
* for delta calculation, size
* prfcnt_info.dump_bytes.
+ * @block_states: Pointer to array of block_state values for all
+ * blocks.
+ * @to_user_block_states: Block state buffer for client user.
+ * @accum_all_blk_stt: Block state to accumulate for all known blocks
+ * on next sample.
+ * @sampled_all_blk_stt: Block state to accumulate for all known blocks
+ * into the current sample.
* @watchdog_last_seen_insert_idx: The insert index which watchdog has last
* seen, to check any new firmware automatic
* samples generated during the watchdog
@@ -243,6 +254,8 @@ struct kbase_hwcnt_csf_physical_layout {
* @hwc_dump_work: Worker to accumulate samples.
* @hwc_threshold_work: Worker for consuming available samples when
* threshold interrupt raised.
+ * @num_l2_slices: Current number of L2 slices allocated to the GPU.
+ * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
*/
struct kbase_hwcnt_backend_csf {
struct kbase_hwcnt_backend_csf_info *info;
@@ -253,6 +266,10 @@ struct kbase_hwcnt_backend_csf {
u64 *to_user_buf;
u64 *accum_buf;
u32 *old_sample_buf;
+ blk_stt_t *block_states;
+ blk_stt_t *to_user_block_states;
+ blk_stt_t accum_all_blk_stt;
+ blk_stt_t sampled_all_blk_stt;
u32 watchdog_last_seen_insert_idx;
struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
void *ring_buf_cpu_base;
@@ -265,15 +282,45 @@ struct kbase_hwcnt_backend_csf {
struct workqueue_struct *hwc_dump_workq;
struct work_struct hwc_dump_work;
struct work_struct hwc_threshold_work;
+ size_t num_l2_slices;
+ u64 shader_present_bitmap;
};
static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
{
- WARN_ON(!csf_info);
+ if (WARN_ON(!csf_info))
+ return false;
+
csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
return (csf_info->backend != NULL);
}
+void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
+ size_t num_l2_slices, u64 shader_present_bitmap)
+{
+ struct kbase_hwcnt_backend_csf_info *csf_info;
+
+ if (!iface)
+ return;
+
+ csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+ /* Early out if the backend does not exist. */
+ if (!csf_info || !csf_info->backend)
+ return;
+
+ if (WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED))
+ return;
+
+ if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) ||
+ WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) !=
+ shader_present_bitmap))
+ return;
+
+ csf_info->backend->num_l2_slices = num_l2_slices;
+ csf_info->backend->shader_present_bitmap = shader_present_bitmap;
+}
+
/**
* kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count
* tracking.
@@ -295,8 +342,7 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
clk_enable_map);
- kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk))
backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
}
@@ -317,8 +363,7 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b
backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
backend_csf->clk_enable_map);
- kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) {
backend_csf->cycle_count_elapsed[clk] =
cycle_counts[clk] - backend_csf->prev_cycle_count[clk];
@@ -340,29 +385,29 @@ static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *bac
/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
* guarantee headers are
- * enabled if any counter is
- * required.
+ * enabled.
*@phys_enable_map: HWC physical enable map to be processed.
*/
-static void
-kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map)
+void kbasep_hwcnt_backend_csf_process_enable_map(
+ struct kbase_hwcnt_physical_enable_map *phys_enable_map)
{
WARN_ON(!phys_enable_map);
- /* Enable header if any counter is required from user, the header is
- * controlled by bit 0 of the enable mask.
+ /* Unconditionally enable each block header and first counter;
+ * the header is controlled by bit 0 of the enable mask.
*/
- if (phys_enable_map->fe_bm)
- phys_enable_map->fe_bm |= 1;
+ phys_enable_map->fe_bm |= 3;
- if (phys_enable_map->tiler_bm)
- phys_enable_map->tiler_bm |= 1;
+ phys_enable_map->tiler_bm |= 3;
- if (phys_enable_map->mmu_l2_bm)
- phys_enable_map->mmu_l2_bm |= 1;
+ phys_enable_map->mmu_l2_bm |= 3;
+
+ phys_enable_map->shader_bm |= 3;
+
+ phys_enable_map->fw_bm |= 3;
+
+ phys_enable_map->csg_bm |= 3;
- if (phys_enable_map->shader_bm)
- phys_enable_map->shader_bm |= 1;
}
static void kbasep_hwcnt_backend_csf_init_layout(
@@ -371,32 +416,35 @@ static void kbasep_hwcnt_backend_csf_init_layout(
{
size_t shader_core_cnt;
size_t values_per_block;
- size_t fw_blocks_count;
- size_t hw_blocks_count;
+ size_t fw_block_cnt;
+ size_t hw_block_cnt;
+ size_t core_cnt;
+
WARN_ON(!prfcnt_info);
WARN_ON(!phys_layout);
- shader_core_cnt = fls64(prfcnt_info->core_mask);
+ shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask);
values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
- fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
- hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
+ fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
+ hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
+
+ core_cnt = shader_core_cnt;
/* The number of hardware counters reported by the GPU matches the legacy guess-work we
* have done in the past
*/
- WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT +
- KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
- prfcnt_info->l2_count + shader_core_cnt);
+ WARN_ON(hw_block_cnt != KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+ prfcnt_info->l2_count + core_cnt);
*phys_layout = (struct kbase_hwcnt_csf_physical_layout){
.fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
.tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
.mmu_l2_cnt = prfcnt_info->l2_count,
.shader_cnt = shader_core_cnt,
- .fw_block_cnt = fw_blocks_count,
- .hw_block_cnt = hw_blocks_count,
- .block_cnt = fw_blocks_count + hw_blocks_count,
+ .fw_block_cnt = fw_block_cnt,
+ .hw_block_cnt = hw_block_cnt,
+ .block_cnt = fw_block_cnt + hw_block_cnt,
.shader_avail_mask = prfcnt_info->core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = values_per_block,
@@ -409,10 +457,14 @@ static void
kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf)
{
size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+ size_t block_state_bytes = backend_csf->phys_layout.block_cnt *
+ KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE;
memset(backend_csf->to_user_buf, 0, user_buf_bytes);
memset(backend_csf->accum_buf, 0, user_buf_bytes);
memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes);
+ memset(backend_csf->block_states, 0, block_state_bytes);
+ memset(backend_csf->to_user_block_states, 0, block_state_bytes);
}
static void
@@ -450,40 +502,130 @@ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_cs
static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf)
{
size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+ size_t block_state_bytes = backend_csf->phys_layout.block_cnt *
+ KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE;
/* Copy the data into the sample and wait for the user to get it. */
memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes);
+ memcpy(backend_csf->to_user_block_states, backend_csf->block_states, block_state_bytes);
/* After copied data into user sample, clear the accumulator values to
* prepare for the next accumulator, such as the next request or
* threshold.
*/
memset(backend_csf->accum_buf, 0, user_buf_bytes);
+ memset(backend_csf->block_states, 0, block_state_bytes);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
+ * information from a sample.
+ * @phys_layout: Physical memory layout information of HWC
+ * sample buffer.
+ * @enable_mask: Counter enable mask for the block whose state is being updated.
+ * @enable_state: The CSF backend internal enabled state.
+ * @exiting_protm: Whether or not the sample is taken when the GPU is exiting
+ * protected mode.
+ * @block_idx: Index of block within the ringbuffer.
+ * @block_state: Pointer to existing block state of the block whose state is being
+ * updated.
+ * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling.
+ */
+static void kbasep_hwcnt_backend_csf_update_block_state(
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask,
+ enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm,
+ size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode)
+{
+ /* Offset of shader core blocks from the start of the HW blocks in the sample */
+ size_t shader_core_block_offset =
+ (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt);
+ bool is_shader_core_block;
+
+ is_shader_core_block = block_idx >= shader_core_block_offset;
+
+ /* Set power bits for the block state for the block, for the sample */
+ switch (enable_state) {
+ /* Disabled states */
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED:
+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
+ case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_OFF);
+ break;
+ /* Enabled states */
+ case KBASE_HWCNT_BACKEND_CSF_ENABLED:
+ case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
+ if (!is_shader_core_block)
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_ON);
+ else if (!exiting_protm) {
+ /* When not exiting protected mode, a zero enable mask on a shader core
+ * counter block indicates the block was powered off for the sample, and
+ * a non-zero counter enable mask indicates the block was powered on for
+ * the sample.
+ */
+ kbase_hwcnt_block_state_append(block_state,
+ (enable_mask ? KBASE_HWCNT_STATE_ON :
+ KBASE_HWCNT_STATE_OFF));
+ }
+ break;
+ /* Error states */
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
+ case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:
+ default:
+ /* Do nothing */
+ break;
+ }
+
+ /* The following four cases apply to a block state in either normal mode or protected mode:
+ * 1. GPU executing in normal mode: Only set normal mode bit.
+ * 2. First sample request after GPU enters protected mode: Set both normal mode and
+ * protected mode bit. In this case, there will at least be one sample to accumulate
+ * in the ring buffer which was automatically triggered before GPU entered protected
+ * mode.
+ * 3. Subsequent sample requests while GPU remains in protected mode: Only set protected
+ * mode bit. In this case, the ring buffer should be empty and dump should return 0s but
+ * block state should be updated accordingly. This case is not handled here.
+ * 4. Samples requested after GPU exits protected mode: Set both protected mode and normal
+ * mode bits.
+ */
+ if (exiting_protm || fw_in_protected_mode)
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_PROTECTED |
+ KBASE_HWCNT_STATE_NORMAL);
+ else
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL);
}
static void kbasep_hwcnt_backend_csf_accumulate_sample(
const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
- u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples)
+ u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf,
+ blk_stt_t *const block_states, bool clearing_samples,
+ enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode)
{
size_t block_idx;
const u32 *old_block = old_sample_buf;
const u32 *new_block = new_sample_buf;
u64 *acc_block = accum_buf;
+ /* Flag to indicate whether current sample is exiting protected mode. */
+ bool exiting_protm = false;
const size_t values_per_block = phys_layout->values_per_block;
- /* Performance counter blocks for firmware are stored before blocks for hardware.
- * We skip over the firmware's performance counter blocks (counters dumping is not
- * supported for firmware blocks, only hardware ones).
+ /* The block pointers now point to the first HW block, which is always a CSHW/front-end
+ * block. The counter enable mask for this block can be checked to determine whether this
+ * sample is taken after leaving protected mode - this is the only scenario where the CSHW
+ * block counter enable mask has only the first bit set, and no others. In this case,
+ * the values in this sample would not be meaningful, so they don't need to be accumulated.
*/
- old_block += values_per_block * phys_layout->fw_block_cnt;
- new_block += values_per_block * phys_layout->fw_block_cnt;
+ exiting_protm = (new_block[phys_layout->enable_mask_offset] == 1);
- for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt;
- block_idx++) {
+ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
+ /* Update block state with information of the current sample */
+ kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask,
+ enable_state, exiting_protm, block_idx,
+ &block_states[block_idx],
+ fw_in_protected_mode);
- if (new_enable_mask == 0) {
+ if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
/* Hardware block was unavailable or we didn't turn on
* any counters. Do nothing.
*/
@@ -492,7 +634,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
* enabled. We need to update the accumulation buffer.
*/
size_t ctr_idx;
-
/* Unconditionally copy the headers. */
for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) {
acc_block[ctr_idx] = new_block[ctr_idx];
@@ -517,8 +658,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
* saturating at their maximum value.
*/
if (!clearing_samples) {
- if (old_enable_mask == 0) {
- /* Hardware block was previously
+ if (!(old_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
+ /* Block was previously
* unavailable. Accumulate the new
* counters only, as we know previous
* values are zeroes.
@@ -545,15 +686,14 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
}
}
}
+
old_block += values_per_block;
new_block += values_per_block;
acc_block += values_per_block;
}
-
WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
- WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) -
- (values_per_block * phys_layout->fw_block_cnt));
+ WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
(void)dump_bytes;
}
@@ -569,10 +709,23 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
u32 *old_sample_buf = backend_csf->old_sample_buf;
u32 *new_sample_buf = old_sample_buf;
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout;
+
+ if (extract_index_to_start == insert_index_to_stop) {
+ /* No samples to accumulate but block states need to be updated for dump. */
+ size_t block_idx;
- if (extract_index_to_start == insert_index_to_stop)
- /* No samples to accumulate. Early out. */
+ for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
+ /* Set protected mode bit for block state if GPU is in protected mode,
+ * otherwise set the normal mode bit.
+ */
+ kbase_hwcnt_block_state_append(&backend_csf->block_states[block_idx],
+ backend_csf->info->fw_in_protected_mode ?
+ KBASE_HWCNT_STATE_PROTECTED :
+ KBASE_HWCNT_STATE_NORMAL);
+ }
return;
+ }
/* Sync all the buffers to CPU side before read the data. */
backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
@@ -587,11 +740,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
-
- kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout,
- buf_dump_bytes, backend_csf->accum_buf,
- old_sample_buf, new_sample_buf,
- clearing_samples);
+ kbasep_hwcnt_backend_csf_accumulate_sample(
+ phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf,
+ new_sample_buf, backend_csf->block_states, clearing_samples,
+ backend_csf->enable_state, backend_csf->info->fw_in_protected_mode);
old_sample_buf = new_sample_buf;
}
@@ -875,6 +1027,8 @@ kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *bac
enable->shader_bm = phys_enable_map.shader_bm;
enable->tiler_bm = phys_enable_map.tiler_bm;
enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+ enable->fw_bm = phys_enable_map.fw_bm;
+ enable->csg_bm = phys_enable_map.csg_bm;
enable->counter_set = phys_counter_set;
enable->clk_enable_map = enable_map->clk_enable_map;
}
@@ -893,6 +1047,17 @@ kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
+ /* Enabling counters is an indication that the power may have previously been off for all
+ * blocks.
+ *
+ * In any case, the counters would not have been counting recently, so an 'off' block state
+ * is an approximation for this.
+ *
+ * This will be transferred to the dump only after a dump_wait(), or dump_disable() in
+ * cases where the caller requested such information. This is to handle when a
+ * dump_enable() happens in between dump_wait() and dump_get().
+ */
+ kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable);
/* enable_state should be DISABLED before we transfer it to enabled */
@@ -956,13 +1121,19 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
-static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
+static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
unsigned long flags = 0UL;
struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
bool do_disable = false;
- WARN_ON(!backend_csf);
+ if (WARN_ON(!backend_csf ||
+ (dump_buffer && (backend_csf->info->metadata != dump_buffer->metadata)) ||
+ (enable_map && (backend_csf->info->metadata != enable_map->metadata)) ||
+ (dump_buffer && !enable_map)))
+ return;
backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
@@ -1048,6 +1219,42 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
* for next enable.
*/
kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
+
+ /* Disabling HWCNT is an indication that blocks have been powered off. This is important to
+ * know for L2, CSHW, and Tiler blocks, as this is currently the only way a backend can
+ * know if they are being powered off.
+ *
+ * In any case, even if they weren't really powered off, we won't be counting whilst
+ * disabled.
+ *
+ * Update the block state information in the block state accumulator to show this, so that
+ * in the next dump blocks will have been seen as powered off for some of the time.
+ */
+ kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
+
+ if (dump_buffer) {
+ /* In some use-cases, the caller will need the information whilst the counters are
+ * disabled, but will not be able to call into the backend to dump them. Instead,
+ * they have an opportunity here to request them to be accumulated into their
+ * buffer immediately.
+ *
+ * This consists of taking a sample of the accumulated block state (as though a
+ * real dump_get() had happened), then transfer ownership of that to the caller
+ * (i.e. erasing our copy of it).
+ */
+ kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt,
+ &backend_csf->accum_all_blk_stt);
+ kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map,
+ backend_csf->sampled_all_blk_stt);
+ /* Now the block state has been passed out into the caller's own accumulation
+ * buffer, clear our own accumulated and sampled block state - ownership has been
+ * transferred.
+ */
+ kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
+ kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
+ }
}
/* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
@@ -1183,6 +1390,16 @@ static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backen
backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
+ /* Now that we've completed a sample, also sample+clear the accumulated block state.
+ *
+ * This is to ensure that a dump_enable() that happens in between dump_wait() and
+ * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block
+ * state is reported at the actual time that counters are being sampled.
+ */
+ kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt,
+ &backend_csf->accum_all_blk_stt);
+ kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+
return errcode;
}
@@ -1223,8 +1440,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
return -EINVAL;
/* Extract elapsed cycle count for each clock domain if enabled. */
- kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
continue;
@@ -1238,7 +1454,20 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
* as it is undefined to call this function without a prior succeeding
* one to dump_wait().
*/
- ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate);
+ ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
+ backend_csf->to_user_block_states, dst_enable_map,
+ backend_csf->num_l2_slices,
+ backend_csf->shader_present_bitmap, accumulate);
+
+ /* If no error occurred (zero ret value), then update block state for all blocks in the
+ * accumulation with the current sample's block state.
+ */
+ if (!ret) {
+ kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map,
+ backend_csf->sampled_all_blk_stt);
+ kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
+ }
return ret;
}
@@ -1269,6 +1498,12 @@ static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *bac
kfree(backend_csf->to_user_buf);
backend_csf->to_user_buf = NULL;
+ kfree(backend_csf->block_states);
+ backend_csf->block_states = NULL;
+
+ kfree(backend_csf->to_user_block_states);
+ backend_csf->to_user_block_states = NULL;
+
kfree(backend_csf);
}
@@ -1285,6 +1520,7 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *
{
struct kbase_hwcnt_backend_csf *backend_csf = NULL;
int errcode = -ENOMEM;
+ size_t block_state_bytes;
WARN_ON(!csf_info);
WARN_ON(!out_backend);
@@ -1308,6 +1544,17 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *
if (!backend_csf->to_user_buf)
goto err_alloc_user_sample_buf;
+ /* Allocate space to store block state values for each block */
+ block_state_bytes = backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES *
+ KBASE_HWCNT_BLOCK_STATE_STRIDE;
+ backend_csf->block_states = kzalloc(block_state_bytes, GFP_KERNEL);
+ if (!backend_csf->block_states)
+ goto err_alloc_block_states_buf;
+
+ backend_csf->to_user_block_states = kzalloc(block_state_bytes, GFP_KERNEL);
+ if (!backend_csf->to_user_block_states)
+ goto err_alloc_user_block_state_buf;
+
errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
&backend_csf->ring_buf_cpu_base,
&backend_csf->ring_buf);
@@ -1343,6 +1590,8 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *
complete_all(&backend_csf->dump_completed);
backend_csf->user_requested = false;
backend_csf->watchdog_last_seen_insert_idx = 0;
+ kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+ kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
*out_backend = backend_csf;
return 0;
@@ -1351,6 +1600,12 @@ err_alloc_workqueue:
backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
backend_csf->ring_buf);
err_ring_buf_alloc:
+ kfree(backend_csf->to_user_block_states);
+ backend_csf->to_user_block_states = NULL;
+err_alloc_user_block_state_buf:
+ kfree(backend_csf->block_states);
+ backend_csf->block_states = NULL;
+err_alloc_block_states_buf:
kfree(backend_csf->to_user_buf);
backend_csf->to_user_buf = NULL;
err_alloc_user_sample_buf:
@@ -1417,7 +1672,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
if (!backend)
return;
- kbasep_hwcnt_backend_csf_dump_disable(backend);
+ kbasep_hwcnt_backend_csf_dump_disable(backend, NULL, NULL);
/* Set the backend in csf_info to NULL so we won't handle any external
* notification anymore since we are terminating.
@@ -1828,7 +2083,21 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *
if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
return -EIO;
+ /* We should reject initializing the metadata for any malformed
+ * firmware size. The legitimate firmware sizes are as follows:
+ * 1. fw_size == 0 on older GPUs
+ * 2. fw_size == block_size on GPUs that support FW counters but not CSG counters
+ * 3. fw_size == (1 + #CSG) * block size on GPUs that support CSG counters
+ */
+ if ((csf_info->prfcnt_info.prfcnt_fw_size != 0) &&
+ (csf_info->prfcnt_info.prfcnt_fw_size != csf_info->prfcnt_info.prfcnt_block_size) &&
+ (csf_info->prfcnt_info.prfcnt_fw_size !=
+ ((csf_info->prfcnt_info.csg_count + 1) * csf_info->prfcnt_info.prfcnt_block_size)))
+ return -EINVAL;
+
+ gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0;
gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
+ gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count;
gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
gpu_info.prfcnt_values_per_block =
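One behavioural detail in the accumulation path above is easy to miss in diff form: a block's
sample is now treated as empty when bit 1 of its enable mask is clear (HWCNT_BLOCK_EMPTY_SAMPLE
== 2) rather than when the whole mask is zero, because the reworked enable-map processing forces
bits 0 and 1 on for every block that is actually counting. A standalone restatement of just that
test follows; block_sample_has_data is an invented name, not a driver symbol.

#include <stdbool.h>
#include <stdint.h>

/* The "|= 3" in the enable-map processing unconditionally sets bit 0
 * (block header) and bit 1 (first counter) for every enabled block, so
 * a sample whose enable mask has bit 1 clear must come from a block
 * that was unavailable or not counting, and carries no counter data.
 */
#define HWCNT_BLOCK_EMPTY_SAMPLE (2u)

static bool block_sample_has_data(uint32_t enable_mask)
{
    /* Old code tested enable_mask != 0; new code requires bit 1. */
    return (enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE) != 0;
}

For example, block_sample_has_data(0x0) and block_sample_has_data(0x1) are false, while
block_sample_has_data(0x3) is true.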
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
index 9c5a5c9..2487db2 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,6 +31,8 @@
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+struct kbase_hwcnt_physical_enable_map;
+
/**
* kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
* interface.
@@ -115,6 +117,28 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i
void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface);
/**
+ * kbase_hwcnt_backend_csf_set_hw_availability() - CSF HWC backend function to
+ * set current HW configuration.
+ * HWC must be disabled before
+ * this function is called.
+ * @iface: Non-NULL pointer to HWC backend interface.
+ * @num_l2_slices: Current number of L2 slices allocated to the GPU.
+ * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
+ */
+void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
+ size_t num_l2_slices,
+ uint64_t shader_present_bitmap);
+
+/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
+ * guarantee headers are
+ * enabled.
+ * @phys_enable_map: HWC physical enable map to be processed.
+ */
+void kbasep_hwcnt_backend_csf_process_enable_map(
+ struct kbase_hwcnt_physical_enable_map *phys_enable_map);
+
+/**
* kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample
* complete interrupt handler.
* @iface: Non-NULL pointer to HWC backend interface.
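Since kbasep_hwcnt_backend_csf_process_enable_map() is now exported from this header, its effect
is worth stating outside the diff: every per-block selection bitmask gets bits 0 and 1 forced on,
which the implementation's comment attributes to the block header and the first counter. Below is
a toy reimplementation against an invented struct (not struct kbase_hwcnt_physical_enable_map),
just to show the shape of the operation.

#include <stdint.h>

/* Invented stand-in for the physical enable map: one selection bitmask
 * per block class, mirroring the fe/tiler/mmu_l2/shader/fw/csg fields
 * touched by the real function.
 */
struct toy_enable_map {
    uint32_t fe_bm, tiler_bm, mmu_l2_bm, shader_bm, fw_bm, csg_bm;
};

static void toy_process_enable_map(struct toy_enable_map *m)
{
    /* 3 == bit 0 (header) | bit 1 (first counter); set unconditionally,
     * matching the "|= 3" lines in the patched .c file.
     */
    m->fe_bm     |= 3;
    m->tiler_bm  |= 3;
    m->mmu_l2_bm |= 3;
    m->shader_bm |= 3;
    m->fw_bm     |= 3;
    m->csg_bm    |= 3;
}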
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
index 382a3ad..65bb965 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,8 @@ struct kbase_hwcnt_backend_csf_if_ring_buf;
* @shader_bm: Shader counters selection bitmask.
* @tiler_bm: Tiler counters selection bitmask.
* @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ * @fw_bm: FW counters selection bitmask.
+ * @csg_bm: FW CSG counters selection bitmask.
* @counter_set: The performance counter set to enable.
* @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle
* counter for a given clock domain.
@@ -48,6 +50,8 @@ struct kbase_hwcnt_backend_csf_if_enable {
u32 shader_bm;
u32 tiler_bm;
u32 mmu_l2_bm;
+ u32 fw_bm;
+ u32 csg_bm;
u8 counter_set;
u64 clk_enable_map;
};
@@ -63,6 +67,7 @@ struct kbase_hwcnt_backend_csf_if_enable {
* counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size.
* @prfcnt_block_size: Bytes of each performance counter block.
* @l2_count: The MMU L2 cache count.
+ * @csg_count: The total number of CSGs in the system.
* @core_mask: Shader core mask.
* @clk_cnt: Clock domain count in the system.
* @clearing_samples: Indicates whether counters are cleared after each sample
@@ -74,6 +79,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
size_t dump_bytes;
size_t prfcnt_block_size;
size_t l2_count;
+ u32 csg_count;
u64 core_mask;
u8 clk_cnt;
bool clearing_samples;
@@ -85,8 +91,8 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
* held.
* @ctx: Non-NULL pointer to a CSF context.
*/
-typedef void
-kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef void (*kbase_hwcnt_backend_csf_if_assert_lock_held_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock.
@@ -95,8 +101,8 @@ kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if
* @flags: Pointer to the memory location that would store the previous
* interrupt state.
*/
-typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- unsigned long *flags);
+typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long *flags);
/**
* typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock.
@@ -105,8 +111,8 @@ typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_i
* @flags: Previously stored interrupt state when Scheduler interrupt
* spinlock was acquired.
*/
-typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- unsigned long flags);
+typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ unsigned long flags);
/**
* typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance
@@ -115,7 +121,7 @@ typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf
* @prfcnt_info: Non-NULL pointer to struct where performance counter
* information should be stored.
*/
-typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn(
+typedef void (*kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn)(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info);
@@ -135,10 +141,9 @@ typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn(
*
* Return: 0 on success, else error code.
*/
-typedef int
-kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- u32 buf_count, void **cpu_dump_base,
- struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
+typedef int (*kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base,
+ struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf);
/**
* typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers
@@ -157,10 +162,10 @@ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_c
* Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU
* are correctly observed.
*/
-typedef void
-kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- u32 buf_index_first, u32 buf_index_last, bool for_cpu);
+typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_sync_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, u32 buf_index_first,
+ u32 buf_index_last, bool for_cpu);
/**
* typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for
@@ -169,9 +174,9 @@ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ct
* @ctx: Non-NULL pointer to a CSF interface context.
* @ring_buf: Non-NULL pointer to the ring buffer which to be freed.
*/
-typedef void
-kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
+typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_free_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf);
/**
* typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current
@@ -181,7 +186,8 @@ kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ct
*
* Return: CSF interface timestamp in nanoseconds.
*/
-typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef u64 (*kbase_hwcnt_backend_csf_if_timestamp_ns_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware
@@ -192,10 +198,10 @@ typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backen
*
* Requires lock to be taken before calling.
*/
-typedef void
-kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
- struct kbase_hwcnt_backend_csf_if_enable *enable);
+typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx,
+ struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf,
+ struct kbase_hwcnt_backend_csf_if_enable *enable);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter
@@ -204,7 +210,8 @@ kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef void (*kbase_hwcnt_backend_csf_if_dump_disable_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump.
@@ -213,7 +220,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backe
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx);
+typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx);
/**
* typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and
@@ -226,8 +234,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backe
*
* Requires lock to be taken before calling.
*/
-typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- u32 *extract_index, u32 *insert_index);
+typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, u32 *insert_index);
/**
* typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract
@@ -239,9 +247,8 @@ typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backen
*
* Requires lock to be taken before calling.
*/
-typedef void
-kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- u32 extract_index);
+typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index);
/**
* typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current
@@ -255,9 +262,8 @@ kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_i
*
* Requires lock to be taken before calling.
*/
-typedef void
-kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx,
- u64 *cycle_counts, u64 clk_enable_map);
+typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)(
+ struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, u64 clk_enable_map);
/**
* struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual
@@ -283,20 +289,20 @@ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf
*/
struct kbase_hwcnt_backend_csf_if {
struct kbase_hwcnt_backend_csf_if_ctx *ctx;
- kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held;
- kbase_hwcnt_backend_csf_if_lock_fn *lock;
- kbase_hwcnt_backend_csf_if_unlock_fn *unlock;
- kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info;
- kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc;
- kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync;
- kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free;
- kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns;
- kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable;
- kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable;
- kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request;
- kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes;
- kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index;
- kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count;
+ kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held;
+ kbase_hwcnt_backend_csf_if_lock_fn lock;
+ kbase_hwcnt_backend_csf_if_unlock_fn unlock;
+ kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info;
+ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc;
+ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync;
+ kbase_hwcnt_backend_csf_if_ring_buf_free_fn ring_buf_free;
+ kbase_hwcnt_backend_csf_if_timestamp_ns_fn timestamp_ns;
+ kbase_hwcnt_backend_csf_if_dump_enable_fn dump_enable;
+ kbase_hwcnt_backend_csf_if_dump_disable_fn dump_disable;
+ kbase_hwcnt_backend_csf_if_dump_request_fn dump_request;
+ kbase_hwcnt_backend_csf_if_get_indexes_fn get_indexes;
+ kbase_hwcnt_backend_csf_if_set_extract_index_fn set_extract_index;
+ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn get_gpu_cycle_count;
};
#endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */
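Most of this header's churn is one mechanical conversion: each typedef that used to name a
function type now names a pointer-to-function type, so the struct kbase_hwcnt_backend_csf_if
members lose their explicit '*'. The standalone fragment below shows the two styles side by side
with generic names (old_style_fn/new_style_fn are not kbase typedefs); both vtables are
initialised and called identically.

/* Old style: the typedef names a function type, so the vtable member
 * must add the '*' itself.
 */
typedef void old_style_fn(int arg);
struct old_vtable {
    old_style_fn *handler;
};

/* New style: the typedef names a pointer-to-function type, so the
 * vtable member is written without '*'.
 */
typedef void (*new_style_fn)(int arg);
struct new_vtable {
    new_style_fn handler;
};

static void log_arg(int arg)
{
    (void)arg;
}

/* Callers fill and invoke both forms the same way. */
static struct old_vtable ov = { .handler = log_arg };
static struct new_vtable nv = { .handler = log_arg };

int main(void)
{
    ov.handler(1);
    nv.handler(2);
    return 0;
}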
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index c8cf934..1b7a116 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -24,7 +24,7 @@
*/
#include <mali_kbase.h>
-#include <gpu/mali_kbase_gpu_regmap.h>
+#include <hw_access/mali_kbase_hw_access_regmap.h>
#include <device/mali_kbase_device.h>
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"
@@ -39,7 +39,6 @@
#include <linux/log2.h>
#include "mali_kbase_ccswe.h"
-
/* Ring buffer virtual address start at 4GB */
#define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32)
@@ -206,6 +205,20 @@ kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_c
kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener);
}
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+/**
+ * kbasep_hwcnt_backend_csf_core_mask() - Obtain the shader core mask
+ *
+ * @gpu_props: Non-NULL pointer to the GPU properties structure.
+ *
+ * Return: shader core mask from the coherency information.
+ */
+static u64 kbasep_hwcnt_backend_csf_core_mask(struct kbase_gpu_props *gpu_props)
+{
+ return gpu_props->coherency_info.group.core_mask;
+}
+#endif
+
static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
struct kbase_hwcnt_backend_csf_if_ctx *ctx,
struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info)
@@ -234,6 +247,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
u32 prfcnt_size;
u32 prfcnt_hw_size;
u32 prfcnt_fw_size;
+ u32 csg_count;
+ u32 fw_block_count = 0;
u32 prfcnt_block_size =
KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES;
@@ -242,28 +257,41 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
kbdev = fw_ctx->kbdev;
+ csg_count = kbdev->csf.global_iface.group_num;
prfcnt_size = kbdev->csf.global_iface.prfcnt_size;
prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size);
prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size);
- fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
/* Read the block size if the GPU has the register PRFCNT_FEATURES
* which was introduced in architecture version 11.x.7.
*/
- if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >=
- GPU_ID2_PRODUCT_TTUX) {
- prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(
- kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES)))
+ if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(PRFCNT_FEATURES))) {
+ prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(KBASE_REG_READ(
+ kbdev, GPU_CONTROL_ENUM(PRFCNT_FEATURES)))
<< 8;
}
+ /* Extra sanity check to ensure the firmware size matches one of the supported
+ * configurations: no FW blocks at all, a single global FW block, or a global
+ * FW block plus one block per CSG.
+ */
+ if (!prfcnt_fw_size)
+ fw_block_count = 0;
+ else if (prfcnt_fw_size == prfcnt_block_size)
+ fw_block_count = 1;
+ else if (prfcnt_fw_size == ((1 + csg_count) * prfcnt_block_size))
+ fw_block_count = 1 + csg_count;
+ else
+ WARN_ON_ONCE(true);
+
+ fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size;
*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
.prfcnt_hw_size = prfcnt_hw_size,
.prfcnt_fw_size = prfcnt_fw_size,
.dump_bytes = fw_ctx->buf_bytes,
.prfcnt_block_size = prfcnt_block_size,
- .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices,
- .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask,
+ .l2_count = kbdev->gpu_props.num_l2_slices,
+ .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props),
+ .csg_count = fw_block_count > 1 ? csg_count : 0,
.clk_cnt = fw_ctx->clk_cnt,
.clearing_samples = true,
};
@@ -284,7 +312,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
struct page **page_list;
void *cpu_addr;
int ret;
- int i;
+ size_t i;
size_t num_pages;
u64 flags;
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf;
@@ -330,7 +358,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Get physical page for the buffer */
ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages,
phys, false, NULL);
- if (ret != num_pages)
+ if ((size_t)ret != num_pages)
goto phys_mem_pool_alloc_error;
/* Get the CPU virtual address */
@@ -342,7 +370,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
goto vmap_error;
flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX |
- KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+ KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE);
/* Update MMU table */
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
@@ -508,6 +536,7 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx
(struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx;
struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf =
(struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf;
+ u32 csg_mask;
WARN_ON(!ctx);
WARN_ON(!ring_buf);
@@ -516,6 +545,7 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx
kbdev = fw_ctx->kbdev;
global_iface = &kbdev->csf.global_iface;
+ csg_mask = (1 << kbdev->csf.global_iface.group_num) - 1;
/* Configure */
prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count);
@@ -536,6 +566,12 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm);
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_FW_EN, enable->fw_bm);
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSG_EN, enable->csg_bm);
+
+ /* Enable all of the CSGs by default. */
+ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSG_SELECT, csg_mask);
+
/* Configure the HWC set and buffer size */
kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config);
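Two small computations in the firmware interface change above deserve spelling out: how many
firmware counter blocks the reported prfcnt_fw_size implies, and the CSG select mask that enables
every group by default. Restated as standalone helpers with invented names; the real code takes
group_num and the PRFCNT sizes from the firmware global interface and uses WARN_ON_ONCE for the
malformed case.

#include <stddef.h>
#include <stdint.h>

/* Supported firmware configurations: no FW blocks at all, a single
 * global FW block, or a global FW block plus one block per CSG.
 * Anything else is treated as malformed (0 blocks after a warning).
 */
static size_t fw_block_count(size_t prfcnt_fw_size, size_t prfcnt_block_size,
                             uint32_t csg_count)
{
    if (prfcnt_fw_size == 0)
        return 0;
    if (prfcnt_fw_size == prfcnt_block_size)
        return 1;
    if (prfcnt_fw_size == (size_t)(1 + csg_count) * prfcnt_block_size)
        return 1 + csg_count;
    return 0;
}

/* One select bit per CSG, for groups 0..group_num-1; assumes group_num < 32,
 * as the shift in the patch does. E.g. 8 groups -> 0xff.
 */
static uint32_t csg_select_mask(uint32_t group_num)
{
    return (1u << group_num) - 1u;
}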
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
index 8b3caac..4df7dd4 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,30 +80,40 @@ struct kbase_hwcnt_jm_physical_layout {
/**
* struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
- * @info: Info used to create the backend.
- * @kctx: KBase context used for GPU memory allocation and
- * counter dumping.
- * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
- * @cpu_dump_va: CPU mapping of gpu_dump_va.
- * @vmap: Dump buffer vmap.
- * @to_user_buf: HWC sample buffer for client user, size
- * metadata.dump_buf_bytes.
- * @enabled: True if dumping has been enabled, else false.
- * @pm_core_mask: PM state sync-ed shaders core mask for the enabled
- * dumping.
- * @curr_config: Current allocated hardware resources to correctly map the
- * source raw dump buffer to the destination dump buffer.
- * @clk_enable_map: The enable map specifying enabled clock domains.
- * @cycle_count_elapsed:
- * Cycle count elapsed for a given sample period.
- * The top clock cycle, index 0, is read directly from
- * hardware, but the other clock domains need to be
- * calculated with software estimation.
- * @prev_cycle_count: Previous cycle count to calculate the cycle count for
- * sample period.
- * @rate_listener: Clock rate listener callback state.
- * @ccswe_shader_cores: Shader cores cycle count software estimator.
- * @phys_layout: Physical memory layout information of HWC sample buffer.
+ * @info: Info used to create the backend.
+ * @kctx: KBase context used for GPU memory allocation and
+ * counter dumping.
+ * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va: CPU mapping of gpu_dump_va.
+ * @vmap: Dump buffer vmap.
+ * @to_user_buf: HWC sample buffer for client user, size
+ * metadata.dump_buf_bytes.
+ * @enabled: True if dumping has been enabled, else false.
+ * @accum_all_blk_stt: Block state to accumulate on next sample, for all types
+ * of block.
+ * @sampled_all_blk_stt: Block state to accumulate into the current sample, for
+ * all types of block.
+ * @debug_core_mask: User-set mask of shader cores that can be used.
+ * @pm_core_mask: PM state sync-ed shaders core mask for the enabled
+ * dumping.
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * source raw dump buffer to the destination dump buffer.
+ * @max_core_mask: Core mask of all cores allocated to the GPU (non
+ * virtualized platforms) or resource group (virtualized
+ * platforms).
+ * @max_l2_slices: Maximum number of L2 slices allocated to the GPU (non
+ * virtualized platforms) or resource group (virtualized
+ * platforms).
+ * @clk_enable_map: The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed: Cycle count elapsed for a given sample period.
+ * The top clock cycle, index 0, is read directly from
+ * hardware, but the other clock domains need to be
+ * calculated with software estimation.
+ * @prev_cycle_count: Previous cycle count to calculate the cycle count for
+ * sample period.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ * @phys_layout: Physical memory layout information of HWC sample buffer.
*/
struct kbase_hwcnt_backend_jm {
const struct kbase_hwcnt_backend_jm_info *info;
@@ -113,8 +123,13 @@ struct kbase_hwcnt_backend_jm {
struct kbase_vmap_struct *vmap;
u64 *to_user_buf;
bool enabled;
+ blk_stt_t accum_all_blk_stt;
+ blk_stt_t sampled_all_blk_stt;
+ u64 debug_core_mask;
u64 pm_core_mask;
struct kbase_hwcnt_curr_config curr_config;
+ u64 max_core_mask;
+ size_t max_l2_slices;
u64 clk_enable_map;
u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
@@ -136,26 +151,22 @@ struct kbase_hwcnt_backend_jm {
static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
struct kbase_hwcnt_gpu_info *info)
{
- size_t clk;
+ size_t clk, l2_count, core_mask;
if (!kbdev || !info)
return -EINVAL;
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
- info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
- info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
-#else /* CONFIG_MALI_NO_MALI */
- {
- const struct base_gpu_props *props = &kbdev->gpu_props.props;
- const size_t l2_count = props->l2_props.num_l2_slices;
- const size_t core_mask = props->coherency_info.group[0].core_mask;
+ l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
+ l2_count = kbdev->gpu_props.num_l2_slices;
+ core_mask = kbdev->gpu_props.coherency_info.group.core_mask;
+#endif
- info->l2_count = l2_count;
- info->core_mask = core_mask;
- info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
- }
-#endif /* CONFIG_MALI_NO_MALI */
+ info->l2_count = l2_count;
+ info->core_mask = core_mask;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
/* Determine the number of available clock domains. */
for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
@@ -353,9 +364,9 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_context *kctx;
struct kbase_device *kbdev;
- struct kbase_hwcnt_physical_enable_map phys_enable_map;
+ struct kbase_hwcnt_physical_enable_map phys_enable_map = { 0 };
enum kbase_hwcnt_physical_set phys_counter_set;
- struct kbase_instr_hwcnt_enable enable;
+ struct kbase_instr_hwcnt_enable enable = { 0 };
u64 timestamp_ns;
if (!backend_jm || !enable_map || backend_jm->enabled ||
@@ -371,18 +382,21 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);
- enable.fe_bm = phys_enable_map.fe_bm;
- enable.shader_bm = phys_enable_map.shader_bm;
- enable.tiler_bm = phys_enable_map.tiler_bm;
- enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm;
- enable.counter_set = phys_counter_set;
+ enable = (struct kbase_instr_hwcnt_enable)
+ {
+ .fe_bm = phys_enable_map.fe_bm,
+ .shader_bm = phys_enable_map.shader_bm,
+ .tiler_bm = phys_enable_map.tiler_bm,
+ .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
+ .counter_set = phys_counter_set,
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* The dummy model needs the CPU mapping. */
- enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va;
+ /* The dummy model needs the CPU mapping. */
+ .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
#else
- enable.dump_buffer = backend_jm->gpu_dump_va;
+ .dump_buffer = backend_jm->gpu_dump_va,
#endif /* CONFIG_MALI_NO_MALI */
- enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
+ .dump_buffer_bytes = backend_jm->info->dump_bytes,
+ };
timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
@@ -395,9 +409,24 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
if (errcode)
goto error;
+ backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev);
+ backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count;
+ backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask;
+
backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
backend_jm->enabled = true;
+ /* Enabling counters is an indication that the power may have previously been off for all
+ * blocks.
+ *
+ * In any case, the counters would not have been counting recently, so an 'off' block state
+ * is an approximation for this.
+ *
+ * This will be transferred to the dump only after a dump_wait(), or dump_disable() in
+ * cases where the caller requested such information. This is to handle when a
+ * dump_enable() happens in between dump_wait() and dump_get().
+ */
+ kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);
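
To make the block-state bookkeeping above concrete, here is a minimal standalone sketch. It assumes blk_stt_t behaves as a small bitmask and that kbase_hwcnt_block_state_append()/_set() reduce to an OR and an assignment; the HWCNT_STATE_* names and helpers below are stand-ins for illustration, not the driver's definitions.

#include <stdint.h>
#include <stdio.h>

typedef uint8_t blk_stt_t;                 /* stand-in for the driver's blk_stt_t */

#define HWCNT_STATE_UNKNOWN ((blk_stt_t)0) /* stand-in flags, assumed semantics */
#define HWCNT_STATE_ON      ((blk_stt_t)(1 << 0))
#define HWCNT_STATE_OFF     ((blk_stt_t)(1 << 1))

/* append: OR another observed state into the running accumulation */
static void block_state_append(blk_stt_t *stt, blk_stt_t new_stt) { *stt |= new_stt; }
/* set: overwrite the accumulation, e.g. once ownership has been handed over */
static void block_state_set(blk_stt_t *stt, blk_stt_t new_stt) { *stt = new_stt; }

int main(void)
{
	blk_stt_t accum = HWCNT_STATE_UNKNOWN;

	block_state_append(&accum, HWCNT_STATE_OFF); /* dump_enable(): blocks had been off */
	block_state_append(&accum, HWCNT_STATE_ON);  /* counters ran before the sample */

	printf("sample reports state 0x%x (OFF and ON for parts of the period)\n", accum);

	block_state_set(&accum, HWCNT_STATE_UNKNOWN); /* state handed to the caller, reset */
	return 0;
}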
@@ -430,12 +459,20 @@ static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backe
}
/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
-static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend)
+static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
- if (WARN_ON(!backend_jm) || !backend_jm->enabled)
+ if (WARN_ON(!backend_jm ||
+ (dump_buffer && (backend_jm->info->metadata != dump_buffer->metadata)) ||
+ (enable_map && (backend_jm->info->metadata != enable_map->metadata)) ||
+ (dump_buffer && !enable_map)))
+ return;
+ /* No WARN needed here, but still return early if backend is already disabled */
+ if (!backend_jm->enabled)
return;
kbasep_hwcnt_backend_jm_cc_disable(backend_jm);
@@ -443,6 +480,42 @@ static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *bac
errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx);
WARN_ON(errcode);
+ /* Disabling HWCNT is an indication that blocks have been powered off. This is important to
+ * know for L2 and Tiler blocks, as this is currently the only way a backend can know if
+ * they are being powered off.
+ *
+ * In any case, even if they weren't really powered off, we won't be counting whilst
+ * disabled.
+ *
+ * Update the block state information in the block state accumulator to show this, so that
+ * in the next dump blocks will have been seen as powered off for some of the time.
+ */
+ kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
+
+ if (dump_buffer) {
+ /* In some use-cases, the caller will need the information whilst the counters are
+ * disabled, but will not be able to call into the backend to dump them. Instead,
+ * they have an opportunity here to request them to be accumulated into their
+ * buffer immediately.
+ *
+ * This consists of taking a sample of the accumulated block state (as though a
+		 * real dump_get() had happened), then transferring ownership of that to the caller
+ * (i.e. erasing our copy of it).
+ */
+ kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt,
+ &backend_jm->accum_all_blk_stt);
+ kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map,
+ backend_jm->sampled_all_blk_stt);
+ /* Now the block state has been passed out into the caller's own accumulation
+ * buffer, clear our own accumulated and sampled block state - ownership has been
+ * transferred.
+ */
+ kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
+ kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
+ }
+
backend_jm->enabled = false;
}
@@ -480,8 +553,7 @@ static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *back
*dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
- kbase_hwcnt_metadata_for_each_clock(metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
continue;
@@ -514,12 +586,27 @@ static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *back
/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
{
+ int errcode;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (!backend_jm || !backend_jm->enabled)
return -EINVAL;
- return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
+ errcode = kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
+ if (errcode)
+ return errcode;
+
+ /* Now that we've completed a sample, also sample+clear the accumulated block state.
+ *
+ * This is to ensure that a dump_enable() that happens in between dump_wait() and
+ * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block
+ * state is reported at the actual time that counters are being sampled.
+ */
+ kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt,
+ &backend_jm->accum_all_blk_stt);
+ kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+
+ return errcode;
}
/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
@@ -533,8 +620,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
struct kbase_device *kbdev;
unsigned long flags;
- int errcode;
#endif /* CONFIG_MALI_NO_MALI */
+ int errcode;
if (!backend_jm || !dst || !dst_enable_map ||
(backend_jm->info->metadata != dst->metadata) ||
@@ -548,8 +635,7 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
/* Extract elapsed cycle count for each clock domain if enabled. */
- kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
continue;
@@ -572,9 +658,18 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
if (errcode)
return errcode;
#endif /* CONFIG_MALI_NO_MALI */
- return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
- backend_jm->pm_core_mask, &backend_jm->curr_config,
- accumulate);
+ errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
+ backend_jm->pm_core_mask, backend_jm->debug_core_mask,
+ backend_jm->max_core_mask, backend_jm->max_l2_slices,
+ &backend_jm->curr_config, accumulate);
+
+ if (errcode)
+ return errcode;
+
+ kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map,
+ backend_jm->sampled_all_blk_stt);
+ kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+ return errcode;
}
/**
@@ -705,6 +800,8 @@ static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_in
kbase_ccswe_init(&backend->ccswe_shader_cores);
backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;
+ kbase_hwcnt_block_state_set(&backend->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+ kbase_hwcnt_block_state_set(&backend->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
*out_backend = backend;
return 0;
@@ -752,7 +849,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
if (!backend)
return;
- kbasep_hwcnt_backend_jm_dump_disable(backend);
+ kbasep_hwcnt_backend_jm_dump_disable(backend, NULL, NULL);
kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
}
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
index a8654ea..1b54151 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -268,9 +268,9 @@ kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interfac
if (!info)
return NULL;
- *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface,
- .dump_watchdog_iface =
- watchdog_iface };
+ *info = (struct kbase_hwcnt_backend_jm_watchdog_info){
+ .jm_backend_iface = backend_iface, .dump_watchdog_iface = watchdog_iface
+ };
return info;
}
@@ -443,7 +443,8 @@ static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common(
spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags);
} else
/*Reverting the job manager backend back to disabled*/
- wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend,
+ NULL, NULL);
}
return errcode;
@@ -472,7 +473,10 @@ kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend *
}
/* Job manager watchdog backend, implementation of dump_disable */
-static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend)
+static void
+kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *buf_enable_map)
{
struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend;
unsigned long flags;
@@ -497,7 +501,8 @@ static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_bac
wd_backend->info->dump_watchdog_iface->disable(
wd_backend->info->dump_watchdog_iface->timer);
- wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend);
+ wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend, dump_buffer,
+ buf_enable_map);
}
/* Job manager watchdog backend, implementation of dump_clear */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
index 34deb5d..8b1de2e 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c
@@ -292,7 +292,8 @@ static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, b
accum->accumulated = true;
disable:
- hctx->iface->dump_disable(accum->backend);
+ hctx->iface->dump_disable(accum->backend, (accum->accumulated) ? &accum->accum_buf : NULL,
+ &accum->enable_map);
/* Regardless of any errors during the accumulate, put the accumulator
* in the disabled state.
@@ -453,8 +454,20 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *
*/
if ((state == ACCUM_STATE_ENABLED) && new_map) {
/* Backend is only enabled if there were any enabled counters */
- if (cur_map_any_enabled)
- hctx->iface->dump_disable(accum->backend);
+ if (cur_map_any_enabled) {
+ /* In this case we do *not* want to have the buffer updated with extra
+			 * block state; it should instead remain in the backend until the next dump
+ * happens, hence supplying NULL as the dump_buffer parameter here.
+ *
+ * Attempting to take ownership of backend-accumulated block state at this
+			 * point will instead give inaccurate information. For example, the dump
+			 * buffer for a 'set_counters' operation might be dumping a period that
+ * should've been entirely in the 'ON' state, but would report it as
+ * partially in the 'OFF' state. Instead, that 'OFF' state should be
+ * reported in the _next_ dump.
+ */
+ hctx->iface->dump_disable(accum->backend, NULL, NULL);
+ }
/* (Re-)enable the backend if the new map has enabled counters.
* No need to acquire the spinlock, as concurrent enable while
@@ -481,9 +494,15 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *
/* If we've not written anything into the dump buffer so far, it
* means there was nothing to write. Zero any enabled counters.
+ *
+ * In this state, the blocks are likely to be off (and at the very least, not
+ * counting), so write in the 'off' block state
*/
- if (!dump_written)
+ if (!dump_written) {
kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+ kbase_hwcnt_dump_buffer_block_state_update(dump_buf, cur_map,
+ KBASE_HWCNT_STATE_OFF);
+ }
}
/* Write out timestamps */
@@ -498,8 +517,13 @@ error:
/* An error was only physically possible if the backend was enabled */
WARN_ON(state != ACCUM_STATE_ENABLED);
- /* Disable the backend, and transition to the error state */
- hctx->iface->dump_disable(accum->backend);
+ /* Disable the backend, and transition to the error state. In this case, we can try to save
+ * the block state into the accumulated buffer, but there's no guarantee we'll have one, so
+	 * this is more of a 'best effort' for error cases. There would be a suitable block
+ * state recorded on the next dump_enable() anyway.
+ */
+ hctx->iface->dump_disable(accum->backend, (accum->accumulated) ? &accum->accum_buf : NULL,
+ cur_map);
spin_lock_irqsave(&hctx->state_lock, flags);
accum->state = ACCUM_STATE_ERROR;
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
index 74916da..5da5645 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,10 +19,11 @@
*
*/
+#include <mali_kbase.h>
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
-#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/err.h>
+#include <linux/log2.h>
/** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements
*/
@@ -32,78 +33,107 @@ enum enable_map_idx {
EM_COUNT,
};
-static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
+static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_fe_block_type(enum kbase_hwcnt_set counter_set,
+ bool is_csf)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE;
case KBASE_HWCNT_SET_SECONDARY:
if (is_csf)
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2;
else
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
case KBASE_HWCNT_SET_TERTIARY:
if (is_csf)
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3;
else
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
default:
- WARN_ON(true);
+ WARN(true, "Invalid counter set for FE block type: %d", counter_set);
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED;
}
}
-static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
+static enum kbase_hwcnt_gpu_v5_block_type
+kbasep_get_tiler_block_type(enum kbase_hwcnt_set counter_set)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER;
case KBASE_HWCNT_SET_SECONDARY:
case KBASE_HWCNT_SET_TERTIARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
default:
- WARN_ON(true);
+ WARN(true, "Invalid counter set for tiler block type: %d", counter_set);
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED;
}
}
-static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf)
+static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_sc_block_type(enum kbase_hwcnt_set counter_set,
+ bool is_csf)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC;
case KBASE_HWCNT_SET_SECONDARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2;
case KBASE_HWCNT_SET_TERTIARY:
if (is_csf)
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3;
else
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
default:
- WARN_ON(true);
+ WARN(true, "Invalid counter set for shader core block type: %d", counter_set);
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED;
}
}
-static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set)
+
+static enum kbase_hwcnt_gpu_v5_block_type
+kbasep_get_memsys_block_type(enum kbase_hwcnt_set counter_set)
{
switch (counter_set) {
case KBASE_HWCNT_SET_PRIMARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS;
case KBASE_HWCNT_SET_SECONDARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2;
case KBASE_HWCNT_SET_TERTIARY:
- *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
- break;
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
default:
- WARN_ON(true);
+ WARN(true, "Invalid counter set for Memsys block type: %d", counter_set);
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED;
+ }
+}
+
+static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_fw_block_type(enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW;
+ case KBASE_HWCNT_SET_SECONDARY:
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2;
+ case KBASE_HWCNT_SET_TERTIARY:
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3;
+ default:
+ WARN(true, "Invalid counter set for FW type: %d", counter_set);
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED;
+ }
+}
+
+static enum kbase_hwcnt_gpu_v5_block_type
+kbasep_get_csg_block_type(enum kbase_hwcnt_set counter_set)
+{
+ switch (counter_set) {
+ case KBASE_HWCNT_SET_PRIMARY:
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG;
+ case KBASE_HWCNT_SET_SECONDARY:
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2;
+ case KBASE_HWCNT_SET_TERTIARY:
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3;
+ default:
+ WARN(true, "Invalid counter set for CSG type: %d", counter_set);
+ return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED;
}
}
@@ -124,49 +154,89 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
const struct kbase_hwcnt_metadata **metadata)
{
struct kbase_hwcnt_description desc;
- struct kbase_hwcnt_group_description group;
- struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
- size_t non_sc_block_count;
+ struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT] = {};
+ size_t non_core_block_count;
+ size_t core_block_count;
size_t sc_block_count;
+ size_t blk_idx = 0;
- WARN_ON(!gpu_info);
- WARN_ON(!metadata);
+ if (WARN_ON(!gpu_info))
+ return -EINVAL;
- /* Calculate number of block instances that aren't shader cores */
- non_sc_block_count = 2 + gpu_info->l2_count;
+ if (WARN_ON(!metadata))
+ return -EINVAL;
+
+ /* Calculate number of block instances that aren't cores */
+ non_core_block_count = 2 + gpu_info->l2_count;
/* Calculate number of block instances that are shader cores */
- sc_block_count = fls64(gpu_info->core_mask);
+ sc_block_count = (size_t)fls64(gpu_info->core_mask);
+ /* Determine the total number of cores */
+ core_block_count = sc_block_count;
+
+
+ if (gpu_info->has_fw_counters)
+ non_core_block_count += 1 + gpu_info->csg_cnt;
/*
- * A system can have up to 64 shader cores, but the 64-bit
- * availability mask can't physically represent that many cores as well
- * as the other hardware blocks.
- * Error out if there are more blocks than our implementation can
+ * Check we have enough bits to represent the number of cores that
+	 * exist in the system. Error out if there are more blocks than our implementation can
* support.
*/
- if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
+ if ((core_block_count + non_core_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS)
return -EINVAL;
+	/* On systems that support them, the dump starts with the FW blocks, so
+	 * they should be taken into account first.
+ */
+ if (gpu_info->has_fw_counters) {
+ blks[blk_idx++] = (struct kbase_hwcnt_block_description){
+ .type = kbasep_get_fw_block_type(counter_set),
+ .inst_cnt = 1,
+ .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .ctr_cnt = gpu_info->prfcnt_values_per_block -
+ KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ };
+ }
+
+ /* Some systems may support FW counters but not CSG counters, so the
+ * two are handled differently.
+ */
+ if (gpu_info->csg_cnt > 0) {
+ blks[blk_idx++] = (struct kbase_hwcnt_block_description){
+ .type = kbasep_get_csg_block_type(counter_set),
+ .inst_cnt = gpu_info->csg_cnt,
+ .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .ctr_cnt = gpu_info->prfcnt_values_per_block -
+ KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ };
+ }
+
/* One Front End block */
- kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf);
- blks[0].inst_cnt = 1;
- blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[blk_idx++] = (struct kbase_hwcnt_block_description){
+ .type = kbasep_get_fe_block_type(counter_set, is_csf),
+ .inst_cnt = 1,
+ .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ };
/* One Tiler block */
- kbasep_get_tiler_block_type(&blks[1].type, counter_set);
- blks[1].inst_cnt = 1;
- blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[blk_idx++] = (struct kbase_hwcnt_block_description){
+ .type = kbasep_get_tiler_block_type(counter_set),
+ .inst_cnt = 1,
+ .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ };
/* l2_count memsys blks */
- kbasep_get_memsys_block_type(&blks[2].type, counter_set);
- blks[2].inst_cnt = gpu_info->l2_count;
- blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[blk_idx++] = (struct kbase_hwcnt_block_description){
+ .type = kbasep_get_memsys_block_type(counter_set),
+ .inst_cnt = gpu_info->l2_count,
+ .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ };
/*
- * There are as many shader cores in the system as there are bits set in
+ * There are as many cores in the system as there are bits set in
* the core mask. However, the dump buffer memory requirements need to
* take into account the fact that the core mask may be non-contiguous.
*
@@ -179,27 +249,36 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
*
* We find the core mask's last set bit to determine the memory
* requirements, and embed the core mask into the availability mask so
- * we can determine later which shader cores physically exist.
+ * we can determine later which cores physically exist.
*/
- kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf);
- blks[3].inst_cnt = sc_block_count;
- blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
- blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK;
+ blks[blk_idx++] = (struct kbase_hwcnt_block_description){
+ .type = kbasep_get_sc_block_type(counter_set, is_csf),
+ .inst_cnt = sc_block_count,
+ .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
+ };
+
- WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4);
+ /* Currently, we're only handling a maximum of seven blocks, and this needs
+	 * to be changed whenever the number of blocks increases.
+ */
+ BUILD_BUG_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 7);
- group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5;
- group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT;
- group.blks = blks;
+ /* After assembling the block list in the code above, we should not end up with more
+ * elements than KBASE_HWCNT_V5_BLOCK_TYPE_COUNT.
+ */
+ WARN_ON(blk_idx > KBASE_HWCNT_V5_BLOCK_TYPE_COUNT);
- desc.grp_cnt = 1;
- desc.grps = &group;
+ desc.blk_cnt = blk_idx;
+ desc.blks = blks;
desc.clk_cnt = gpu_info->clk_cnt;
/* The JM, Tiler, and L2s are always available, and are before cores */
- desc.avail_mask = (1ull << non_sc_block_count) - 1;
- /* Embed the core mask directly in the availability mask */
- desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count);
+ kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0);
+ kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX);
+ kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count,
+ gpu_info->core_mask);
+
return kbase_hwcnt_metadata_create(&desc, metadata);
}
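
The availability-mask layout described in the comment above can be seen with a small standalone calculation. This sketch assumes the same layout as the replaced direct computation (non-core blocks in the low bits, the possibly sparse core mask embedded directly above them) and uses example values rather than a real GPU configuration; __builtin_clzll stands in for fls64().

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const size_t l2_count = 2;        /* example values only */
	const uint64_t core_mask = 0xb;   /* cores 0, 1 and 3 are present */

	const size_t non_core_block_count = 2 + l2_count;                      /* FE + Tiler + L2s */
	const size_t sc_block_count = 64 - (size_t)__builtin_clzll(core_mask); /* like fls64() */

	uint64_t avail = (1ull << non_core_block_count) - 1; /* non-core blocks always available */
	avail |= core_mask << non_core_block_count;          /* embed the core mask above them */

	printf("%zu non-core blocks + %zu core slots -> avail mask 0x%llx\n",
	       non_core_block_count, sc_block_count, (unsigned long long)avail);
	return 0;
}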
@@ -215,7 +294,7 @@ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in
{
WARN_ON(!gpu_info);
- return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) *
+ return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) *
gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
}
@@ -248,7 +327,10 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info,
* metadata since physical HW uses 32-bit per value but metadata
* specifies 64-bit per value.
*/
- WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes);
+ if (WARN(dump_bytes * 2 != metadata->dump_buf_bytes,
+ "Dump buffer size expected to be %zu, instead is %zu", dump_bytes * 2,
+ metadata->dump_buf_bytes))
+ return -EINVAL;
*out_metadata = metadata;
*out_dump_bytes = dump_bytes;
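
As a worked example of the size check above (illustrative numbers only): the hardware writes each value as a u32 while the metadata models each value as a u64, so the metadata dump buffer is exactly twice the raw hardware dump size.

#include <stdio.h>

int main(void)
{
	/* Example configuration: FE + Tiler + 2 L2 slices + 4 shader-core slots. */
	const unsigned int blocks = 2 + 2 + 4;
	const unsigned int values_per_block = 64;   /* 4 headers + 60 counters */

	const unsigned int hw_bytes = blocks * values_per_block * 4; /* u32 per value */
	const unsigned int md_bytes = blocks * values_per_block * 8; /* u64 per value */

	printf("hw dump = %u bytes, metadata dump = %u bytes (x2)\n", hw_bytes, md_bytes);
	return 0;
}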
@@ -291,72 +373,76 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat
kbase_hwcnt_metadata_destroy(metadata);
}
-static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk)
+bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
{
- bool is_shader = false;
-
- /* Warn on unknown group type */
- if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
- return false;
-
if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC ||
blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 ||
blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 ||
blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED)
- is_shader = true;
+ return true;
- return is_shader;
+ return false;
}
-static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type)
+bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
{
- bool is_l2_cache = false;
-
- switch (grp_type) {
- case KBASE_HWCNT_GPU_GROUP_TYPE_V5:
- if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
- blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
- blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
- is_l2_cache = true;
- break;
- default:
- /* Warn on unknown group type */
- WARN_ON(true);
- }
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED)
+ return true;
+
+ return false;
+}
+
+bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
+{
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED)
+ return true;
+
+ return false;
+}
- return is_l2_cache;
+bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
+{
+ if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3 ||
+ blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED)
+ return true;
+
+ return false;
}
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask,
+ u64 debug_core_mask, u64 max_core_mask, size_t max_l2_slices,
const struct kbase_hwcnt_curr_config *curr_config, bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
const u64 *dump_src = src;
size_t src_offset = 0;
u64 core_mask = pm_core_mask;
+ u64 shader_present = curr_config->shader_present;
/* Variables to deal with the current configuration */
- int l2_count = 0;
+ size_t l2_count = 0;
if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
- const size_t ctr_cnt =
- kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
- const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
- const bool is_shader_core = is_block_type_shader(
- kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk);
- const bool is_l2_cache = is_block_type_l2_cache(
- kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
- const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
- kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk);
+ const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk);
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk);
+ const bool is_shader_core = kbase_hwcnt_is_block_type_shader(blk_type);
+ const bool is_l2_cache = kbase_hwcnt_is_block_type_memsys(blk_type);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(blk_type);
+ blk_stt_t *dst_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
bool hw_res_available = true;
/*
@@ -383,45 +469,107 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/*
* Skip block if no values in the destination block are enabled.
*/
- if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
- u64 *dst_blk =
- kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
- bool blk_powered;
+ bool blk_valid = (!is_undefined && hw_res_available);
+
+ if (blk_valid) {
+ bool blk_powered;
+ blk_stt_t current_block_state = 0;
+
+ if (!is_shader_core) {
+ /* The L2 block must be available at this point, or handled
+ * differently below.
+ * Every partition must have a FE and a tiler, so they
+ * must be implicitly available as part of the current
+ * configuration.
+ */
+ blk_powered = true;
+ current_block_state |= KBASE_HWCNT_STATE_AVAILABLE;
+ } else {
+ /* Check the PM core mask to see if the shader core is
+ * powered up.
+ */
+ blk_powered = core_mask & 1;
+
+ /* Set availability bits based on whether the core is
+ * present in both the shader_present AND the core
+ * mask in sysFS. The core masks are shifted to the
+ * right at the end of the loop so always check the
+ * rightmost bit.
+ */
+ if ((shader_present & debug_core_mask) & 0x1)
+ current_block_state |= KBASE_HWCNT_STATE_AVAILABLE;
+ else {
+ /* If this branch is taken, the shader core may
+ * be:
+ * * in the max configuration, but not enabled
+ * through the sysFS core mask
+ * * in the max configuration, but not in the
+ * current configuration
+ * * physically not present
+ */
+ current_block_state |=
+ KBASE_HWCNT_STATE_UNAVAILABLE;
+ }
+ }
- if (!is_shader_core) {
- /* Under the current PM system, counters will
- * only be enabled after all non shader core
- * blocks are powered up.
- */
- blk_powered = true;
- } else {
- /* Check the PM core mask to see if the shader
- * core is powered up.
+ /* Note: KBASE_HWCNT_STATE_OFF for non-shader cores (L2, Tiler, JM)
+ * is handled on this backend's dump_disable function (since
+ * they are considered to always be powered here).
*/
- blk_powered = core_mask & 1;
- }
+ current_block_state |= (blk_powered) ? KBASE_HWCNT_STATE_ON :
+ KBASE_HWCNT_STATE_OFF;
- if (blk_powered && !is_undefined && hw_res_available) {
- /* Only powered and defined blocks have valid data. */
if (accumulate) {
- kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
- hdr_cnt, ctr_cnt);
+ /* Only update existing counter values if block was powered
+ * and valid
+ */
+ if (blk_powered)
+ kbase_hwcnt_dump_buffer_block_accumulate(
+ dst_blk, src_blk, hdr_cnt, ctr_cnt);
+
+ kbase_hwcnt_block_state_append(dst_blk_stt,
+ current_block_state);
} else {
- kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
- (hdr_cnt + ctr_cnt));
+ if (blk_powered) {
+ kbase_hwcnt_dump_buffer_block_copy(
+ dst_blk, src_blk, (hdr_cnt + ctr_cnt));
+ } else {
+ /* src is garbage, so zero the dst */
+ kbase_hwcnt_dump_buffer_block_zero(
+ dst_blk, (hdr_cnt + ctr_cnt));
+ }
+
+ kbase_hwcnt_block_state_set(dst_blk_stt,
+ current_block_state);
+ }
+ } else if (is_l2_cache && !is_undefined) {
+				/* A defined L2 block can only reach here when the partition does not
+ * own it. Check that the L2 count is within the resource
+ * group or whole GPU's max L2 count, and if so,
+ * mark it as unavailable.
+ */
+ if (l2_count <= max_l2_slices) {
+ kbase_hwcnt_block_state_set(
+ dst_blk_stt, KBASE_HWCNT_STATE_OFF |
+ KBASE_HWCNT_STATE_UNAVAILABLE);
}
+ kbase_hwcnt_dump_buffer_block_zero(dst_blk, (hdr_cnt + ctr_cnt));
} else {
- /* Even though the block might be undefined, the
- * user has enabled counter collection for it.
- * We should not propagate garbage data.
+ /* Even though the block is undefined, the user has
+ * enabled counter collection for it. We should not propagate
+ * garbage data, or copy/accumulate the block states.
*/
if (accumulate) {
/* No-op to preserve existing values */
} else {
- /* src is garbage, so zero the dst */
+ /* src is garbage, so zero the dst and reset block state */
kbase_hwcnt_dump_buffer_block_zero(dst_blk,
(hdr_cnt + ctr_cnt));
+ kbase_hwcnt_block_state_set(dst_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
}
}
}
@@ -429,66 +577,79 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
/* Just increase the src_offset if the HW is available */
if (hw_res_available)
src_offset += (hdr_cnt + ctr_cnt);
- if (is_shader_core)
- core_mask = core_mask >> 1;
+ if (is_shader_core) {
+ /* Shift each core mask right by 1 */
+ core_mask >>= 1;
+ debug_core_mask >>= 1;
+ max_core_mask >>= 1;
+ shader_present >>= 1;
+ }
}
return 0;
}
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
+ blk_stt_t *src_block_stt,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
const u64 *dump_src = src;
size_t src_offset = 0;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
+ size_t blk_inst_count = 0;
- if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata))
+ if (!dst || !src || !src_block_stt || !dst_enable_map ||
+ (dst_enable_map->metadata != dst->metadata))
return -EINVAL;
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
- const size_t ctr_cnt =
- kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
- const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
- const bool is_undefined = kbase_hwcnt_is_block_type_undefined(
- kbase_hwcnt_metadata_group_type(metadata, grp), blk_type);
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk);
+ const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk);
+ const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, blk);
+ const bool is_undefined = kbase_hwcnt_is_block_type_undefined(blk_type);
+ blk_stt_t *dst_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
/*
* Skip block if no values in the destination block are enabled.
*/
- if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) {
- u64 *dst_blk =
- kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
if (!is_undefined) {
if (accumulate) {
kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk,
hdr_cnt, ctr_cnt);
+ kbase_hwcnt_block_state_append(
+ dst_blk_stt, src_block_stt[blk_inst_count]);
} else {
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk,
(hdr_cnt + ctr_cnt));
+ kbase_hwcnt_block_state_set(dst_blk_stt,
+ src_block_stt[blk_inst_count]);
}
} else {
- /* Even though the block might be undefined, the
- * user has enabled counter collection for it.
- * We should not propagate garbage data.
+ /* Even though the block might be undefined, the user has enabled
+ * counter collection for it. We should not propagate garbage
+ * data, or copy/accumulate the block states.
*/
if (accumulate) {
/* No-op to preserve existing values */
} else {
- /* src is garbage, so zero the dst */
+ /* src is garbage, so zero the dst and reset block state */
kbase_hwcnt_dump_buffer_block_zero(dst_blk,
(hdr_cnt + ctr_cnt));
+ kbase_hwcnt_block_state_set(dst_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
}
}
}
-
+ blk_inst_count++;
src_offset += (hdr_cnt + ctr_cnt);
}
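
The per-core mask handling in kbase_hwcnt_jm_dump_get() above (power taken from the PM core mask, availability from shader_present ANDed with the user/debug core mask, every mask shifted right once per shader core block) can be sketched in isolation. The values below are arbitrary examples and the printed state names are stand-ins, not the driver's KBASE_HWCNT_STATE_* flags.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pm_core_mask = 0x5;    /* cores currently powered for instrumentation */
	uint64_t shader_present = 0x7;  /* cores in the current configuration */
	uint64_t debug_core_mask = 0x3; /* user-selected cores (sysfs) */
	unsigned int core;

	for (core = 0; core < 3; core++) {
		const int powered = pm_core_mask & 1;
		const int available = (shader_present & debug_core_mask) & 1;

		printf("core %u: %s, %s\n", core, powered ? "ON" : "OFF",
		       available ? "AVAILABLE" : "UNAVAILABLE");

		/* Shift every mask right by one, as at the end of the loop above. */
		pm_core_mask >>= 1;
		shader_present >>= 1;
		debug_core_mask >>= 1;
	}
	return 0;
}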
@@ -541,58 +702,79 @@ void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_m
u64 shader_bm[EM_COUNT] = { 0 };
u64 tiler_bm[EM_COUNT] = { 0 };
u64 mmu_l2_bm[EM_COUNT] = { 0 };
- size_t grp, blk, blk_inst;
+ u64 fw_bm[EM_COUNT] = { 0 };
+ u64 csg_bm[EM_COUNT] = { 0 };
+ size_t blk, blk_inst;
if (WARN_ON(!src) || WARN_ON(!dst))
return;
metadata = src->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
- const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
- const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst);
-
- if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
- const size_t map_stride =
- kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
- size_t map_idx;
-
- for (map_idx = 0; map_idx < map_stride; ++map_idx) {
- if (WARN_ON(map_idx >= EM_COUNT))
- break;
-
- switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
- /* Nothing to do in this case. */
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
- fe_bm[map_idx] |= blk_map[map_idx];
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
- tiler_bm[map_idx] |= blk_map[map_idx];
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
- shader_bm[map_idx] |= blk_map[map_idx];
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
- mmu_l2_bm[map_idx] |= blk_map[map_idx];
- break;
- default:
- WARN_ON(true);
- }
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk);
+ const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, blk, blk_inst);
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ fe_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ tiler_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ shader_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ mmu_l2_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3:
+ fw_bm[map_idx] |= blk_map[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3:
+ csg_bm[map_idx] |= blk_map[map_idx];
+ break;
+ default:
+ WARN(true, "Unknown block type %llu", blk_type);
}
- } else {
- WARN_ON(true);
}
}
@@ -603,6 +785,8 @@ void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_m
kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]);
dst->mmu_l2_bm =
kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]);
+ dst->fw_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fw_bm[EM_LO], fw_bm[EM_HI]);
+ dst->csg_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(csg_bm[EM_LO], csg_bm[EM_HI]);
}
void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src)
@@ -625,72 +809,102 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kb
void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst,
const struct kbase_hwcnt_physical_enable_map *src)
{
- const struct kbase_hwcnt_metadata *metadata;
+ struct kbase_hwcnt_enable_cm cm = {};
- u64 fe_bm[EM_COUNT] = { 0 };
- u64 shader_bm[EM_COUNT] = { 0 };
- u64 tiler_bm[EM_COUNT] = { 0 };
- u64 mmu_l2_bm[EM_COUNT] = { 0 };
- size_t grp, blk, blk_inst;
+ if (WARN_ON(!src) || WARN_ON(!dst))
+ return;
+
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &cm.fe_bm[EM_LO],
+ &cm.fe_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &cm.shader_bm[EM_LO],
+ &cm.shader_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &cm.tiler_bm[EM_LO],
+ &cm.tiler_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &cm.mmu_l2_bm[EM_LO],
+ &cm.mmu_l2_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fw_bm, &cm.fw_bm[EM_LO],
+ &cm.fw_bm[EM_HI]);
+ kbasep_hwcnt_backend_gpu_block_map_from_physical(src->csg_bm, &cm.csg_bm[EM_LO],
+ &cm.csg_bm[EM_HI]);
+
+ kbase_hwcnt_gpu_enable_map_from_cm(dst, &cm);
+}
+
+void kbase_hwcnt_gpu_enable_map_from_cm(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_cm *src)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t blk, blk_inst;
if (WARN_ON(!src) || WARN_ON(!dst))
return;
metadata = dst->metadata;
- kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]);
- kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO],
- &shader_bm[EM_HI]);
- kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO],
- &tiler_bm[EM_HI]);
- kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO],
- &mmu_l2_bm[EM_HI]);
-
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
- const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk);
- u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
-
- if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
- const size_t map_stride =
- kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
- size_t map_idx;
-
- for (map_idx = 0; map_idx < map_stride; ++map_idx) {
- if (WARN_ON(map_idx >= EM_COUNT))
- break;
-
- switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
- /* Nothing to do in this case. */
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
- blk_map[map_idx] = fe_bm[map_idx];
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
- blk_map[map_idx] = tiler_bm[map_idx];
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
- blk_map[map_idx] = shader_bm[map_idx];
- break;
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
- case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
- blk_map[map_idx] = mmu_l2_bm[map_idx];
- break;
- default:
- WARN_ON(true);
- }
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk);
+ u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst);
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk);
+ size_t map_idx;
+
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
+
+ switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) {
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED:
+ /* Nothing to do in this case. */
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
+ blk_map[map_idx] = src->fe_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
+ blk_map[map_idx] = src->tiler_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
+ blk_map[map_idx] = src->shader_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
+ blk_map[map_idx] = src->mmu_l2_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3:
+ blk_map[map_idx] = src->fw_bm[map_idx];
+ break;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2:
+ fallthrough;
+ case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3:
+ blk_map[map_idx] = src->csg_bm[map_idx];
+ break;
+ default:
+ WARN(true, "Invalid block type %llu", blk_type);
}
- } else {
- WARN_ON(true);
}
}
}
@@ -699,40 +913,34 @@ void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
const struct kbase_hwcnt_enable_map *enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata))
return;
metadata = buf->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp);
- u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst);
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, blk, blk_inst);
const u64 *blk_map =
- kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
+ kbase_hwcnt_enable_map_block_instance(enable_map, blk, blk_inst);
- if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) {
- const size_t map_stride =
- kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk);
- u64 prfcnt_bm[EM_COUNT] = { 0 };
- u32 prfcnt_en = 0;
- size_t map_idx;
+ const size_t map_stride =
+ kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk);
+ u64 prfcnt_bm[EM_COUNT] = { 0 };
+ u32 prfcnt_en = 0;
+ size_t map_idx;
- for (map_idx = 0; map_idx < map_stride; ++map_idx) {
- if (WARN_ON(map_idx >= EM_COUNT))
- break;
+ for (map_idx = 0; map_idx < map_stride; ++map_idx) {
+ if (WARN_ON(map_idx >= EM_COUNT))
+ break;
- prfcnt_bm[map_idx] = blk_map[map_idx];
- }
+ prfcnt_bm[map_idx] = blk_map[map_idx];
+ }
- prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO],
- prfcnt_bm[EM_HI]);
+ prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO],
+ prfcnt_bm[EM_HI]);
- buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
- } else {
- WARN_ON(true);
- }
+ buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en;
}
}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
index a49c31e..4339fdd 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#ifndef _KBASE_HWCNT_GPU_H_
#define _KBASE_HWCNT_GPU_H_
+#include "hwcnt/mali_kbase_hwcnt_types.h"
+
#include <linux/bug.h>
#include <linux/types.h>
@@ -31,10 +33,10 @@ struct kbase_hwcnt_enable_map;
struct kbase_hwcnt_dump_buffer;
/* Hardware counter version 5 definitions, V5 is the only supported version. */
-#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4
+#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 7
#define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4
#define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60
-#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
+#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \
(KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK)
/* FrontEnd block count in V5 GPU hardware counter. */
@@ -49,15 +51,6 @@ struct kbase_hwcnt_dump_buffer;
#define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32))
/**
- * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to
- * identify metadata groups.
- * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type.
- */
-enum kbase_hwcnt_gpu_group_type {
- KBASE_HWCNT_GPU_GROUP_TYPE_V5,
-};
-
-/**
* enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types,
* used to identify metadata blocks.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: Front End block (Job manager
@@ -79,6 +72,14 @@ enum kbase_hwcnt_gpu_group_type {
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block.
* @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: FW block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: Secondary FW block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: Tertiary FW block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: Undefined FW block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: CSG block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: Secondary CSG block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: Tertiary CSG block.
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: Undefined CSG block.
*/
enum kbase_hwcnt_gpu_v5_block_type {
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE,
@@ -94,6 +95,14 @@ enum kbase_hwcnt_gpu_v5_block_type {
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2,
KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3,
+ KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED,
};
/**
@@ -117,12 +126,34 @@ enum kbase_hwcnt_set {
* @shader_bm: Shader counters selection bitmask.
* @tiler_bm: Tiler counters selection bitmask.
* @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ * @fw_bm: CSF firmware counters selection bitmask.
+ * @csg_bm: CSF CSG counters selection bitmask.
*/
struct kbase_hwcnt_physical_enable_map {
u32 fe_bm;
u32 shader_bm;
u32 tiler_bm;
u32 mmu_l2_bm;
+ u32 fw_bm;
+ u32 csg_bm;
+};
+
+/**
+ * struct kbase_hwcnt_enable_cm - 128-bit enable counter masks.
+ * @fe_bm: Front end (JM/CSHW) counters selection bitmask.
+ * @shader_bm: Shader counters selection bitmask.
+ * @tiler_bm: Tiler counters selection bitmask.
+ * @mmu_l2_bm: MMU_L2 counters selection bitmask.
+ * @fw_bm: CSF firmware counters selection bitmask.
+ * @csg_bm: CSF CSG counters selection bitmask.
+ */
+struct kbase_hwcnt_enable_cm {
+ u64 fe_bm[2];
+ u64 shader_bm[2];
+ u64 tiler_bm[2];
+ u64 mmu_l2_bm[2];
+ u64 fw_bm[2];
+ u64 csg_bm[2];
};
/*
@@ -140,14 +171,18 @@ enum kbase_hwcnt_physical_set {
* @l2_count: L2 cache count.
* @core_mask: Shader core mask. May be sparse.
* @clk_cnt: Number of clock domains available.
+ * @csg_cnt: Number of CSGs available.
* @prfcnt_values_per_block: Total entries (header + counters) of performance
* counter per block.
+ * @has_fw_counters: Whether the GPU has FW counters available.
*/
struct kbase_hwcnt_gpu_info {
size_t l2_count;
u64 core_mask;
u8 clk_cnt;
+ u8 csg_cnt;
size_t prfcnt_values_per_block;
+ bool has_fw_counters;
};
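
As an illustration only (the values are hypothetical, not taken from this diff), a CSF backend might describe its hardware to the metadata creation path like this; csg_cnt and has_fw_counters are the fields added by this change:

	struct kbase_hwcnt_gpu_info info = {
		.l2_count = 2,			/* two L2 slices */
		.core_mask = 0xF,		/* four shader cores, dense mask */
		.clk_cnt = 1,			/* one clock domain */
		.csg_cnt = 4,			/* new in this change: number of CSGs */
		.prfcnt_values_per_block = 64,	/* headers + counters per block */
		.has_fw_counters = true,	/* new in this change: FW blocks present */
	};
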
/**
@@ -197,18 +232,12 @@ struct kbase_hwcnt_curr_config {
/**
* kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined.
*
- * @grp_type: Hardware counter group type.
* @blk_type: Hardware counter block type.
*
* Return: true if the block type is undefined, else false.
*/
-static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type,
- const uint64_t blk_type)
+static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t blk_type)
{
- /* Warn on unknown group type */
- if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5))
- return false;
-
return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED ||
blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED ||
blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED ||
@@ -264,16 +293,23 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat
* kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw
* dump buffer in src into the dump buffer
* abstraction in dst.
- * @dst: Non-NULL pointer to destination dump buffer.
- * @src: Non-NULL pointer to source raw dump buffer, of same length
- * as dump_buf_bytes in the metadata of destination dump
- * buffer.
- * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
- * @pm_core_mask: PM state synchronized shaders core mask with the dump.
- * @curr_config: Current allocated hardware resources to correctly map the
- * source raw dump buffer to the destination dump buffer.
- * @accumulate: True if counters in source should be accumulated into
- * destination, rather than copied.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of destination dump
+ * buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @pm_core_mask: PM state synchronized shaders core mask with the dump.
+ * @debug_core_mask: User-set mask of cores to be used by the GPU.
+ * @max_core_mask: Core mask of all cores allocated to the GPU (non
+ * virtualized platforms) or resource group (virtualized
+ * platforms).
+ * @max_l2_slices: Maximum number of L2 slices allocated to the GPU (non
+ *                   virtualized platforms) or resource group (virtualized
+ * platforms).
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * source raw dump buffer to the destination dump buffer.
+ * @accumulate: True if counters in source should be accumulated into
+ * destination, rather than copied.
*
* The dst and dst_enable_map MUST have been created from the same metadata as
* returned from the call to kbase_hwcnt_jm_metadata_create as was used to get
@@ -283,19 +319,23 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat
*/
int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
const struct kbase_hwcnt_enable_map *dst_enable_map,
- const u64 pm_core_mask,
- const struct kbase_hwcnt_curr_config *curr_config, bool accumulate);
+ const u64 pm_core_mask, u64 debug_core_mask, u64 max_core_mask,
+ size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config,
+ bool accumulate);
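
A hedged sketch of the updated call-site shape (the surrounding local variable names are illustrative, not taken from this diff): the three new arguments describe which cores and L2 slices could legitimately contribute counters, so the dump code can treat the remainder as unavailable.

	err = kbase_hwcnt_jm_dump_get(dst, src, dst_enable_map,
				      pm_core_mask,	/* cores currently powered */
				      debug_core_mask,	/* user-requested core mask */
				      max_core_mask,	/* all cores owned by this GPU/group */
				      max_l2_slices,	/* all L2 slices owned by this GPU/group */
				      curr_config, true /* accumulate */);
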
/**
* kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
* dump buffer in src into the dump buffer
* abstraction in dst.
- * @dst: Non-NULL pointer to destination dump buffer.
- * @src: Non-NULL pointer to source raw dump buffer, of same length
- * as dump_buf_bytes in the metadata of dst dump buffer.
- * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
- * @accumulate: True if counters in src should be accumulated into
- * destination, rather than copied.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of dst dump buffer.
+ * @src_block_stt: Non-NULL pointer to source block state buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @num_l2_slices: Current number of L2 slices allocated to the GPU.
+ * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
+ * @accumulate: True if counters in src should be accumulated into
+ * destination, rather than copied.
*
* The dst and dst_enable_map MUST have been created from the same metadata as
* returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
@@ -304,7 +344,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
* Return: 0 on success, else error code.
*/
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate);
+ blk_stt_t *src_block_stt,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate);
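
Correspondingly, a sketch of the CSF variant after this change (again with illustrative local names): src_block_stt carries the per-block state captured alongside the sample, and the L2/shader-present arguments describe the currently allocated hardware.

	err = kbase_hwcnt_csf_dump_get(dst, src,
				       src_block_stt,		/* per-block state for this sample */
				       dst_enable_map,
				       num_l2_slices,		/* L2 slices currently allocated */
				       shader_present_bitmap,	/* shader cores currently allocated */
				       false /* copy rather than accumulate */);
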
/**
* kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
@@ -404,4 +446,23 @@ void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst
void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf,
const struct kbase_hwcnt_enable_map *enable_map);
+bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
+
+bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
+
+bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
+
+bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
+/**
+ * kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from
+ * counter selection bitmasks.
+ * @dst: Non-NULL pointer to destination enable map abstraction.
+ * @src: Non-NULL pointer to source counter selection bitmasks.
+ *
+ * The dst must have been created from a metadata returned from a call to
+ * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create.
+ */
+void kbase_hwcnt_gpu_enable_map_from_cm(struct kbase_hwcnt_enable_map *dst,
+ const struct kbase_hwcnt_enable_cm *src);
+
#endif /* _KBASE_HWCNT_GPU_H_ */
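
A minimal usage sketch for the new conversion helper. The split of each 128-bit mask into element [0] (low 64 bits) and element [1] (high 64 bits) is an assumption made for illustration and should be checked against the implementation:

	struct kbase_hwcnt_enable_cm cm = { 0 };

	cm.fe_bm[0] = ~0ULL;		/* enable the low-half front-end counters */
	cm.shader_bm[0] = ~0ULL;	/* enable the low-half shader counters */
	cm.fw_bm[0] = ~0ULL;		/* new in this change: FW counters */
	cm.csg_bm[0] = ~0ULL;		/* new in this change: CSG counters */

	/* dst must have been created from JM or CSF metadata. */
	kbase_hwcnt_gpu_enable_map_from_cm(dst, &cm);
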
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
deleted file mode 100644
index 0cf2f94..0000000
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c
+++ /dev/null
@@ -1,298 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
-/*
- *
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#include "hwcnt/mali_kbase_hwcnt_gpu.h"
-#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h"
-
-#include <linux/bug.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-
-int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
- const struct kbase_hwcnt_metadata *src_md)
-{
- struct kbase_hwcnt_description desc;
- struct kbase_hwcnt_group_description group;
- struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT];
- size_t prfcnt_values_per_block;
- size_t blk;
- int err;
- struct kbase_hwcnt_metadata_narrow *metadata_narrow;
-
- if (!dst_md_narrow || !src_md || !src_md->grp_metadata ||
- !src_md->grp_metadata[0].blk_metadata)
- return -EINVAL;
-
- /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block
- * count in the metadata.
- */
- if ((kbase_hwcnt_metadata_group_count(src_md) != 1) ||
- (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT))
- return -EINVAL;
-
- /* Get the values count in the first block. */
- prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0);
-
- /* check all blocks should have same values count. */
- for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
- size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk);
- if (val_cnt != prfcnt_values_per_block)
- return -EINVAL;
- }
-
- /* Only support 64 and 128 entries per block. */
- if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128))
- return -EINVAL;
-
- metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL);
- if (!metadata_narrow)
- return -ENOMEM;
-
- /* Narrow to 64 entries per block to keep API backward compatibility. */
- prfcnt_values_per_block = 64;
-
- for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) {
- size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk);
- blks[blk] = (struct kbase_hwcnt_block_description){
- .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk),
- .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk),
- .hdr_cnt = blk_hdr_cnt,
- .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt,
- };
- }
-
- group = (struct kbase_hwcnt_group_description){
- .type = kbase_hwcnt_metadata_group_type(src_md, 0),
- .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT,
- .blks = blks,
- };
-
- desc = (struct kbase_hwcnt_description){
- .grp_cnt = kbase_hwcnt_metadata_group_count(src_md),
- .avail_mask = src_md->avail_mask,
- .clk_cnt = src_md->clk_cnt,
- .grps = &group,
- };
-
- err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata);
- if (!err) {
- /* Narrow down the buffer size to half as the narrowed metadata
- * only supports 32-bit but the created metadata uses 64-bit for
- * block entry.
- */
- metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1;
- *dst_md_narrow = metadata_narrow;
- } else {
- kfree(metadata_narrow);
- }
-
- return err;
-}
-
-void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow)
-{
- if (!md_narrow)
- return;
-
- kbase_hwcnt_metadata_destroy(md_narrow->metadata);
- kfree(md_narrow);
-}
-
-int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
- struct kbase_hwcnt_dump_buffer_narrow *dump_buf)
-{
- size_t dump_buf_bytes;
- size_t clk_cnt_buf_bytes;
- u8 *buf;
-
- if (!md_narrow || !dump_buf)
- return -EINVAL;
-
- dump_buf_bytes = md_narrow->dump_buf_bytes;
- clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
-
- /* Make a single allocation for both dump_buf and clk_cnt_buf. */
- buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
-
- *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){
- .md_narrow = md_narrow,
- .dump_buf = (u32 *)buf,
- .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes),
- };
-
- return 0;
-}
-
-void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow)
-{
- if (!dump_buf_narrow)
- return;
-
- kfree(dump_buf_narrow->dump_buf);
- *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL,
- .dump_buf = NULL,
- .clk_cnt_buf = NULL };
-}
-
-int kbase_hwcnt_dump_buffer_narrow_array_alloc(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
- struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
-{
- struct kbase_hwcnt_dump_buffer_narrow *buffers;
- size_t buf_idx;
- unsigned int order;
- unsigned long addr;
- size_t dump_buf_bytes;
- size_t clk_cnt_buf_bytes;
- size_t total_dump_buf_size;
-
- if (!md_narrow || !dump_bufs)
- return -EINVAL;
-
- dump_buf_bytes = md_narrow->dump_buf_bytes;
- clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt;
-
- /* Allocate memory for the dump buffer struct array */
- buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
- if (!buffers)
- return -ENOMEM;
-
- /* Allocate pages for the actual dump buffers, as they tend to be fairly
- * large.
- */
- order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
- addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-
- if (!addr) {
- kfree(buffers);
- return -ENOMEM;
- }
-
- *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){
- .page_addr = addr,
- .page_order = order,
- .buf_cnt = n,
- .bufs = buffers,
- };
-
- total_dump_buf_size = dump_buf_bytes * n;
- /* Set the buffer of each dump buf */
- for (buf_idx = 0; buf_idx < n; buf_idx++) {
- const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
- const size_t clk_cnt_buf_offset =
- total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx);
-
- buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){
- .md_narrow = md_narrow,
- .dump_buf = (u32 *)(addr + dump_buf_offset),
- .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset),
- };
- }
-
- return 0;
-}
-
-void kbase_hwcnt_dump_buffer_narrow_array_free(
- struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs)
-{
- if (!dump_bufs)
- return;
-
- kfree(dump_bufs->bufs);
- free_pages(dump_bufs->page_addr, dump_bufs->page_order);
- memset(dump_bufs, 0, sizeof(*dump_bufs));
-}
-
-void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
- const u64 *blk_em, size_t val_cnt)
-{
- size_t val;
-
- for (val = 0; val < val_cnt; val++) {
- bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val);
- u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val];
-
- dst_blk[val] = val_enabled ? src_val : 0;
- }
-}
-
-void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map)
-{
- const struct kbase_hwcnt_metadata_narrow *metadata_narrow;
- size_t grp;
- size_t clk;
-
- if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) ||
- WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) ||
- WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) ||
- WARN_ON(src->metadata->grp_cnt != 1) ||
- WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
- src->metadata->grp_metadata[0].blk_cnt) ||
- WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt !=
- KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) ||
- WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt >
- src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt))
- return;
-
- /* Don't use src metadata since src buffer is bigger than dst buffer. */
- metadata_narrow = dst_narrow->md_narrow;
-
- for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) {
- size_t blk;
- size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp);
-
- for (blk = 0; blk < blk_cnt; blk++) {
- size_t blk_inst;
- size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count(
- metadata_narrow, grp, blk);
-
- for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) {
- /* The narrowed down buffer is only 32-bit. */
- u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance(
- dst_narrow, grp, blk, blk_inst);
- const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(
- src, grp, blk, blk_inst);
- const u64 *blk_em = kbase_hwcnt_enable_map_block_instance(
- dst_enable_map, grp, blk, blk_inst);
- size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count(
- metadata_narrow, grp, blk);
- /* Align upwards to include padding bytes */
- val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
- val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT /
- KBASE_HWCNT_VALUE_BYTES));
-
- kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk,
- blk_em, val_cnt);
- }
- }
- }
-
- for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) {
- bool clk_enabled =
- kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
-
- dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0;
- }
-}
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
deleted file mode 100644
index afd236d..0000000
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h
+++ /dev/null
@@ -1,330 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_HWCNT_GPU_NARROW_H_
-#define _KBASE_HWCNT_GPU_NARROW_H_
-
-#include "hwcnt/mali_kbase_hwcnt_types.h"
-#include <linux/types.h>
-
-struct kbase_device;
-struct kbase_hwcnt_metadata;
-struct kbase_hwcnt_enable_map;
-struct kbase_hwcnt_dump_buffer;
-
-/**
- * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical
- * layout of narrow dump buffers.
- * For backward compatibility, the narrow
- * metadata only supports 64 counters per
- * block and 32-bit per block entry.
- * @metadata: Non-NULL pointer to the metadata before narrow down to
- * 32-bit per block entry, it has 64 counters per block and
- * 64-bit per value.
- * @dump_buf_bytes: The size in bytes after narrow 64-bit to 32-bit per block
- * entry.
- */
-struct kbase_hwcnt_metadata_narrow {
- const struct kbase_hwcnt_metadata *metadata;
- size_t dump_buf_bytes;
-};
-
-/**
- * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer.
- * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to
- * describe the layout of the narrow dump buffer.
- * @dump_buf: Non-NULL pointer to an array of u32 values, the array size
- * is md_narrow->dump_buf_bytes.
- * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
- * for each clock domain.
- */
-struct kbase_hwcnt_dump_buffer_narrow {
- const struct kbase_hwcnt_metadata_narrow *md_narrow;
- u32 *dump_buf;
- u64 *clk_cnt_buf;
-};
-
-/**
- * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump
- * buffer array.
- * @page_addr: Address of first allocated page. A single allocation is used for
- * all narrow dump buffers in the array.
- * @page_order: The allocation order of the pages, the order is on a logarithmic
- * scale.
- * @buf_cnt: The number of allocated dump buffers.
- * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors.
- */
-struct kbase_hwcnt_dump_buffer_narrow_array {
- unsigned long page_addr;
- unsigned int page_order;
- size_t buf_cnt;
- struct kbase_hwcnt_dump_buffer_narrow *bufs;
-};
-
-/**
- * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from
- * narrow metadata.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- *
- * Return: Number of hardware counter groups described by narrow metadata.
- */
-static inline size_t
-kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow)
-{
- return kbase_hwcnt_metadata_group_count(md_narrow->metadata);
-}
-
-/**
- * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group
- * from narrow metadata.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @grp: Index of the group in the narrow metadata.
- *
- * Return: Type of the group grp.
- */
-static inline u64
-kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow,
- size_t grp)
-{
- return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp);
-}
-
-/**
- * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a
- * group from narrow metadata.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @grp: Index of the group in the narrow metadata.
- *
- * Return: Number of blocks in group grp.
- */
-static inline size_t
-kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
- size_t grp)
-{
- return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp);
-}
-
-/**
- * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of
- * instances of a block
- * from narrow metadata.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @grp: Index of the group in the narrow metadata.
- * @blk: Index of the block in the group.
- *
- * Return: Number of instances of block blk in group grp.
- */
-static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
-{
- return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk);
-}
-
-/**
- * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter
- * headers from narrow
- * metadata.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @grp: Index of the group in the narrow metadata.
- * @blk: Index of the block in the group.
- *
- * Return: Number of counter headers in each instance of block blk in group grp.
- */
-static inline size_t
-kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
- size_t grp, size_t blk)
-{
- return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk);
-}
-
-/**
- * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of
- * counters from narrow
- * metadata.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @grp: Index of the group in the narrow metadata.
- * @blk: Index of the block in the group.
- *
- * Return: Number of counters in each instance of block blk in group grp.
- */
-static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk)
-{
- return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk);
-}
-
-/**
- * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values
- * from narrow metadata.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @grp: Index of the group in the narrow metadata.
- * @blk: Index of the block in the group.
- *
- * Return: Number of headers plus counters in each instance of block blk
- * in group grp.
- */
-static inline size_t
-kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow,
- size_t grp, size_t blk)
-{
- return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) +
- kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk);
-}
-
-/**
- * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a
- * narrowed block instance's
- * dump buffer.
- * @buf: Non-NULL pointer to narrow dump buffer.
- * @grp: Index of the group in the narrow metadata.
- * @blk: Index of the block in the group.
- * @blk_inst: Index of the block instance in the block.
- *
- * Return: u32* to the dump buffer for the block instance.
- */
-static inline u32 *
-kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf,
- size_t grp, size_t blk, size_t blk_inst)
-{
- return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index +
- buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
- (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride *
- blk_inst);
-}
-
-/**
- * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC
- * entries per block truncated to
- * 64 entries and block entry size
- * narrowed down to 32-bit.
- *
- * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored
- * on success.
- * @src_md: Non-NULL pointer to the HWC metadata used as the source to
- * create dst_md_narrow.
- *
- * For backward compatibility of the interface to user clients, a new metadata
- * with entries per block truncated to 64 and block entry size narrowed down
- * to 32-bit will be created for dst_md_narrow.
- * The total entries per block in src_md must be 64 or 128, if it's other
- * values, function returns error since it's not supported.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow,
- const struct kbase_hwcnt_metadata *src_md);
-
-/**
- * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow
- * metadata object.
- * @md_narrow: Pointer to hardware counter narrow metadata.
- */
-void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow);
-
-/**
- * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be
- * initialised to undefined values, so must be used as a copy
- * destination, or cleared before use.
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow,
- struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
-
-/**
- * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer.
- * @dump_buf: Dump buffer to be freed.
- *
- * Can be safely called on an all-zeroed narrow dump buffer structure, or on an
- * already freed narrow dump buffer.
- */
-void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf);
-
-/**
- * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow
- * dump buffers.
- * @md_narrow: Non-NULL pointer to narrow metadata.
- * @n: Number of narrow dump buffers to allocate
- * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array
- * object to be initialised.
- *
- * A single zeroed contiguous page allocation will be used for all of the
- * buffers inside the object, where:
- * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow.dump_buf_bytes
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_dump_buffer_narrow_array_alloc(
- const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n,
- struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
-
-/**
- * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer
- * array.
- * @dump_bufs: Narrow Dump buffer array to be freed.
- *
- * Can be safely called on an all-zeroed narrow dump buffer array structure, or
- * on an already freed narrow dump buffer array.
- */
-void kbase_hwcnt_dump_buffer_narrow_array_free(
- struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs);
-
-/**
- * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block
- * values from source to
- * destination.
- * @dst_blk: Non-NULL pointer to destination block obtained from a call to
- * kbase_hwcnt_dump_buffer_narrow_block_instance.
- * @src_blk: Non-NULL pointer to source block obtained from a call to
- * kbase_hwcnt_dump_buffer_block_instance.
- * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to
- * kbase_hwcnt_enable_map_block_instance.
- * @val_cnt: Number of values in the block.
- *
- * After the copy, any disabled values in destination will be zero, the enabled
- * values in destination will be saturated at U32_MAX if the corresponding
- * source value is bigger than U32_MAX, or copy the value from source if the
- * corresponding source value is less than or equal to U32_MAX.
- */
-void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk,
- const u64 *blk_em, size_t val_cnt);
-
-/**
- * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a
- * narrow dump buffer.
- * @dst_narrow: Non-NULL pointer to destination dump buffer.
- * @src: Non-NULL pointer to source dump buffer.
- * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
- *
- * After the operation, all non-enabled values (including padding bytes) will be
- * zero. Slower than the non-strict variant.
- *
- * The enabled values in dst_narrow will be saturated at U32_MAX if the
- * corresponding source value is bigger than U32_MAX, or copy the value from
- * source if the corresponding source value is less than or equal to U32_MAX.
- */
-void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow,
- const struct kbase_hwcnt_dump_buffer *src,
- const struct kbase_hwcnt_enable_map *dst_enable_map);
-
-#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
index 763eb31..3d0ad5a 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,15 +27,15 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
const struct kbase_hwcnt_metadata **out_metadata)
{
char *buf;
+ size_t blk;
struct kbase_hwcnt_metadata *metadata;
- struct kbase_hwcnt_group_metadata *grp_mds;
- size_t grp;
- size_t enable_map_count; /* Number of u64 bitfields (inc padding) */
- size_t dump_buf_count; /* Number of u64 values (inc padding) */
- size_t avail_mask_bits; /* Number of availability mask bits */
-
- size_t size;
+ struct kbase_hwcnt_block_metadata *blk_mds;
+ size_t enable_map_count = 0; /* Number of u64 bitfields (inc padding) */
+ size_t dump_buf_count = 0; /* Number of u64 values (inc padding) */
+ size_t avail_mask_bits = 0;
+ size_t state_count = 0;
size_t offset;
+ size_t size;
if (!desc || !out_metadata)
return -EINVAL;
@@ -50,13 +50,8 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
size = 0;
size += sizeof(struct kbase_hwcnt_metadata);
- /* Group metadata */
- size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
-
/* Block metadata */
- for (grp = 0; grp < desc->grp_cnt; grp++) {
- size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt;
- }
+ size += sizeof(struct kbase_hwcnt_block_metadata) * desc->blk_cnt;
/* Single allocation for the entire metadata */
buf = kmalloc(size, GFP_KERNEL);
@@ -70,79 +65,59 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
metadata = (struct kbase_hwcnt_metadata *)(buf + offset);
offset += sizeof(struct kbase_hwcnt_metadata);
- /* Bump allocate the group metadata */
- grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset);
- offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt;
-
- enable_map_count = 0;
- dump_buf_count = 0;
- avail_mask_bits = 0;
-
- for (grp = 0; grp < desc->grp_cnt; grp++) {
- size_t blk;
-
- const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp;
- struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp;
-
- size_t group_enable_map_count = 0;
- size_t group_dump_buffer_count = 0;
- size_t group_avail_mask_bits = 0;
-
- /* Bump allocate this group's block metadata */
- struct kbase_hwcnt_block_metadata *blk_mds =
- (struct kbase_hwcnt_block_metadata *)(buf + offset);
- offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt;
-
- /* Fill in each block in the group's information */
- for (blk = 0; blk < grp_desc->blk_cnt; blk++) {
- const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk;
- struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk;
- const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt;
-
- blk_md->type = blk_desc->type;
- blk_md->inst_cnt = blk_desc->inst_cnt;
- blk_md->hdr_cnt = blk_desc->hdr_cnt;
- blk_md->ctr_cnt = blk_desc->ctr_cnt;
- blk_md->enable_map_index = group_enable_map_count;
- blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values);
- blk_md->dump_buf_index = group_dump_buffer_count;
- blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS(
+ /* Bump allocate the block metadata */
+ blk_mds = (struct kbase_hwcnt_block_metadata *)(buf + offset);
+ offset += sizeof(struct kbase_hwcnt_block_metadata) * desc->blk_cnt;
+
+ /* Fill in each block */
+ for (blk = 0; blk < desc->blk_cnt; blk++) {
+ const struct kbase_hwcnt_block_description *blk_desc = desc->blks + blk;
+ struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk;
+ const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt;
+
+ *blk_md = (struct kbase_hwcnt_block_metadata){
+ .type = blk_desc->type,
+ .inst_cnt = blk_desc->inst_cnt,
+ .hdr_cnt = blk_desc->hdr_cnt,
+ .ctr_cnt = blk_desc->ctr_cnt,
+ .enable_map_index = enable_map_count,
+ .enable_map_stride = kbase_hwcnt_bitfield_count(n_values),
+ .dump_buf_index = dump_buf_count,
+ .dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS(
n_values,
- (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
- blk_md->avail_mask_index = group_avail_mask_bits;
-
- group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt;
- group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt;
- group_avail_mask_bits += blk_md->inst_cnt;
- }
-
- /* Fill in the group's information */
- grp_md->type = grp_desc->type;
- grp_md->blk_cnt = grp_desc->blk_cnt;
- grp_md->blk_metadata = blk_mds;
- grp_md->enable_map_index = enable_map_count;
- grp_md->dump_buf_index = dump_buf_count;
- grp_md->avail_mask_index = avail_mask_bits;
-
- enable_map_count += group_enable_map_count;
- dump_buf_count += group_dump_buffer_count;
- avail_mask_bits += group_avail_mask_bits;
+ (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)),
+ .avail_mask_index = avail_mask_bits,
+ .blk_stt_index = state_count,
+ .blk_stt_stride = KBASE_HWCNT_BLOCK_STATE_STRIDE,
+ };
+
+ enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt;
+ dump_buf_count += blk_md->dump_buf_stride * blk_md->inst_cnt;
+ avail_mask_bits += blk_md->inst_cnt;
+ state_count += blk_md->inst_cnt * blk_md->blk_stt_stride;
}
/* Fill in the top level metadata's information */
- metadata->grp_cnt = desc->grp_cnt;
- metadata->grp_metadata = grp_mds;
- metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES;
- metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES;
- metadata->avail_mask = desc->avail_mask;
- metadata->clk_cnt = desc->clk_cnt;
-
- WARN_ON(size != offset);
+ *metadata = (struct kbase_hwcnt_metadata){
+ .blk_cnt = desc->blk_cnt,
+ .blk_metadata = blk_mds,
+ .enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES,
+ .dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES,
+ .blk_stt_bytes = state_count * KBASE_HWCNT_BLOCK_STATE_BYTES,
+ .clk_cnt = desc->clk_cnt,
+ };
+
+ kbase_hwcnt_cp_avail_mask(&metadata->avail_mask, &desc->avail_mask);
+
+ if (WARN_ON(size != offset))
+ return -EINVAL;
+
/* Due to the block alignment, there should be exactly one enable map
* bit per 4 bytes in the dump buffer.
*/
- WARN_ON(metadata->dump_buf_bytes !=
- (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES));
+ if (WARN_ON(metadata->dump_buf_bytes !=
+ (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)))
+ return -EINVAL;
*out_metadata = metadata;
return 0;
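
To illustrate the new flat, group-free description layout consumed above, a hedged sketch of how a caller could build metadata for two block types. The block types, counts and clock count are placeholders (the types come from the GPU layer), and the availability mask is omitted for brevity:

	struct kbase_hwcnt_block_description blks[] = {
		{ .type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, .inst_cnt = 1,
		  .hdr_cnt = 4, .ctr_cnt = 60 },
		{ .type = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC, .inst_cnt = 4,
		  .hdr_cnt = 4, .ctr_cnt = 60 },
	};
	struct kbase_hwcnt_description desc = {
		.blk_cnt = ARRAY_SIZE(blks),	/* blocks now sit directly in the description */
		.blks = blks,
		.clk_cnt = 1,
	};
	const struct kbase_hwcnt_metadata *md;
	int err = kbase_hwcnt_metadata_create(&desc, &md);
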
@@ -189,6 +164,7 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
{
size_t dump_buf_bytes;
size_t clk_cnt_buf_bytes;
+ size_t block_state_bytes;
u8 *buf;
if (!metadata || !dump_buf)
@@ -196,15 +172,17 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
dump_buf_bytes = metadata->dump_buf_bytes;
clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
+ block_state_bytes = metadata->blk_stt_bytes;
- /* Make a single allocation for both dump_buf and clk_cnt_buf. */
- buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL);
+ /* Make a single allocation for dump_buf, clk_cnt_buf and block_state_buf. */
+ buf = kzalloc(dump_buf_bytes + clk_cnt_buf_bytes + block_state_bytes, GFP_KERNEL);
if (!buf)
return -ENOMEM;
dump_buf->metadata = metadata;
dump_buf->dump_buf = (u64 *)buf;
dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes);
+ dump_buf->blk_stt_buf = (blk_stt_t *)(buf + dump_buf_bytes + clk_cnt_buf_bytes);
return 0;
}
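
A short usage sketch: after this change a single zeroed allocation backs the counter values, the per-clock cycle counts and the new per-block state array, and all three are released together by kbase_hwcnt_dump_buffer_free().

	struct kbase_hwcnt_dump_buffer buf;
	int err = kbase_hwcnt_dump_buffer_alloc(metadata, &buf);

	if (!err) {
		/* buf.dump_buf, buf.clk_cnt_buf and buf.blk_stt_buf all point
		 * into the same kzalloc'd region and start out zeroed.
		 */
		kbase_hwcnt_dump_buffer_free(&buf);
	}
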
@@ -218,72 +196,11 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf)
memset(dump_buf, 0, sizeof(*dump_buf));
}
-int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
- struct kbase_hwcnt_dump_buffer_array *dump_bufs)
-{
- struct kbase_hwcnt_dump_buffer *buffers;
- size_t buf_idx;
- unsigned int order;
- unsigned long addr;
- size_t dump_buf_bytes;
- size_t clk_cnt_buf_bytes;
-
- if (!metadata || !dump_bufs)
- return -EINVAL;
-
- dump_buf_bytes = metadata->dump_buf_bytes;
- clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt;
-
- /* Allocate memory for the dump buffer struct array */
- buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL);
- if (!buffers)
- return -ENOMEM;
-
- /* Allocate pages for the actual dump buffers, as they tend to be fairly
- * large.
- */
- order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n);
- addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-
- if (!addr) {
- kfree(buffers);
- return -ENOMEM;
- }
-
- dump_bufs->page_addr = addr;
- dump_bufs->page_order = order;
- dump_bufs->buf_cnt = n;
- dump_bufs->bufs = buffers;
-
- /* Set the buffer of each dump buf */
- for (buf_idx = 0; buf_idx < n; buf_idx++) {
- const size_t dump_buf_offset = dump_buf_bytes * buf_idx;
- const size_t clk_cnt_buf_offset =
- (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx);
-
- buffers[buf_idx].metadata = metadata;
- buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset);
- buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset);
- }
-
- return 0;
-}
-
-void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs)
-{
- if (!dump_bufs)
- return;
-
- kfree(dump_bufs->bufs);
- free_pages(dump_bufs->page_addr, dump_bufs->page_order);
- memset(dump_bufs, 0, sizeof(*dump_bufs));
-}
-
void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
@@ -291,21 +208,21 @@ void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst,
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
u64 *dst_blk;
size_t val_cnt;
- if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst))
continue;
- dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
- val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk);
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
}
memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt);
+ memset(dst->blk_stt_buf, 0, metadata->blk_stt_bytes);
}
void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst)
@@ -314,15 +231,15 @@ void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst)
return;
memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes);
-
memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt);
+ memset(dst->blk_stt_buf, 0, dst->metadata->blk_stt_bytes);
}
void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
WARN_ON(dst->metadata != dst_enable_map->metadata))
@@ -330,23 +247,29 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *ds
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
+ blk_stt_t *dst_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
const u64 *blk_em =
- kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
- size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk);
/* Align upwards to include padding bytes */
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
- if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) {
+ if (kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst)) {
/* Block available, so only zero non-enabled values */
kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt);
+
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst))
+ kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+
} else {
/* Block not available, so zero the entire thing */
kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt);
+ kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
}
}
}
@@ -356,7 +279,7 @@ void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
@@ -366,24 +289,27 @@ void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst,
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
u64 *dst_blk;
const u64 *src_blk;
+ blk_stt_t *dst_blk_stt;
+ const blk_stt_t *src_blk_stt;
size_t val_cnt;
- if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst))
continue;
- dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
- src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
- val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk);
+ dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
+ src_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst);
kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt);
+ kbase_hwcnt_block_state_copy(dst_blk_stt, src_blk_stt);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk];
}
@@ -394,7 +320,7 @@ void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
@@ -404,23 +330,30 @@ void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst,
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
- const u64 *src_blk =
- kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst);
+ blk_stt_t *dst_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
+ const blk_stt_t *src_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst);
const u64 *blk_em =
- kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
- size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk);
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst);
+ size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk);
+
/* Align upwards to include padding bytes */
val_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES));
kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt);
+
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst))
+ kbase_hwcnt_block_state_copy(dst_blk_stt, src_blk_stt);
+ else
+ kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
bool clk_enabled =
kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk);
@@ -433,7 +366,7 @@ void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
@@ -443,26 +376,29 @@ void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst,
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
u64 *dst_blk;
const u64 *src_blk;
+ blk_stt_t *dst_blk_stt;
+ const blk_stt_t *src_blk_stt;
size_t hdr_cnt;
size_t ctr_cnt;
- if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst))
+ if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst))
continue;
- dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
- src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
- hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
- ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
+ src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst);
+ dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
+ src_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst);
+ hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk);
+ ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk);
kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt);
+ kbase_hwcnt_block_state_accumulate(dst_blk_stt, src_blk_stt);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
}
@@ -473,7 +409,7 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d
const struct kbase_hwcnt_enable_map *dst_enable_map)
{
const struct kbase_hwcnt_metadata *metadata;
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
size_t clk;
if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) ||
@@ -483,15 +419,19 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d
metadata = dst->metadata;
- kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst)
- {
- u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
- const u64 *src_blk =
- kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst);
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
+ const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst);
const u64 *blk_em =
- kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst);
- size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
- size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk);
+ kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst);
+ blk_stt_t *dst_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
+ const blk_stt_t *src_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst);
+
+ size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk);
+ size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk);
+
/* Align upwards to include padding bytes */
ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS(
hdr_cnt + ctr_cnt,
@@ -499,13 +439,41 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d
kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt,
ctr_cnt);
+
+ if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst))
+ kbase_hwcnt_block_state_accumulate(dst_blk_stt, src_blk_stt);
+ else
+ kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
}
- kbase_hwcnt_metadata_for_each_clock(metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk];
else
dst->clk_cnt_buf[clk] = 0;
}
}
+
+void kbase_hwcnt_dump_buffer_block_state_update(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ blk_stt_t blk_stt_val)
+{
+ const struct kbase_hwcnt_metadata *metadata;
+ size_t blk, blk_inst;
+
+ if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) ||
+ WARN_ON(dst->metadata != dst_enable_map->metadata))
+ return;
+
+ metadata = dst->metadata;
+
+ kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) {
+ if (kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst) &&
+ kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
+ blk_stt_t *dst_blk_stt =
+ kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
+
+ *dst_blk_stt |= blk_stt_val;
+ }
+ }
+}
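
A brief usage sketch for the new helper, for example from a backend that has just taken a sample while the block was powered and running outside protected mode; the flag combination is illustrative:

	kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map,
						   KBASE_HWCNT_STATE_ON |
						   KBASE_HWCNT_STATE_AVAILABLE |
						   KBASE_HWCNT_STATE_NORMAL);
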
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
index 5c5ada4..c7afe17 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -34,12 +34,8 @@
* Terminology:
*
* Hardware Counter System:
- * A collection of hardware counter groups, making a full hardware counter
+ * A collection of hardware counter blocks, making a full hardware counter
* system.
- * Hardware Counter Group:
- * A group of Hardware Counter Blocks (e.g. a t62x might have more than one
- * core group, so has one counter group per core group, where each group
- * may have a different number and layout of counter blocks).
* Hardware Counter Block:
* A block of hardware counters (e.g. shader block, tiler block).
* Hardware Counter Block Instance:
@@ -59,10 +55,16 @@
*
* Enable Map:
* An array of u64 bitfields, where each bit either enables exactly one
- * block value, or is unused (padding).
+ * block value, or is unused (padding). Note that this is derived from
+ * the client configuration, and is not obtained from the hardware.
* Dump Buffer:
* An array of u64 values, where each u64 corresponds either to one block
* value, or is unused (padding).
+ * Block State Buffer:
+ * An array of blk_stt_t values, where each blk_stt_t corresponds to one block
+ *  instance and is used to track the on/off power state transitions, as well as
+ * hardware resource availability, and whether the block was operating
+ * in normal or protected mode.
* Availability Mask:
* A bitfield, where each bit corresponds to whether a block instance is
* physically available (e.g. an MP3 GPU may have a sparse core mask of
@@ -74,7 +76,6 @@
* Metadata:
* Structure describing the physical layout of the enable map and dump buffers
* for a specific hardware counter system.
- *
*/
#ifndef _KBASE_HWCNT_TYPES_H_
@@ -98,10 +99,14 @@
*/
#define KBASE_HWCNT_VALUE_BYTES (sizeof(u64))
+/* Number of elements in the avail_mask array in kbase_hwcnt_metadata */
+#define KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT 2
+
/* Number of bits in an availability mask (i.e. max total number of block
* instances supported in a Hardware Counter System)
*/
-#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE)
+#define KBASE_HWCNT_AVAIL_MASK_BITS \
+ (sizeof(u64) * KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT * BITS_PER_BYTE)
/* Minimum alignment of each block of hardware counters */
#define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES)
@@ -114,9 +119,60 @@
* Return: Input value if already aligned to the specified boundary, or next
* (incrementing upwards) aligned value.
*/
-#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
+#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \
(value + ((alignment - (value % alignment)) % alignment))
+typedef u8 blk_stt_t;
+
+/* Number of bytes storing the per-block state transition information. */
+#define KBASE_HWCNT_BLOCK_STATE_BYTES (sizeof(blk_stt_t))
+
+/* Number of entries of blk_stt_t used to store the block state. */
+#define KBASE_HWCNT_BLOCK_STATE_STRIDE (1)
+
+/* Block state indicating that the hardware block state was indeterminable
+ * or not set during the sampling period.
+ */
+#define KBASE_HWCNT_STATE_UNKNOWN ((blk_stt_t)(0))
+
+/* Block state indicating that the hardware block was on or transitioned to on
+ * during the sampling period.
+ */
+#define KBASE_HWCNT_STATE_ON ((blk_stt_t)(1u << 0))
+
+/* Block state indicating that the hardware block was off or transitioned to off
+ * during the sampling period.
+ */
+#define KBASE_HWCNT_STATE_OFF ((blk_stt_t)(1u << 1))
+
+/* Block state indicating that the hardware block was available to the current
+ * VM for some portion of the sampling period.
+ */
+#define KBASE_HWCNT_STATE_AVAILABLE ((blk_stt_t)(1u << 2))
+
+/* Block state indicating that the hardware block was unavailable to the current
+ * VM for some portion of the sampling period.
+ */
+#define KBASE_HWCNT_STATE_UNAVAILABLE ((blk_stt_t)(1u << 3))
+
+/* Block state indicating that the hardware block was operating in normal mode
+ * for some portion of the sampling period.
+ */
+#define KBASE_HWCNT_STATE_NORMAL ((blk_stt_t)(1u << 4))
+
+/* Block state indicating that the hardware block was operating in protected mode
+ * for some portion of the sampling period.
+ */
+#define KBASE_HWCNT_STATE_PROTECTED ((blk_stt_t)(1u << 5))
+
+/* For a valid block state built from the above masks, at most
+ * KBASE_HWCNT_STATE_BITS bits can be set.
+ */
+#define KBASE_HWCNT_STATE_BITS (6)
+
+/* Mask to detect malformed block state bitmaps. */
+#define KBASE_HWCNT_STATE_MASK ((blk_stt_t)((1u << KBASE_HWCNT_STATE_BITS) - 1))
+
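/* Editor's illustrative sketch (not part of the patch): the state flags above
 * are OR-ed together as observations accumulate over a sample period; the
 * function below is hypothetical.
 */
static blk_stt_t example_observed_state(void)
{
        blk_stt_t stt = KBASE_HWCNT_STATE_UNKNOWN;

        /* Block was seen powered on, available to this VM, in normal mode. */
        stt |= KBASE_HWCNT_STATE_ON | KBASE_HWCNT_STATE_AVAILABLE |
               KBASE_HWCNT_STATE_NORMAL;

        /* A well-formed value never sets bits outside KBASE_HWCNT_STATE_MASK. */
        WARN_ON(stt & ~KBASE_HWCNT_STATE_MASK);
        return stt;
}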
/**
* struct kbase_hwcnt_block_description - Description of one or more identical,
* contiguous, Hardware Counter Blocks.
@@ -133,31 +189,25 @@ struct kbase_hwcnt_block_description {
};
/**
- * struct kbase_hwcnt_group_description - Description of one or more identical,
- * contiguous Hardware Counter Groups.
- * @type: The arbitrary identifier used to identify the type of the group.
- * @blk_cnt: The number of types of Hardware Counter Block in the group.
- * @blks: Non-NULL pointer to an array of blk_cnt block descriptions,
- * describing each type of Hardware Counter Block in the group.
- */
-struct kbase_hwcnt_group_description {
- u64 type;
- size_t blk_cnt;
- const struct kbase_hwcnt_block_description *blks;
+ * struct kbase_hwcnt_avail_mask - Mask type for HW Counter availability.
+ * @mask: Array of bitmask elements.
+ */
+struct kbase_hwcnt_avail_mask {
+ u64 mask[KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT];
};
-/**
+/*
* struct kbase_hwcnt_description - Description of a Hardware Counter System.
- * @grp_cnt: The number of Hardware Counter Groups.
- * @grps: Non-NULL pointer to an array of grp_cnt group descriptions,
- * describing each Hardware Counter Group in the system.
+ * @blk_cnt: The number of Hardware Counter Blocks.
+ * @blks: Non-NULL pointer to an array of blk_cnt block descriptions,
+ * describing each Hardware Counter Block in the system.
* @avail_mask: Flat Availability Mask for all block instances in the system.
* @clk_cnt: The number of clock domains in the system. The maximum is 64.
*/
struct kbase_hwcnt_description {
- size_t grp_cnt;
- const struct kbase_hwcnt_group_description *grps;
- u64 avail_mask;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_description *blks;
+ struct kbase_hwcnt_avail_mask avail_mask;
u8 clk_cnt;
};
@@ -183,6 +233,12 @@ struct kbase_hwcnt_description {
* @avail_mask_index: Index in bits into the parent's Availability Mask where
* the Availability Masks of the Block Instances described
* by this metadata start.
+ * @blk_stt_index: Index in blk_stt_t units into the parent's Block State Buffer
+ * where the Block State Masks of the Block Instances described
+ * by this metadata start.
+ * @blk_stt_stride: Stride in the underlying block state tracking type between
+ * the Block State bytes corresponding to each of the
+ * Block Instances.
*/
struct kbase_hwcnt_block_metadata {
u64 type;
@@ -194,58 +250,148 @@ struct kbase_hwcnt_block_metadata {
size_t dump_buf_index;
size_t dump_buf_stride;
size_t avail_mask_index;
+ size_t blk_stt_index;
+ size_t blk_stt_stride;
};
/**
- * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
- * of a group of blocks in a Hardware
- * Counter System's Dump Buffers and Enable
- * Maps.
- * @type: The arbitrary identifier used to identify the type of the
- * group.
- * @blk_cnt: The number of types of Hardware Counter Block in the
- * group.
- * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata,
- * describing the physical layout of each type of Hardware
- * Counter Block in the group.
- * @enable_map_index: Index in u64s into the parent's Enable Map where the
- * Enable Maps of the blocks within the group described by
- * this metadata start.
- * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
- * Dump Buffers of the blocks within the group described by
- * metadata start.
- * @avail_mask_index: Index in bits into the parent's Availability Mask where
- * the Availability Masks of the blocks within the group
- * described by this metadata start.
- */
-struct kbase_hwcnt_group_metadata {
- u64 type;
- size_t blk_cnt;
- const struct kbase_hwcnt_block_metadata *blk_metadata;
- size_t enable_map_index;
- size_t dump_buf_index;
- size_t avail_mask_index;
-};
+ * kbase_hwcnt_set_avail_mask() - Set bitfield values into a large bitmask. Convenience function.
+ *
+ * @avail_mask: Pointer to destination HWC mask, which is comprised of an array of u64 elements
+ * @u0: Value of element 0.
+ * @u1: Value of element 1.
+ */
+static inline void kbase_hwcnt_set_avail_mask(struct kbase_hwcnt_avail_mask *avail_mask, u64 u0,
+ u64 u1)
+{
+ /* If KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT gets updated, we must modify the signature of
+ * kbase_hwcnt_set_avail_mask() so that all elements continue to be set.
+ */
+ BUILD_BUG_ON(KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT != 2);
+
+ avail_mask->mask[0] = u0;
+ avail_mask->mask[1] = u1;
+}
+
+/**
+ * kbase_hwcnt_avail_masks_equal() - Compare two HWC availability masks
+ * @avail_mask0: First mask to compare
+ * @avail_mask1: Second mask to compare
+ *
+ * Return: true if the masks are equal, otherwise false.
+ */
+static inline bool kbase_hwcnt_avail_masks_equal(const struct kbase_hwcnt_avail_mask *avail_mask0,
+ const struct kbase_hwcnt_avail_mask *avail_mask1)
+{
+ return (!memcmp(avail_mask0, avail_mask1, sizeof(*avail_mask0)));
+}
+
+/**
+ * kbase_hwcnt_avail_masks_equal_values() - Compare a HWC availability mask against element values
+ * @avail_mask: Mask to compare
+ * @u0: First element of mask to compare against
+ * @u1: Second element of mask to compare against
+ *
+ * Return: true if the mask elements match the given values, otherwise false.
+ */
+static inline bool
+kbase_hwcnt_avail_masks_equal_values(const struct kbase_hwcnt_avail_mask *avail_mask, u64 u0,
+ u64 u1)
+{
+ BUILD_BUG_ON(KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT != 2);
+ return ((avail_mask->mask[0] == u0) && (avail_mask->mask[1] == u1));
+}
+
+/**
+ * kbase_hwcnt_cp_avail_mask() - Copy one avail mask into another
+ * @dst_avail_mask: Destination mask
+ * @src_avail_mask: Source mask
+ */
+static inline void kbase_hwcnt_cp_avail_mask(struct kbase_hwcnt_avail_mask *dst_avail_mask,
+ const struct kbase_hwcnt_avail_mask *src_avail_mask)
+{
+ memcpy(dst_avail_mask, src_avail_mask, sizeof(*dst_avail_mask));
+}
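/* Editor's illustrative sketch (not part of the patch): tying the avail-mask
 * helpers above together; the function is hypothetical.
 */
static void example_avail_mask_helpers(void)
{
        struct kbase_hwcnt_avail_mask src, dst;

        /* Populate both u64 elements of the 128-bit availability mask. */
        kbase_hwcnt_set_avail_mask(&src, 0xfULL, 0x0ULL);

        kbase_hwcnt_cp_avail_mask(&dst, &src);

        WARN_ON(!kbase_hwcnt_avail_masks_equal(&src, &dst));
        WARN_ON(!kbase_hwcnt_avail_masks_equal_values(&dst, 0xfULL, 0x0ULL));
}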
+
+/**
+ * kbase_hwcnt_set_avail_mask_bits() - Set a bitfield value into a large bitmask
+ *
+ * @avail_mask: Pointer to destination HWC mask, which is comprised of an array of u64 elements
+ * @offset_in_bits: The bit offset at which to place the value in the bitmask. The value being
+ * placed is expected to be fully contained by the array of bitmask elements.
+ * @length_in_bits: The length of the value being placed in the bitmask. Assumed to be no more
+ * than 64 bits in length.
+ * @value: The source value to be written into the bitmask.
+ */
+static inline void kbase_hwcnt_set_avail_mask_bits(struct kbase_hwcnt_avail_mask *avail_mask,
+ size_t offset_in_bits, size_t length_in_bits,
+ u64 value)
+{
+ size_t arr_offset = offset_in_bits / 64;
+ size_t bits_set = 0;
+
+ if (!length_in_bits)
+ return;
+
+ WARN_ON(length_in_bits > 64);
+ if (WARN_ON((offset_in_bits + length_in_bits) > (KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT << 6)))
+ return;
+
+ do {
+ size_t remaining_to_set = length_in_bits - bits_set;
+ size_t start_dest_bit_in_word = (offset_in_bits + bits_set) - (arr_offset * 64);
+ size_t bits_that_fit_into_this_word =
+ min(64 - start_dest_bit_in_word, remaining_to_set);
+
+ uint64_t dest_mask, mask, source_mask;
+ uint64_t source_fragment;
+
+ if (bits_that_fit_into_this_word == 64) {
+ mask = U64_MAX;
+ source_mask = U64_MAX;
+ dest_mask = U64_MAX;
+ } else {
+ mask = (1ULL << bits_that_fit_into_this_word) - 1;
+ source_mask = ((1ULL << (bits_that_fit_into_this_word)) - 1) << bits_set;
+ dest_mask = mask << start_dest_bit_in_word;
+ }
+
+ source_fragment = (value & source_mask) >> bits_set;
+
+ if (WARN_ON(arr_offset >= KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT))
+ break;
+
+ avail_mask->mask[arr_offset] &= ~dest_mask;
+ avail_mask->mask[arr_offset] |=
+ ((source_fragment & mask) << start_dest_bit_in_word);
+
+ arr_offset++;
+ bits_set += bits_that_fit_into_this_word;
+ } while (bits_set < length_in_bits);
+}
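/* Editor's worked example (not part of the patch): a field that straddles the
 * two u64 elements. Writing the 8-bit value 0xA5 at bit offset 60 places the
 * low nibble (0x5) in bits 60..63 of mask[0] and the high nibble (0xA) in
 * bits 0..3 of mask[1]; the function is hypothetical.
 */
static void example_set_straddling_bits(struct kbase_hwcnt_avail_mask *avail)
{
        kbase_hwcnt_set_avail_mask(avail, 0, 0);
        kbase_hwcnt_set_avail_mask_bits(avail, 60, 8, 0xA5);

        WARN_ON(avail->mask[0] != (0x5ULL << 60));
        WARN_ON(avail->mask[1] != 0xAULL);
}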
/**
* struct kbase_hwcnt_metadata - Metadata describing the memory layout
* of Dump Buffers and Enable Maps within a
* Hardware Counter System.
- * @grp_cnt: The number of Hardware Counter Groups.
- * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata,
+ * @blk_cnt: The number of Hardware Counter Blocks
+ * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata,
* describing the physical layout of each Hardware Counter
- * Group in the system.
+ * Block in the system.
* @enable_map_bytes: The size in bytes of an Enable Map needed for the system.
* @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system.
+ * @blk_stt_bytes: The size in bytes of a Block State Buffer needed for
+ * the system.
* @avail_mask: The Availability Mask for the system.
* @clk_cnt: The number of clock domains in the system.
*/
struct kbase_hwcnt_metadata {
- size_t grp_cnt;
- const struct kbase_hwcnt_group_metadata *grp_metadata;
+ size_t blk_cnt;
+ const struct kbase_hwcnt_block_metadata *blk_metadata;
size_t enable_map_bytes;
size_t dump_buf_bytes;
- u64 avail_mask;
+ size_t blk_stt_bytes;
+ struct kbase_hwcnt_avail_mask avail_mask;
u8 clk_cnt;
};
@@ -257,7 +403,7 @@ struct kbase_hwcnt_metadata {
* @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an
* array of u64 bitfields, each bit of which enables one hardware
* counter.
- * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle
+ * @clk_enable_map: A u64 bitfield, each bit of which enables the cycle
* counter for a given clock domain.
*/
struct kbase_hwcnt_enable_map {
@@ -274,27 +420,14 @@ struct kbase_hwcnt_enable_map {
* metadata->dump_buf_bytes.
* @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed
* for each clock domain.
+ * @blk_stt_buf: A pointer to an array of blk_stt_t values holding block state
+ * information for each block.
*/
struct kbase_hwcnt_dump_buffer {
const struct kbase_hwcnt_metadata *metadata;
u64 *dump_buf;
u64 *clk_cnt_buf;
-};
-
-/**
- * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array.
- * @page_addr: Address of allocated pages. A single allocation is used for all
- * Dump Buffers in the array.
- * @page_order: The allocation order of the pages, the order is on a logarithmic
- * scale.
- * @buf_cnt: The number of allocated Dump Buffers.
- * @bufs: Non-NULL pointer to the array of Dump Buffers.
- */
-struct kbase_hwcnt_dump_buffer_array {
- unsigned long page_addr;
- unsigned int page_order;
- size_t buf_cnt;
- struct kbase_hwcnt_dump_buffer *bufs;
+ blk_stt_t *blk_stt_buf;
};
/**
@@ -316,232 +449,229 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc,
void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata);
/**
- * kbase_hwcnt_metadata_group_count() - Get the number of groups.
- * @metadata: Non-NULL pointer to metadata.
- *
- * Return: Number of hardware counter groups described by metadata.
+ * kbase_hwcnt_block_state_set() - Set one or more block states
+ * for a block instance.
+ * @blk_stt: Pointer to destination block state instance
+ * @stt: Block state bitmask
*/
-static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata)
+static inline void kbase_hwcnt_block_state_set(blk_stt_t *blk_stt, blk_stt_t stt)
{
- if (WARN_ON(!metadata))
- return 0;
+ if (WARN_ON(stt & ~KBASE_HWCNT_STATE_MASK))
+ return;
- return metadata->grp_cnt;
+ *blk_stt = stt;
}
/**
- * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group.
- * @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- *
- * Return: Type of the group grp.
+ * kbase_hwcnt_block_state_append() - Adds one or more block states
+ * onto a block instance.
+ * @blk_stt: Pointer to destination block state instance
+ * @stt: Block state bitmask
*/
-static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata,
- size_t grp)
+static inline void kbase_hwcnt_block_state_append(blk_stt_t *blk_stt, blk_stt_t stt)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
- return 0;
+ if (WARN_ON(stt & ~KBASE_HWCNT_STATE_MASK))
+ return;
- return metadata->grp_metadata[grp].type;
+ *blk_stt |= stt;
+}
+
+/**
+ * kbase_hwcnt_block_state_copy() - Copy block state between two block
+ * state instances.
+ * @dst_blk_stt: Pointer to destination block state instance
+ * @src_blk_stt: Pointer to source block state instance.
+ */
+static inline void kbase_hwcnt_block_state_copy(blk_stt_t *dst_blk_stt,
+ const blk_stt_t *src_blk_stt)
+{
+ kbase_hwcnt_block_state_set(dst_blk_stt, *src_blk_stt);
+}
+
+/**
+ * kbase_hwcnt_block_state_accumulate() - Accumulate block state between two block
+ * state instances.
+ * @dst_blk_stt: Pointer to destination block state instance
+ * @src_blk_stt: Pointer to source block state instance.
+ */
+static inline void kbase_hwcnt_block_state_accumulate(blk_stt_t *dst_blk_stt,
+ const blk_stt_t *src_blk_stt)
+{
+ kbase_hwcnt_block_state_append(dst_blk_stt, *src_blk_stt);
}
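/* Editor's illustrative sketch (not part of the patch): how the block state
 * helpers above compose across samples; the function is hypothetical.
 */
static blk_stt_t example_merge_block_state(void)
{
        blk_stt_t first, merged;

        /* First sample: block was powered on and available. */
        kbase_hwcnt_block_state_set(&first,
                                    KBASE_HWCNT_STATE_ON | KBASE_HWCNT_STATE_AVAILABLE);

        /* A later sample also observed a transition to off: accumulate it. */
        kbase_hwcnt_block_state_copy(&merged, &first);
        kbase_hwcnt_block_state_append(&merged, KBASE_HWCNT_STATE_OFF);

        /* merged now holds ON | OFF | AVAILABLE. */
        return merged;
}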
/**
- * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group.
+ * kbase_hwcnt_metadata_block_count() - Get the number of blocks in the metadata.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
*
- * Return: Number of blocks in group grp.
+ * Return: Number of blocks in the metadata.
*/
-static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata,
- size_t grp)
+static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt))
+ if (WARN_ON(!metadata))
return 0;
- return metadata->grp_metadata[grp].blk_cnt;
+ return metadata->blk_cnt;
}
/**
* kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block.
*
- * Return: Type of the block blk in group grp.
+ * Return: Type of the block blk.
*/
static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata,
- size_t grp, size_t blk)
+ size_t blk)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
- WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt))
return 0;
- return metadata->grp_metadata[grp].blk_metadata[blk].type;
+ return metadata->blk_metadata[blk].type;
}
/**
* kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of
* a block.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
*
- * Return: Number of instances of block blk in group grp.
+ * Return: Number of instances of block blk.
*/
static inline size_t
-kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
- size_t blk)
+kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t blk)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
- WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt))
return 0;
- return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt;
+ return metadata->blk_metadata[blk].inst_cnt;
}
/**
* kbase_hwcnt_metadata_block_headers_count() - Get the number of counter
* headers.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
*
- * Return: Number of counter headers in each instance of block blk in group grp.
+ * Return: Number of counter headers in each instance of block blk.
*/
static inline size_t
-kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
- size_t blk)
+kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t blk)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
- WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt))
return 0;
- return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt;
+ return metadata->blk_metadata[blk].hdr_cnt;
}
/**
* kbase_hwcnt_metadata_block_counters_count() - Get the number of counters.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
*
- * Return: Number of counters in each instance of block blk in group grp.
+ * Return: Number of counters in each instance of block blk.
*/
static inline size_t
-kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
- size_t blk)
+kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t blk)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
- WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt))
return 0;
- return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt;
+ return metadata->blk_metadata[blk].ctr_cnt;
}
/**
* kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
*
- * Return: enable map stride in each instance of block blk in group grp.
+ * Return: enable map stride in each instance of block blk.
*/
static inline size_t
kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata,
- size_t grp, size_t blk)
+ size_t blk)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
- WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt))
return 0;
- return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride;
+ return metadata->blk_metadata[blk].enable_map_stride;
}
/**
* kbase_hwcnt_metadata_block_values_count() - Get the number of values.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
*
* Return: Number of headers plus counters in each instance of block blk
- * in group grp.
+ * in the metadata.
*/
static inline size_t
-kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp,
- size_t blk)
+kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t blk)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
- WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt))
return 0;
- return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) +
- kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk);
+ return kbase_hwcnt_metadata_block_counters_count(metadata, blk) +
+ kbase_hwcnt_metadata_block_headers_count(metadata, blk);
}
/**
* kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in
* the metadata.
* @md: Non-NULL pointer to metadata.
- * @grp: size_t variable used as group iterator.
* @blk: size_t variable used as block iterator.
* @blk_inst: size_t variable used as block instance iterator.
*
- * Iteration order is group, then block, then block instance (i.e. linearly
- * through memory).
+ * Iteration order is block, then block instance (i.e. linearly through memory).
*/
-#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \
- for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \
- for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \
- for ((blk_inst) = 0; \
- (blk_inst) < \
- kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \
- (blk_inst)++)
+#define kbase_hwcnt_metadata_for_each_block(md, blk, blk_inst) \
+ for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md)); (blk)++) \
+ for ((blk_inst) = 0; \
+ (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (blk)); \
+ (blk_inst)++)
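/* Editor's illustrative sketch (not part of the patch): the iteration pattern
 * after the group level was removed uses two indices instead of three. The
 * function is hypothetical and assumes a valid metadata pointer.
 */
static size_t example_count_available_instances(const struct kbase_hwcnt_metadata *md)
{
        size_t blk, blk_inst, avail = 0;

        kbase_hwcnt_metadata_for_each_block(md, blk, blk_inst) {
                if (kbase_hwcnt_metadata_block_instance_avail(md, blk, blk_inst))
                        avail++;
        }
        return avail;
}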
/**
* kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail
* mask corresponding to the block.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
*
* Return: The bit index into the avail mask for the block.
*/
static inline size_t
-kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp,
- size_t blk)
+kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t blk)
{
- if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) ||
- WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt))
+ if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt))
return 0;
- return metadata->grp_metadata[grp].avail_mask_index +
- metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index;
+ return metadata->blk_metadata[blk].avail_mask_index;
}
/**
* kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is
* available.
* @metadata: Non-NULL pointer to metadata.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
* @blk_inst: Index of the block instance in the block.
*
* Return: true if the block instance is available, else false.
*/
static inline bool
-kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp,
- size_t blk, size_t blk_inst)
+kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t blk,
+ size_t blk_inst)
{
size_t bit;
+ size_t mask_index;
u64 mask;
if (WARN_ON(!metadata))
return false;
- bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst;
- mask = 1ull << bit;
+ bit = kbase_hwcnt_metadata_block_avail_bit(metadata, blk) + blk_inst;
+ mask_index = bit >> 6;
+ mask = 1ull << (bit & 0x3f);
- return (metadata->avail_mask & mask) != 0;
+ return (metadata->avail_mask.mask[mask_index] & mask) != 0;
}
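/* Editor's worked example (not part of the patch): the flat bit position is
 * split across the two u64 elements of the availability mask. An avail bit of
 * 68 with blk_inst == 2 gives bit 70, so mask_index == 70 >> 6 == 1 and the
 * tested bit is 1ull << (70 & 0x3f) == 1ull << 6, i.e. the check becomes
 * metadata->avail_mask.mask[1] & (1ull << 6).
 */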
/**
@@ -568,31 +698,28 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map);
* kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block
* instance's enable map.
* @map: Non-NULL pointer to enable map.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
* @blk_inst: Index of the block instance in the block.
*
* Return: u64* to the bitfield(s) used as the enable map for the
* block instance.
*/
static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map,
- size_t grp, size_t blk, size_t blk_inst)
+ size_t blk, size_t blk_inst)
{
if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map))
return NULL;
- if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) ||
- WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) ||
- WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ if (WARN_ON(!map->metadata) || WARN_ON(blk >= map->metadata->blk_cnt) ||
+ WARN_ON(blk_inst >= map->metadata->blk_metadata[blk].inst_cnt))
return map->hwcnt_enable_map;
- return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index +
- map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index +
- (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst);
+ return map->hwcnt_enable_map + map->metadata->blk_metadata[blk].enable_map_index +
+ (map->metadata->blk_metadata[blk].enable_map_stride * blk_inst);
}
/**
- * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required
+ * kbase_hwcnt_bitfield_count - Calculate the number of u64 bitfields required
* to have at minimum one bit per value.
* @val_cnt: Number of values.
*
@@ -604,24 +731,22 @@ static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt)
}
/**
- * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block.
+ * kbase_hwcnt_enable_map_block_disable_all - Disable all values in a block.
* @dst: Non-NULL pointer to enable map.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
* @blk_inst: Index of the block instance in the block.
*/
static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst,
- size_t grp, size_t blk, size_t blk_inst)
+ size_t blk, size_t blk_inst)
{
size_t val_cnt;
size_t bitfld_cnt;
- u64 *const block_enable_map =
- kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+ u64 *const block_enable_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst);
if (WARN_ON(!dst))
return;
- val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, blk);
bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES);
@@ -645,23 +770,21 @@ static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_
/**
* kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block.
* @dst: Non-NULL pointer to enable map.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
* @blk_inst: Index of the block instance in the block.
*/
static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst,
- size_t grp, size_t blk, size_t blk_inst)
+ size_t blk, size_t blk_inst)
{
size_t val_cnt;
size_t bitfld_cnt;
- u64 *const block_enable_map =
- kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst);
+ u64 *const block_enable_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst);
size_t bitfld_idx;
if (WARN_ON(!dst))
return;
- val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, blk);
bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
@@ -682,13 +805,13 @@ static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_en
*/
static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst)
{
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
if (WARN_ON(!dst) || WARN_ON(!dst->metadata))
return;
- kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst)
- kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst);
+ kbase_hwcnt_metadata_for_each_block(dst->metadata, blk, blk_inst)
+ kbase_hwcnt_enable_map_block_enable_all(dst, blk, blk_inst);
dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1;
}
@@ -751,27 +874,26 @@ static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *d
* kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block
* instance are enabled.
* @enable_map: Non-NULL pointer to enable map.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
* @blk_inst: Index of the block instance in the block.
*
* Return: true if any values in the block are enabled, else false.
*/
static inline bool
-kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp,
- size_t blk, size_t blk_inst)
+kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t blk,
+ size_t blk_inst)
{
bool any_enabled = false;
size_t val_cnt;
size_t bitfld_cnt;
const u64 *const block_enable_map =
- kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst);
+ kbase_hwcnt_enable_map_block_instance(enable_map, blk, blk_inst);
size_t bitfld_idx;
if (WARN_ON(!enable_map))
return false;
- val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk);
+ val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, blk);
bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt);
for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) {
@@ -796,7 +918,7 @@ kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable
static inline bool
kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map)
{
- size_t grp, blk, blk_inst;
+ size_t blk, blk_inst;
u64 clk_enable_map_mask;
if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata))
@@ -807,9 +929,8 @@ kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_m
if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask))
return true;
- kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst)
- {
- if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
+ kbase_hwcnt_metadata_for_each_block(enable_map->metadata, blk, blk_inst) {
+ if (kbase_hwcnt_enable_map_block_enabled(enable_map, blk, blk_inst))
return true;
}
@@ -869,9 +990,8 @@ static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_
/**
* kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer.
* @metadata: Non-NULL pointer to metadata describing the system.
- * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be
- * initialised to undefined values, so must be used as a copy dest,
- * or cleared before use.
+ * @dump_buf: Non-NULL pointer to the dump buffer to be initialised. The
+ * backing memory will be zero-allocated.
*
* Return: 0 on success, else error code.
*/
@@ -888,53 +1008,51 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata,
void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf);
/**
- * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers.
- * @metadata: Non-NULL pointer to metadata describing the system.
- * @n: Number of dump buffers to allocate
- * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised.
- *
- * A single zeroed contiguous page allocation will be used for all of the
- * buffers inside the array, where:
- * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes
- *
- * Return: 0 on success, else error code.
- */
-int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n,
- struct kbase_hwcnt_dump_buffer_array *dump_bufs);
-
-/**
- * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array.
- * @dump_bufs: Dump buffer array to be freed.
- *
- * Can be safely called on an all-zeroed dump buffer array structure, or on an
- * already freed dump buffer array.
- */
-void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs);
-
-/**
* kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block
* instance's dump buffer.
* @buf: Non-NULL pointer to dump buffer.
- * @grp: Index of the group in the metadata.
- * @blk: Index of the block in the group.
+ * @blk: Index of the block in the metadata.
* @blk_inst: Index of the block instance in the block.
*
* Return: u64* to the dump buffer for the block instance.
*/
static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf,
- size_t grp, size_t blk, size_t blk_inst)
+ size_t blk, size_t blk_inst)
{
if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf))
return NULL;
- if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) ||
- WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) ||
- WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt))
+ if (WARN_ON(!buf->metadata) || WARN_ON(blk >= buf->metadata->blk_cnt) ||
+ WARN_ON(blk_inst >= buf->metadata->blk_metadata[blk].inst_cnt))
return buf->dump_buf;
- return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index +
- buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index +
- (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst);
+ return buf->dump_buf + buf->metadata->blk_metadata[blk].dump_buf_index +
+ (buf->metadata->blk_metadata[blk].dump_buf_stride * blk_inst);
+}
+
+/**
+ * kbase_hwcnt_dump_buffer_block_state_instance() - Get the pointer to a block
+ * instance's block state mask.
+ * @buf: Non-NULL pointer to dump buffer.
+ * @blk: Index of the block in the metadata.
+ * @blk_inst: Index of the block instance in the block.
+ *
+ * Return: blk_stt_t* to the block state mask of the block instance in the dump
+ * buffer.
+ */
+static inline blk_stt_t *
+kbase_hwcnt_dump_buffer_block_state_instance(const struct kbase_hwcnt_dump_buffer *buf, size_t blk,
+ size_t blk_inst)
+{
+ if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf))
+ return NULL;
+
+ if (WARN_ON(!buf->metadata) || WARN_ON(blk >= buf->metadata->blk_cnt) ||
+ WARN_ON(blk_inst >= buf->metadata->blk_metadata[blk].inst_cnt))
+ return buf->blk_stt_buf;
+
+ return buf->blk_stt_buf + buf->metadata->blk_metadata[blk].blk_stt_index +
+ (buf->metadata->blk_metadata[blk].blk_stt_stride * blk_inst);
}
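/* Editor's illustrative sketch (not part of the patch): fetching a block
 * instance's state mask and appending an observation to it; buf, blk and
 * blk_inst are assumed valid for the same metadata.
 */
static void example_append_block_state(struct kbase_hwcnt_dump_buffer *buf, size_t blk,
                                       size_t blk_inst)
{
        blk_stt_t *blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(buf, blk, blk_inst);

        if (blk_stt)
                kbase_hwcnt_block_state_append(blk_stt, KBASE_HWCNT_STATE_ON);
}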
/**
@@ -1228,4 +1346,19 @@ static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map,
return false;
}
+/**
+ * kbase_hwcnt_dump_buffer_block_state_update() - Update the enabled block instances' block states
+ * in dst. After the operation, all non-enabled or
+ * unavailable block instances will be unchanged.
+ * @dst: Non-NULL pointer to dump buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @blk_stt_val: Mask of block states to update. Block states not set in this mask will still be
+ * preserved in dst.
+ *
+ * The dst and dst_enable_map MUST have been created from the same metadata.
+ */
+void kbase_hwcnt_dump_buffer_block_state_update(struct kbase_hwcnt_dump_buffer *dst,
+ const struct kbase_hwcnt_enable_map *dst_enable_map,
+ blk_stt_t blk_stt_val);
+
#endif /* _KBASE_HWCNT_TYPES_H_ */
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
index d618764..89cca45 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -255,7 +255,7 @@ static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *h
/* Make the scratch enable map the union of all enable maps */
kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
- list_for_each_entry (pos, &hvirt->clients, node)
+ list_for_each_entry(pos, &hvirt->clients, node)
kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
/* Set the counters with the new union enable map */
@@ -264,7 +264,7 @@ static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *h
&hvirt->scratch_buf);
/* Accumulate into only existing clients' accumulation bufs */
if (!errcode)
- list_for_each_entry (pos, &hvirt->clients, node)
+ list_for_each_entry(pos, &hvirt->clients, node)
kbasep_hwcnt_virtualizer_client_accumulate(pos,
&hvirt->scratch_buf);
}
@@ -315,7 +315,7 @@ static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualize
struct kbase_hwcnt_virtualizer_client *pos;
/* Make the scratch enable map the union of all enable maps */
kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map);
- list_for_each_entry (pos, &hvirt->clients, node)
+ list_for_each_entry(pos, &hvirt->clients, node)
kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
/* Set the counters with the new union enable map */
errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map,
@@ -323,7 +323,7 @@ static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualize
&hvirt->scratch_buf);
/* Accumulate into remaining clients' accumulation bufs */
if (!errcode) {
- list_for_each_entry (pos, &hvirt->clients, node)
+ list_for_each_entry(pos, &hvirt->clients, node)
kbasep_hwcnt_virtualizer_client_accumulate(pos,
&hvirt->scratch_buf);
@@ -373,7 +373,7 @@ static int kbasep_hwcnt_virtualizer_client_set_counters(
/* Make the scratch enable map the union of all enable maps */
kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map);
- list_for_each_entry (pos, &hvirt->clients, node)
+ list_for_each_entry(pos, &hvirt->clients, node)
/* Ignore the enable map of the selected client */
if (pos != hvcli)
kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map);
@@ -385,7 +385,7 @@ static int kbasep_hwcnt_virtualizer_client_set_counters(
return errcode;
/* Accumulate into all accumulation bufs except the selected client's */
- list_for_each_entry (pos, &hvirt->clients, node)
+ list_for_each_entry(pos, &hvirt->clients, node)
if (pos != hvcli)
kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
@@ -503,7 +503,7 @@ static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer *
return errcode;
/* Accumulate into all accumulation bufs except the selected client's */
- list_for_each_entry (pos, &hvirt->clients, node)
+ list_for_each_entry(pos, &hvirt->clients, node)
if (pos != hvcli)
kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf);
@@ -724,7 +724,7 @@ void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt)
if (WARN_ON(hvirt->client_count != 0)) {
struct kbase_hwcnt_virtualizer_client *pos, *n;
- list_for_each_entry_safe (pos, n, &hvirt->clients, node)
+ list_for_each_entry_safe(pos, n, &hvirt->clients, node)
kbase_hwcnt_virtualizer_client_destroy(pos);
}