author    Vamsidhar reddy Gaddam <gvamsi@google.com>    2024-03-13 09:45:22 +0000
committer Vamsidhar reddy Gaddam <gvamsi@google.com>    2024-03-13 09:45:22 +0000
commit    a999cd8fd398aed7390c8e5d99795e9b735d6ba7 (patch)
tree      79503e1b07ccfd66140fb903be3a0f2e0ace147c /mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
parent    049a542207ed694271316782397b78b2e202086a (diff)
Update KMD to r48p0
Provenance: ipdelivery@02a6b5e039b17fd395ddc13d09efbe440223a56c
Change-Id: Ia6d72d40f5c57508d818ad24e57547c1a411d644
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
Diffstat (limited to 'mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c')
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c  101
1 file changed, 54 insertions(+), 47 deletions(-)
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index d7911ae..d1290ca 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,6 @@
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h"
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
-#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/log2.h>
#include <linux/kernel.h>
@@ -255,7 +254,8 @@ struct kbase_hwcnt_csf_physical_layout {
* @hwc_threshold_work: Worker for consuming available samples when
* threshold interrupt raised.
* @num_l2_slices: Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
+ * @powered_shader_core_mask: The common mask between the debug_core_mask
+ * and the shader_present_bitmap.
*/
struct kbase_hwcnt_backend_csf {
struct kbase_hwcnt_backend_csf_info *info;
@@ -283,7 +283,7 @@ struct kbase_hwcnt_backend_csf {
struct work_struct hwc_dump_work;
struct work_struct hwc_threshold_work;
size_t num_l2_slices;
- u64 shader_present_bitmap;
+ u64 powered_shader_core_mask;
};
static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
@@ -296,7 +296,7 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_c
}
void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
- size_t num_l2_slices, u64 shader_present_bitmap)
+ size_t num_l2_slices, u64 powered_shader_core_mask)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
@@ -313,12 +313,12 @@ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_inte
return;
if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) ||
- WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) !=
- shader_present_bitmap))
+ WARN_ON((powered_shader_core_mask & csf_info->backend->phys_layout.shader_avail_mask) !=
+ powered_shader_core_mask))
return;
csf_info->backend->num_l2_slices = num_l2_slices;
- csf_info->backend->shader_present_bitmap = shader_present_bitmap;
+ csf_info->backend->powered_shader_core_mask = powered_shader_core_mask;
}
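
For context: per the updated doc comment, the renamed mask is the AND of the physically-present shader cores and the debug core mask. A minimal caller-side sketch of that derivation (the names shader_present_bitmap and debug_core_mask here are assumptions for illustration, not taken from this diff):

	/* Hypothetical derivation: only cores that are both physically
	 * present and enabled by the debug core mask count as powered.
	 */
	u64 powered_shader_core_mask = shader_present_bitmap & debug_core_mask;

	kbase_hwcnt_backend_csf_set_hw_availability(iface, num_l2_slices,
						    powered_shader_core_mask);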
/**
@@ -424,7 +424,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
WARN_ON(!prfcnt_info);
WARN_ON(!phys_layout);
- shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask);
+ shader_core_cnt = (size_t)fls64(prfcnt_info->sc_core_mask);
values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
@@ -445,7 +445,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
.fw_block_cnt = fw_block_cnt,
.hw_block_cnt = hw_block_cnt,
.block_cnt = fw_block_cnt + hw_block_cnt,
- .shader_avail_mask = prfcnt_info->core_mask,
+ .shader_avail_mask = prfcnt_info->sc_core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = values_per_block,
.counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
@@ -517,34 +517,21 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backe
memset(backend_csf->block_states, 0, block_state_bytes);
}
-/**
- * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
- * information from a sample.
- * @phys_layout: Physical memory layout information of HWC
- * sample buffer.
- * @enable_mask: Counter enable mask for the block whose state is being updated.
- * @enable_state: The CSF backend internal enabled state.
- * @exiting_protm: Whether or not the sample is taken when the GPU is exiting
- * protected mode.
- * @block_idx: Index of block within the ringbuffer.
- * @block_state: Pointer to existing block state of the block whose state is being
- * updated.
- * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling.
- */
-static void kbasep_hwcnt_backend_csf_update_block_state(
- const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask,
- enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm,
- size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode)
+void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend,
+ const u32 enable_mask, bool exiting_protm,
+ size_t block_idx, blk_stt_t *const block_state,
+ bool fw_in_protected_mode)
{
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
/* Offset of shader core blocks from the start of the HW blocks in the sample */
size_t shader_core_block_offset =
- (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt);
+ (size_t)(phys_layout->block_cnt - phys_layout->shader_cnt);
bool is_shader_core_block;
- is_shader_core_block = block_idx >= shader_core_block_offset;
+ is_shader_core_block = (block_idx >= shader_core_block_offset);
/* Set power bits for the block state for the block, for the sample */
- switch (enable_state) {
+ switch (backend->enable_state) {
/* Disabled states */
case KBASE_HWCNT_BACKEND_CSF_DISABLED:
case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
@@ -592,21 +579,45 @@ static void kbasep_hwcnt_backend_csf_update_block_state(
KBASE_HWCNT_STATE_NORMAL);
else
kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL);
+
+ /* powered_shader_core_mask stored in the backend is a combination of
+ * the shader present and the debug core mask, so explicit checking of the
+ * core mask is not required here.
+ */
+ if (is_shader_core_block) {
+ u64 current_shader_core = 1ULL << (block_idx - shader_core_block_offset);
+
+ WARN_ON_ONCE(backend->phys_layout.shader_cnt > 64);
+
+ if (current_shader_core & backend->info->backend->powered_shader_core_mask)
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
+ else if (current_shader_core & ~backend->info->backend->powered_shader_core_mask)
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_UNAVAILABLE);
+ else
+ WARN_ON_ONCE(true);
+ }
+ else
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
}
-static void kbasep_hwcnt_backend_csf_accumulate_sample(
- const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
- u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf,
- blk_stt_t *const block_states, bool clearing_samples,
- enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode)
+static void kbasep_hwcnt_backend_csf_accumulate_sample(struct kbase_hwcnt_backend_csf *backend,
+ const u32 *old_sample_buf,
+ const u32 *new_sample_buf)
{
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
+ const size_t dump_bytes = backend->info->prfcnt_info.dump_bytes;
+ const size_t values_per_block = phys_layout->values_per_block;
+ blk_stt_t *const block_states = backend->block_states;
+ const bool fw_in_protected_mode = backend->info->fw_in_protected_mode;
+ const bool clearing_samples = backend->info->prfcnt_info.clearing_samples;
+ u64 *accum_buf = backend->accum_buf;
+
size_t block_idx;
const u32 *old_block = old_sample_buf;
const u32 *new_block = new_sample_buf;
u64 *acc_block = accum_buf;
/* Flag to indicate whether current sample is exiting protected mode. */
bool exiting_protm = false;
- const size_t values_per_block = phys_layout->values_per_block;
/* The block pointers now point to the first HW block, which is always a CSHW/front-end
* block. The counter enable mask for this block can be checked to determine whether this
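
Two details of the new block-state logic are worth illustrating. First, shader_core_block_offset is now computed from block_cnt (firmware plus hardware blocks) rather than hw_block_cnt, matching the fact that block_idx indexes the full block list. Second, each shader core block maps to a single bit that is tested against powered_shader_core_mask. A worked sketch with assumed values (not taken from this diff):

	/* Assume 4 blocks (FW + non-SC HW) precede the shader cores, and
	 * cores 0 and 2 powered (mask 0b101).
	 */
	size_t shader_core_block_offset = 4;   /* block_cnt - shader_cnt */
	u64 powered_shader_core_mask = 0x5;

	/* block_idx 4 -> bit 0x1: powered   -> KBASE_HWCNT_STATE_AVAILABLE
	 * block_idx 5 -> bit 0x2: unpowered -> KBASE_HWCNT_STATE_UNAVAILABLE
	 * block_idx 6 -> bit 0x4: powered   -> KBASE_HWCNT_STATE_AVAILABLE
	 */

Non-shader-core blocks always take the KBASE_HWCNT_STATE_AVAILABLE branch.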
@@ -620,9 +631,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
/* Update block state with information of the current sample */
- kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask,
- enable_state, exiting_protm, block_idx,
- &block_states[block_idx],
+ kbasep_hwcnt_backend_csf_update_block_state(backend, new_enable_mask, exiting_protm,
+ block_idx, &block_states[block_idx],
fw_in_protected_mode);
if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
@@ -706,7 +716,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
- bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
u32 *old_sample_buf = backend_csf->old_sample_buf;
u32 *new_sample_buf = old_sample_buf;
const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout;
@@ -740,10 +749,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
- kbasep_hwcnt_backend_csf_accumulate_sample(
- phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf,
- new_sample_buf, backend_csf->block_states, clearing_samples,
- backend_csf->enable_state, backend_csf->info->fw_in_protected_mode);
+ kbasep_hwcnt_backend_csf_accumulate_sample(backend_csf, old_sample_buf,
+ new_sample_buf);
old_sample_buf = new_sample_buf;
}
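
The refactor above shrinks the call site: kbasep_hwcnt_backend_csf_accumulate_sample() now takes the backend pointer and pulls dump_bytes, accum_buf, block_states, clearing_samples, enable_state and fw_in_protected_mode from it, going from nine arguments to three. The surrounding ring-buffer walk is unchanged; note that the index wrap in the context lines above relies on ring_buf_cnt being a power of two. A worked example with an assumed ring_buf_cnt of 4:

	/* raw_idx & (ring_buf_cnt - 1) equals raw_idx % ring_buf_cnt only
	 * when ring_buf_cnt is a power of two, e.g. 6 & (4 - 1) == 2 == 6 % 4.
	 */
	const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);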
@@ -1457,7 +1464,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
backend_csf->to_user_block_states, dst_enable_map,
backend_csf->num_l2_slices,
- backend_csf->shader_present_bitmap, accumulate);
+ backend_csf->powered_shader_core_mask, accumulate);
/* If no error occurred (zero ret value), then update block state for all blocks in the
* accumulation with the current sample's block state.
@@ -2098,7 +2105,7 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *
gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0;
gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count;
- gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+ gpu_info.sc_core_mask = csf_info->prfcnt_info.sc_core_mask;
gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
gpu_info.prfcnt_values_per_block =
csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
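
A worked example of this size arithmetic, with assumed values (a 256-byte counter block and KBASE_HWCNT_VALUE_HW_BYTES == 4; concrete sizes vary by GPU):

	size_t prfcnt_block_size = 256;                         /* assumed */
	size_t prfcnt_values_per_block = prfcnt_block_size / 4; /* == 64 */
	/* KBASE_HWCNT_V5_HEADERS_PER_BLOCK of those values are reserved as
	 * headers; the remainder are usable counters (see counters_per_block
	 * in kbasep_hwcnt_backend_csf_init_layout() earlier in this diff).
	 */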