Diffstat (limited to 'mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c')
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c  221
1 file changed, 159 insertions(+), 62 deletions(-)
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
index 8b3caac..4df7dd4 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -80,30 +80,40 @@ struct kbase_hwcnt_jm_physical_layout {
/**
* struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
- * @info: Info used to create the backend.
- * @kctx: KBase context used for GPU memory allocation and
- * counter dumping.
- * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
- * @cpu_dump_va: CPU mapping of gpu_dump_va.
- * @vmap: Dump buffer vmap.
- * @to_user_buf: HWC sample buffer for client user, size
- * metadata.dump_buf_bytes.
- * @enabled: True if dumping has been enabled, else false.
- * @pm_core_mask: PM state sync-ed shaders core mask for the enabled
- * dumping.
- * @curr_config: Current allocated hardware resources to correctly map the
- * source raw dump buffer to the destination dump buffer.
- * @clk_enable_map: The enable map specifying enabled clock domains.
- * @cycle_count_elapsed:
- * Cycle count elapsed for a given sample period.
- * The top clock cycle, index 0, is read directly from
- * hardware, but the other clock domains need to be
- * calculated with software estimation.
- * @prev_cycle_count: Previous cycle count to calculate the cycle count for
- * sample period.
- * @rate_listener: Clock rate listener callback state.
- * @ccswe_shader_cores: Shader cores cycle count software estimator.
- * @phys_layout: Physical memory layout information of HWC sample buffer.
+ * @info: Info used to create the backend.
+ * @kctx: KBase context used for GPU memory allocation and
+ * counter dumping.
+ * @gpu_dump_va: GPU hardware counter dump buffer virtual address.
+ * @cpu_dump_va: CPU mapping of gpu_dump_va.
+ * @vmap: Dump buffer vmap.
+ * @to_user_buf: HWC sample buffer for client user, size
+ * metadata.dump_buf_bytes.
+ * @enabled: True if dumping has been enabled, else false.
+ * @accum_all_blk_stt: Block State to accumulate on next sample, for all types
+ * of block.
+ * @sampled_all_blk_stt: Block State to accumulate into the current sample, for
+ * all types of block.
+ * @debug_core_mask: User-set mask of shader cores that can be used.
+ * @pm_core_mask: PM state sync-ed shaders core mask for the enabled
+ * dumping.
+ * @curr_config: Current allocated hardware resources to correctly map the
+ * source raw dump buffer to the destination dump buffer.
+ * @max_core_mask: Core mask of all cores allocated to the GPU (non
+ * virtualized platforms) or resource group (virtualized
+ * platforms).
+ * @max_l2_slices: Maximum number of L2 slices allocated to the GPU (non
+ * virtualized platforms) or resource group (virtualized
+ * platforms).
+ * @clk_enable_map: The enable map specifying enabled clock domains.
+ * @cycle_count_elapsed: Cycle count elapsed for a given sample period.
+ * The top clock cycle, index 0, is read directly from
+ * hardware, but the other clock domains need to be
+ * calculated with software estimation.
+ * @prev_cycle_count: Previous cycle count to calculate the cycle count for
+ * sample period.
+ * @rate_listener: Clock rate listener callback state.
+ * @ccswe_shader_cores: Shader cores cycle count software estimator.
+ * @phys_layout: Physical memory layout information of HWC sample buffer.
*/
struct kbase_hwcnt_backend_jm {
const struct kbase_hwcnt_backend_jm_info *info;
@@ -113,8 +123,13 @@ struct kbase_hwcnt_backend_jm {
struct kbase_vmap_struct *vmap;
u64 *to_user_buf;
bool enabled;
+ blk_stt_t accum_all_blk_stt;
+ blk_stt_t sampled_all_blk_stt;
+ u64 debug_core_mask;
u64 pm_core_mask;
struct kbase_hwcnt_curr_config curr_config;
+ u64 max_core_mask;
+ size_t max_l2_slices;
u64 clk_enable_map;
u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
@@ -136,26 +151,22 @@ struct kbase_hwcnt_backend_jm {
static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
struct kbase_hwcnt_gpu_info *info)
{
- size_t clk;
+ size_t clk, l2_count, core_mask;
if (!kbdev || !info)
return -EINVAL;
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
- info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
- info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
-#else /* CONFIG_MALI_NO_MALI */
- {
- const struct base_gpu_props *props = &kbdev->gpu_props.props;
- const size_t l2_count = props->l2_props.num_l2_slices;
- const size_t core_mask = props->coherency_info.group[0].core_mask;
+ l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
+ core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
+#else
+ l2_count = kbdev->gpu_props.num_l2_slices;
+ core_mask = kbdev->gpu_props.coherency_info.group.core_mask;
+#endif
- info->l2_count = l2_count;
- info->core_mask = core_mask;
- info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
- }
-#endif /* CONFIG_MALI_NO_MALI */
+ info->l2_count = l2_count;
+ info->core_mask = core_mask;
+ info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
/* Determine the number of available clock domains. */
for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
@@ -353,9 +364,9 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
struct kbase_context *kctx;
struct kbase_device *kbdev;
- struct kbase_hwcnt_physical_enable_map phys_enable_map;
+ struct kbase_hwcnt_physical_enable_map phys_enable_map = { 0 };
enum kbase_hwcnt_physical_set phys_counter_set;
- struct kbase_instr_hwcnt_enable enable;
+ struct kbase_instr_hwcnt_enable enable = { 0 };
u64 timestamp_ns;
if (!backend_jm || !enable_map || backend_jm->enabled ||
@@ -371,18 +382,21 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);
- enable.fe_bm = phys_enable_map.fe_bm;
- enable.shader_bm = phys_enable_map.shader_bm;
- enable.tiler_bm = phys_enable_map.tiler_bm;
- enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm;
- enable.counter_set = phys_counter_set;
+ enable = (struct kbase_instr_hwcnt_enable)
+ {
+ .fe_bm = phys_enable_map.fe_bm,
+ .shader_bm = phys_enable_map.shader_bm,
+ .tiler_bm = phys_enable_map.tiler_bm,
+ .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
+ .counter_set = phys_counter_set,
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
- /* The dummy model needs the CPU mapping. */
- enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va;
+ /* The dummy model needs the CPU mapping. */
+ .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
#else
- enable.dump_buffer = backend_jm->gpu_dump_va;
+ .dump_buffer = backend_jm->gpu_dump_va,
#endif /* CONFIG_MALI_NO_MALI */
- enable.dump_buffer_bytes = backend_jm->info->dump_bytes;
+ .dump_buffer_bytes = backend_jm->info->dump_bytes,
+ };
timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
@@ -395,9 +409,24 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
if (errcode)
goto error;
+ backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev);
+ backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count;
+ backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask;
+
backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
backend_jm->enabled = true;
+ /* Enabling counters is an indication that the power may have previously been off for all
+ * blocks.
+ *
+ * In any case, the counters would not have been counting recently, so an 'off' block state
+ * is an approximation for this.
+ *
+ * This will be transferred to the dump only after a dump_wait(), or dump_disable() in
+ * cases where the caller requested such information. This is to handle when a
+ * dump_enable() happens in between dump_wait() and dump_get().
+ */
+ kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);
@@ -430,12 +459,20 @@ static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backe
}
/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
-static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend)
+static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend,
+ struct kbase_hwcnt_dump_buffer *dump_buffer,
+ const struct kbase_hwcnt_enable_map *enable_map)
{
int errcode;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
- if (WARN_ON(!backend_jm) || !backend_jm->enabled)
+ if (WARN_ON(!backend_jm ||
+ (dump_buffer && (backend_jm->info->metadata != dump_buffer->metadata)) ||
+ (enable_map && (backend_jm->info->metadata != enable_map->metadata)) ||
+ (dump_buffer && !enable_map)))
+ return;
+ /* No WARN needed here, but still return early if backend is already disabled */
+ if (!backend_jm->enabled)
return;
kbasep_hwcnt_backend_jm_cc_disable(backend_jm);
@@ -443,6 +480,42 @@ static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *bac
errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx);
WARN_ON(errcode);
+ /* Disabling HWCNT is an indication that blocks have been powered off. This is important to
+ * know for L2 and Tiler blocks, as this is currently the only way a backend can know if
+ * they are being powered off.
+ *
+ * In any case, even if they weren't really powered off, we won't be counting whilst
+ * disabled.
+ *
+ * Update the block state information in the block state accumulator to show this, so that
+ * in the next dump blocks will have been seen as powered off for some of the time.
+ */
+ kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
+
+ if (dump_buffer) {
+ /* In some use-cases, the caller will need the information whilst the counters are
+ * disabled, but will not be able to call into the backend to dump them. Instead,
+ * they have an opportunity here to request them to be accumulated into their
+ * buffer immediately.
+ *
+ * This consists of taking a sample of the accumulated block state (as though a
+ * real dump_get() had happened), then transfer ownership of that to the caller
+ * (i.e. erasing our copy of it).
+ */
+ kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt,
+ &backend_jm->accum_all_blk_stt);
+ kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map,
+ backend_jm->sampled_all_blk_stt);
+ /* Now the block state has been passed out into the caller's own accumulation
+ * buffer, clear our own accumulated and sampled block state - ownership has been
+ * transferred.
+ */
+ kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
+ kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt,
+ KBASE_HWCNT_STATE_UNKNOWN);
+ }
+
backend_jm->enabled = false;
}
@@ -480,8 +553,7 @@ static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *back
*dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);
- kbase_hwcnt_metadata_for_each_clock(metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
continue;
@@ -514,12 +586,27 @@ static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *back
/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
{
+ int errcode;
struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
if (!backend_jm || !backend_jm->enabled)
return -EINVAL;
- return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
+ errcode = kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
+ if (errcode)
+ return errcode;
+
+ /* Now that we've completed a sample, also sample+clear the accumulated block state.
+ *
+ * This is to ensure that a dump_enable() that happens in between dump_wait() and
+ * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block
+ * state is reported at the actual time that counters are being sampled.
+ */
+ kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt,
+ &backend_jm->accum_all_blk_stt);
+ kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+
+ return errcode;
}
/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
@@ -533,8 +620,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
struct kbase_device *kbdev;
unsigned long flags;
- int errcode;
#endif /* CONFIG_MALI_NO_MALI */
+ int errcode;
if (!backend_jm || !dst || !dst_enable_map ||
(backend_jm->info->metadata != dst->metadata) ||
@@ -548,8 +635,7 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
kbasep_hwcnt_backend_jm_dump_sample(backend_jm);
/* Extract elapsed cycle count for each clock domain if enabled. */
- kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
- {
+ kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
continue;
@@ -572,9 +658,18 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
if (errcode)
return errcode;
#endif /* CONFIG_MALI_NO_MALI */
- return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
- backend_jm->pm_core_mask, &backend_jm->curr_config,
- accumulate);
+ errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
+ backend_jm->pm_core_mask, backend_jm->debug_core_mask,
+ backend_jm->max_core_mask, backend_jm->max_l2_slices,
+ &backend_jm->curr_config, accumulate);
+
+ if (errcode)
+ return errcode;
+
+ kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map,
+ backend_jm->sampled_all_blk_stt);
+ kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+ return errcode;
}
/**
@@ -705,6 +800,8 @@ static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_in
kbase_ccswe_init(&backend->ccswe_shader_cores);
backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;
+ kbase_hwcnt_block_state_set(&backend->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+ kbase_hwcnt_block_state_set(&backend->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
*out_backend = backend;
return 0;
@@ -752,7 +849,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
if (!backend)
return;
- kbasep_hwcnt_backend_jm_dump_disable(backend);
+ kbasep_hwcnt_backend_jm_dump_disable(backend, NULL, NULL);
kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
}
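
The block-state handling added in this patch follows a single lifecycle: dump_enable() and dump_disable() append KBASE_HWCNT_STATE_OFF to accum_all_blk_stt, dump_wait() folds accum_all_blk_stt into sampled_all_blk_stt and resets the accumulator, and dump_get() (or dump_disable() with a caller-supplied dump_buffer) hands sampled_all_blk_stt to the caller and resets it to KBASE_HWCNT_STATE_UNKNOWN. The stand-alone sketch below only illustrates that flow: blk_stt_t, the state flag values and the helper functions here are simplified stand-ins for the kbase helpers named in the diff, not the driver's real definitions.

/*
 * Minimal stand-alone sketch of the block-state lifecycle described above.
 * All types, flags and helpers are illustrative stand-ins, NOT the kbase
 * driver's real implementations.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t blk_stt_t;

#define STATE_UNKNOWN 0u        /* stand-in for KBASE_HWCNT_STATE_UNKNOWN */
#define STATE_ON      (1u << 0) /* stand-in for KBASE_HWCNT_STATE_ON */
#define STATE_OFF     (1u << 1) /* stand-in for KBASE_HWCNT_STATE_OFF */

/* Append a state bit to an accumulator (mirrors kbase_hwcnt_block_state_append). */
static void blk_state_append(blk_stt_t *stt, blk_stt_t state) { *stt |= state; }

/* Fold one accumulator into another (mirrors kbase_hwcnt_block_state_accumulate). */
static void blk_state_accumulate(blk_stt_t *dst, const blk_stt_t *src) { *dst |= *src; }

/* Overwrite an accumulator (mirrors kbase_hwcnt_block_state_set). */
static void blk_state_set(blk_stt_t *stt, blk_stt_t state) { *stt = state; }

struct backend_state {
	blk_stt_t accum_all_blk_stt;   /* accumulated since the last sample */
	blk_stt_t sampled_all_blk_stt; /* belongs to the sample being handed out */
};

int main(void)
{
	struct backend_state b = { STATE_UNKNOWN, STATE_UNKNOWN };
	blk_stt_t caller_buf = STATE_UNKNOWN;

	/* dump_enable(): counters were not counting before this point, so an
	 * 'off' state is appended as an approximation. */
	blk_state_append(&b.accum_all_blk_stt, STATE_OFF);

	/* ... counters run while dumping is enabled ... */
	blk_state_append(&b.accum_all_blk_stt, STATE_ON);

	/* dump_wait(): the sample is complete, so fold the accumulated state
	 * into the sampled copy and restart accumulation. A dump_enable()
	 * occurring after this point affects the next sample, not this one. */
	blk_state_accumulate(&b.sampled_all_blk_stt, &b.accum_all_blk_stt);
	blk_state_set(&b.accum_all_blk_stt, STATE_UNKNOWN);

	/* dump_get() (or dump_disable() with a caller-supplied buffer):
	 * transfer ownership of the sampled state to the caller and clear the
	 * backend's copy. */
	blk_state_accumulate(&caller_buf, &b.sampled_all_blk_stt);
	blk_state_set(&b.sampled_all_blk_stt, STATE_UNKNOWN);

	printf("caller sees block state bits: 0x%x\n", (unsigned)caller_buf);
	return 0;
}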