From 049a542207ed694271316782397b78b2e202086a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Wagner?= Date: Thu, 14 Dec 2023 09:44:26 +0000 Subject: Update KMD to r47p0 Provenance: ipdelivery@ad01e50d640910a99224382bb227e6d4de627657 Change-Id: I19ac9bce34a5c5a319c1b4a388e8b037b3dfe6e7 --- mali_kbase/hwcnt/Kbuild | 1 - .../hwcnt/backend/mali_kbase_hwcnt_backend.h | 16 +- .../hwcnt/backend/mali_kbase_hwcnt_backend_csf.c | 397 +++++++++-- .../hwcnt/backend/mali_kbase_hwcnt_backend_csf.h | 26 +- .../backend/mali_kbase_hwcnt_backend_csf_if.h | 102 +-- .../backend/mali_kbase_hwcnt_backend_csf_if_fw.c | 60 +- .../hwcnt/backend/mali_kbase_hwcnt_backend_jm.c | 221 ++++-- .../backend/mali_kbase_hwcnt_backend_jm_watchdog.c | 19 +- mali_kbase/hwcnt/mali_kbase_hwcnt.c | 36 +- mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c | 782 +++++++++++++-------- mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h | 137 +++- mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c | 298 -------- mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h | 330 --------- mali_kbase/hwcnt/mali_kbase_hwcnt_types.c | 362 +++++----- mali_kbase/hwcnt/mali_kbase_hwcnt_types.h | 631 ++++++++++------- mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c | 18 +- 16 files changed, 1824 insertions(+), 1612 deletions(-) delete mode 100644 mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c delete mode 100644 mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h (limited to 'mali_kbase/hwcnt') diff --git a/mali_kbase/hwcnt/Kbuild b/mali_kbase/hwcnt/Kbuild index 8c8775f..d24d8ef 100644 --- a/mali_kbase/hwcnt/Kbuild +++ b/mali_kbase/hwcnt/Kbuild @@ -21,7 +21,6 @@ mali_kbase-y += \ hwcnt/mali_kbase_hwcnt.o \ hwcnt/mali_kbase_hwcnt_gpu.o \ - hwcnt/mali_kbase_hwcnt_gpu_narrow.o \ hwcnt/mali_kbase_hwcnt_types.o \ hwcnt/mali_kbase_hwcnt_virtualizer.o \ hwcnt/mali_kbase_hwcnt_watchdog_if_timer.o diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h index 6cfa6f5..cc3ba98 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -123,11 +123,21 @@ kbase_hwcnt_backend_dump_enable_nolock_fn(struct kbase_hwcnt_backend *backend, * typedef kbase_hwcnt_backend_dump_disable_fn - Disable counter dumping with * the backend. * @backend: Non-NULL pointer to backend. + * @dump_buffer: Pointer to an accumulated dump buffer to update or NULL. + * @enable_map: Pointer to enable map specifying enabled counters. Must be NULL if no @dump_buffer * * If the backend is already disabled, does nothing. - * Any undumped counter values since the last dump get will be lost. + * + * Any undumped counter values since the last dump get will be lost. However, Undumped block state + * can be retained by the backend. + * + * @dump_buffer and @enable_map gives the backend an opportunity to update an existing accumulated + * buffer with state information, and for the caller take ownership of it. In particular, the + * caller can use this when they require such information whilst the counter dumps are disabled. 
*/ -typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend); +typedef void kbase_hwcnt_backend_dump_disable_fn(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map); /** * typedef kbase_hwcnt_backend_dump_clear_fn - Reset all the current undumped diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c index 27acfc6..d7911ae 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c @@ -44,6 +44,9 @@ #define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000) #endif /* IS_FPGA && !NO_MALI */ +/* Used to check for a sample in which all counters in the block are disabled */ +#define HWCNT_BLOCK_EMPTY_SAMPLE (2) + /** * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states. * @@ -172,15 +175,16 @@ struct kbase_hwcnt_backend_csf_info { /** * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout - * information. - * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are - * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. - * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. + * information, as defined by the spec. * @fe_cnt: Front end block count. * @tiler_cnt: Tiler block count. * @mmu_l2_cnt: Memory system (MMU and L2 cache) block count. * @shader_cnt: Shader Core block count. - * @fw_block_cnt: Total number of firmware counters blocks. + * @fw_block_cnt: Total number of firmware counter blocks, with a single + * global FW block and a block per CSG. + * @hw_block_cnt: Total number of hardware counter blocks. The hw counters blocks are + * sub-categorized into 4 classes: front-end, tiler, memory system, and shader. + * hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt. * @block_cnt: Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt). * @shader_avail_mask: Bitmap of all shader cores in the system. * @enable_mask_offset: Offset in array elements of enable mask in each block @@ -190,12 +194,12 @@ struct kbase_hwcnt_backend_csf_info { * @values_per_block: For any block, the number of counters in total (header + payload). */ struct kbase_hwcnt_csf_physical_layout { - u8 hw_block_cnt; u8 fe_cnt; u8 tiler_cnt; u8 mmu_l2_cnt; u8 shader_cnt; u8 fw_block_cnt; + u8 hw_block_cnt; u8 block_cnt; u64 shader_avail_mask; size_t enable_mask_offset; @@ -220,6 +224,13 @@ struct kbase_hwcnt_csf_physical_layout { * @old_sample_buf: HWC sample buffer to save the previous values * for delta calculation, size * prfcnt_info.dump_bytes. + * @block_states: Pointer to array of block_state values for all + * blocks. + * @to_user_block_states: Block state buffer for client user. + * @accum_all_blk_stt: Block state to accumulate for all known blocks + * on next sample. + * @sampled_all_blk_stt: Block State to accumulate for all known blocks + * into the current sample. * @watchdog_last_seen_insert_idx: The insert index which watchdog has last * seen, to check any new firmware automatic * samples generated during the watchdog @@ -243,6 +254,8 @@ struct kbase_hwcnt_csf_physical_layout { * @hwc_dump_work: Worker to accumulate samples. * @hwc_threshold_work: Worker for consuming available samples when * threshold interrupt raised. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. 
+ * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. */ struct kbase_hwcnt_backend_csf { struct kbase_hwcnt_backend_csf_info *info; @@ -253,6 +266,10 @@ struct kbase_hwcnt_backend_csf { u64 *to_user_buf; u64 *accum_buf; u32 *old_sample_buf; + blk_stt_t *block_states; + blk_stt_t *to_user_block_states; + blk_stt_t accum_all_blk_stt; + blk_stt_t sampled_all_blk_stt; u32 watchdog_last_seen_insert_idx; struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf; void *ring_buf_cpu_base; @@ -265,15 +282,45 @@ struct kbase_hwcnt_backend_csf { struct workqueue_struct *hwc_dump_workq; struct work_struct hwc_dump_work; struct work_struct hwc_threshold_work; + size_t num_l2_slices; + u64 shader_present_bitmap; }; static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info) { - WARN_ON(!csf_info); + if (WARN_ON(!csf_info)) + return false; + csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx); return (csf_info->backend != NULL); } +void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, + size_t num_l2_slices, u64 shader_present_bitmap) +{ + struct kbase_hwcnt_backend_csf_info *csf_info; + + if (!iface) + return; + + csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info; + + /* Early out if the backend does not exist. */ + if (!csf_info || !csf_info->backend) + return; + + if (WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED)) + return; + + if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) || + WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) != + shader_present_bitmap)) + return; + + csf_info->backend->num_l2_slices = num_l2_slices; + csf_info->backend->shader_present_bitmap = shader_present_bitmap; +} + /** * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count * tracking. @@ -295,8 +342,7 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, clk_enable_map); - kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) { if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk)) backend_csf->prev_cycle_count[clk] = cycle_counts[clk]; } @@ -317,8 +363,7 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts, backend_csf->clk_enable_map); - kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) { if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) { backend_csf->cycle_count_elapsed[clk] = cycle_counts[clk] - backend_csf->prev_cycle_count[clk]; @@ -340,29 +385,29 @@ static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *bac /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to * guarantee headers are - * enabled if any counter is - * required. + * enabled. *@phys_enable_map: HWC physical enable map to be processed. 
*/ -static void -kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map) +void kbasep_hwcnt_backend_csf_process_enable_map( + struct kbase_hwcnt_physical_enable_map *phys_enable_map) { WARN_ON(!phys_enable_map); - /* Enable header if any counter is required from user, the header is - * controlled by bit 0 of the enable mask. + /* Unconditionally enable each block header and first counter, + * the header is controlled by bit 0 of the enable mask. */ - if (phys_enable_map->fe_bm) - phys_enable_map->fe_bm |= 1; + phys_enable_map->fe_bm |= 3; - if (phys_enable_map->tiler_bm) - phys_enable_map->tiler_bm |= 1; + phys_enable_map->tiler_bm |= 3; - if (phys_enable_map->mmu_l2_bm) - phys_enable_map->mmu_l2_bm |= 1; + phys_enable_map->mmu_l2_bm |= 3; + + phys_enable_map->shader_bm |= 3; + + phys_enable_map->fw_bm |= 3; + + phys_enable_map->csg_bm |= 3; - if (phys_enable_map->shader_bm) - phys_enable_map->shader_bm |= 1; } static void kbasep_hwcnt_backend_csf_init_layout( @@ -371,32 +416,35 @@ static void kbasep_hwcnt_backend_csf_init_layout( { size_t shader_core_cnt; size_t values_per_block; - size_t fw_blocks_count; - size_t hw_blocks_count; + size_t fw_block_cnt; + size_t hw_block_cnt; + size_t core_cnt; + WARN_ON(!prfcnt_info); WARN_ON(!phys_layout); - shader_core_cnt = fls64(prfcnt_info->core_mask); + shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask); values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES; - fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); - hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); + fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size); + hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size); + + core_cnt = shader_core_cnt; /* The number of hardware counters reported by the GPU matches the legacy guess-work we * have done in the past */ - WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT + - KBASE_HWCNT_V5_TILER_BLOCK_COUNT + - prfcnt_info->l2_count + shader_core_cnt); + WARN_ON(hw_block_cnt != KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT + + prfcnt_info->l2_count + core_cnt); *phys_layout = (struct kbase_hwcnt_csf_physical_layout){ .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT, .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT, .mmu_l2_cnt = prfcnt_info->l2_count, .shader_cnt = shader_core_cnt, - .fw_block_cnt = fw_blocks_count, - .hw_block_cnt = hw_blocks_count, - .block_cnt = fw_blocks_count + hw_blocks_count, + .fw_block_cnt = fw_block_cnt, + .hw_block_cnt = hw_block_cnt, + .block_cnt = fw_block_cnt + hw_block_cnt, .shader_avail_mask = prfcnt_info->core_mask, .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, .values_per_block = values_per_block, @@ -409,10 +457,14 @@ static void kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; + size_t block_state_bytes = backend_csf->phys_layout.block_cnt * + KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE; memset(backend_csf->to_user_buf, 0, user_buf_bytes); memset(backend_csf->accum_buf, 0, user_buf_bytes); memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes); + memset(backend_csf->block_states, 0, block_state_bytes); + memset(backend_csf->to_user_block_states, 0, block_state_bytes); } static void @@ -450,40 +502,130 @@ 
kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_cs static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf) { size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes; + size_t block_state_bytes = backend_csf->phys_layout.block_cnt * + KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE; /* Copy the data into the sample and wait for the user to get it. */ memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes); + memcpy(backend_csf->to_user_block_states, backend_csf->block_states, block_state_bytes); /* After copied data into user sample, clear the accumulator values to * prepare for the next accumulator, such as the next request or * threshold. */ memset(backend_csf->accum_buf, 0, user_buf_bytes); + memset(backend_csf->block_states, 0, block_state_bytes); +} + +/** + * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with + * information from a sample. + * @phys_layout: Physical memory layout information of HWC + * sample buffer. + * @enable_mask: Counter enable mask for the block whose state is being updated. + * @enable_state: The CSF backend internal enabled state. + * @exiting_protm: Whether or not the sample is taken when the GPU is exiting + * protected mode. + * @block_idx: Index of block within the ringbuffer. + * @block_state: Pointer to existing block state of the block whose state is being + * updated. + * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling. + */ +static void kbasep_hwcnt_backend_csf_update_block_state( + const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask, + enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm, + size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode) +{ + /* Offset of shader core blocks from the start of the HW blocks in the sample */ + size_t shader_core_block_offset = + (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt); + bool is_shader_core_block; + + is_shader_core_block = block_idx >= shader_core_block_offset; + + /* Set power bits for the block state for the block, for the sample */ + switch (enable_state) { + /* Disabled states */ + case KBASE_HWCNT_BACKEND_CSF_DISABLED: + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED: + case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER: + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_OFF); + break; + /* Enabled states */ + case KBASE_HWCNT_BACKEND_CSF_ENABLED: + case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED: + if (!is_shader_core_block) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_ON); + else if (!exiting_protm) { + /* When not exiting protected mode, a zero enable mask on a shader core + * counter block indicates the block was powered off for the sample, and + * a non-zero counter enable mask indicates the block was powered on for + * the sample. + */ + kbase_hwcnt_block_state_append(block_state, + (enable_mask ? KBASE_HWCNT_STATE_ON : + KBASE_HWCNT_STATE_OFF)); + } + break; + /* Error states */ + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER: + case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR: + default: + /* Do nothing */ + break; + } + + /* The following four cases apply to a block state in either normal mode or protected mode: + * 1. GPU executing in normal mode: Only set normal mode bit. + * 2. 
First sample request after GPU enters protected mode: Set both normal mode and + * protected mode bit. In this case, there will at least be one sample to accumulate + * in the ring buffer which was automatically triggered before GPU entered protected + * mode. + * 3. Subsequent sample requests while GPU remains in protected mode: Only set protected + * mode bit. In this case, the ring buffer should be empty and dump should return 0s but + * block state should be updated accordingly. This case is not handled here. + * 4. Samples requested after GPU exits protected mode: Set both protected mode and normal + * mode bits. + */ + if (exiting_protm || fw_in_protected_mode) + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_PROTECTED | + KBASE_HWCNT_STATE_NORMAL); + else + kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL); } static void kbasep_hwcnt_backend_csf_accumulate_sample( const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes, - u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples) + u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, + blk_stt_t *const block_states, bool clearing_samples, + enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode) { size_t block_idx; const u32 *old_block = old_sample_buf; const u32 *new_block = new_sample_buf; u64 *acc_block = accum_buf; + /* Flag to indicate whether current sample is exiting protected mode. */ + bool exiting_protm = false; const size_t values_per_block = phys_layout->values_per_block; - /* Performance counter blocks for firmware are stored before blocks for hardware. - * We skip over the firmware's performance counter blocks (counters dumping is not - * supported for firmware blocks, only hardware ones). + /* The block pointers now point to the first HW block, which is always a CSHW/front-end + * block. The counter enable mask for this block can be checked to determine whether this + * sample is taken after leaving protected mode - this is the only scenario where the CSHW + * block counter enable mask has only the first bit set, and no others. In this case, + * the values in this sample would not be meaningful, so they don't need to be accumulated. */ - old_block += values_per_block * phys_layout->fw_block_cnt; - new_block += values_per_block * phys_layout->fw_block_cnt; + exiting_protm = (new_block[phys_layout->enable_mask_offset] == 1); - for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt; - block_idx++) { + for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset]; const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset]; + /* Update block state with information of the current sample */ + kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask, + enable_state, exiting_protm, block_idx, + &block_states[block_idx], + fw_in_protected_mode); - if (new_enable_mask == 0) { + if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) { /* Hardware block was unavailable or we didn't turn on * any counters. Do nothing. */ @@ -492,7 +634,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( * enabled. We need to update the accumulation buffer. */ size_t ctr_idx; - /* Unconditionally copy the headers. 
*/ for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) { acc_block[ctr_idx] = new_block[ctr_idx]; @@ -517,8 +658,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( * saturating at their maximum value. */ if (!clearing_samples) { - if (old_enable_mask == 0) { - /* Hardware block was previously + if (!(old_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) { + /* Block was previously * unavailable. Accumulate the new * counters only, as we know previous * values are zeroes. @@ -545,15 +686,14 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample( } } } + old_block += values_per_block; new_block += values_per_block; acc_block += values_per_block; } - WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); - WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) - - (values_per_block * phys_layout->fw_block_cnt)); + WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES)); (void)dump_bytes; } @@ -569,10 +709,23 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples; u32 *old_sample_buf = backend_csf->old_sample_buf; u32 *new_sample_buf = old_sample_buf; + const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout; + + if (extract_index_to_start == insert_index_to_stop) { + /* No samples to accumulate but block states need to be updated for dump. */ + size_t block_idx; - if (extract_index_to_start == insert_index_to_stop) - /* No samples to accumulate. Early out. */ + for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) { + /* Set protected mode bit for block state if GPU is in protected mode, + * otherwise set the normal mode bit. + */ + kbase_hwcnt_block_state_append(&backend_csf->block_states[block_idx], + backend_csf->info->fw_in_protected_mode ? + KBASE_HWCNT_STATE_PROTECTED : + KBASE_HWCNT_STATE_NORMAL); + } return; + } /* Sync all the buffers to CPU side before read the data. 
*/ backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx, @@ -587,11 +740,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe const u32 buf_idx = raw_idx & (ring_buf_cnt - 1); new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes]; - - kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout, - buf_dump_bytes, backend_csf->accum_buf, - old_sample_buf, new_sample_buf, - clearing_samples); + kbasep_hwcnt_backend_csf_accumulate_sample( + phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf, + new_sample_buf, backend_csf->block_states, clearing_samples, + backend_csf->enable_state, backend_csf->info->fw_in_protected_mode); old_sample_buf = new_sample_buf; } @@ -875,6 +1027,8 @@ kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *bac enable->shader_bm = phys_enable_map.shader_bm; enable->tiler_bm = phys_enable_map.tiler_bm; enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm; + enable->fw_bm = phys_enable_map.fw_bm; + enable->csg_bm = phys_enable_map.csg_bm; enable->counter_set = phys_counter_set; enable->clk_enable_map = enable_map->clk_enable_map; } @@ -893,6 +1047,17 @@ kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend, backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx); + /* Enabling counters is an indication that the power may have previously been off for all + * blocks. + * + * In any case, the counters would not have been counting recently, so an 'off' block state + * is an approximation for this. + * + * This will be transferred to the dump only after a dump_wait(), or dump_disable() in + * cases where the caller requested such information. This is to handle when a + * dump_enable() happens in between dump_wait() and dump_get(). + */ + kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable); /* enable_state should be DISABLED before we transfer it to enabled */ @@ -956,13 +1121,19 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete( } /* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend) +static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map) { unsigned long flags = 0UL; struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend; bool do_disable = false; - WARN_ON(!backend_csf); + if (WARN_ON(!backend_csf || + (dump_buffer && (backend_csf->info->metadata != dump_buffer->metadata)) || + (enable_map && (backend_csf->info->metadata != enable_map->metadata)) || + (dump_buffer && !enable_map))) + return; backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags); @@ -1048,6 +1219,42 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba * for next enable. */ kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf); + + /* Disabling HWCNT is an indication that blocks have been powered off. This is important to + * know for L2, CSHW, and Tiler blocks, as this is currently the only way a backend can + * know if they are being powered off. + * + * In any case, even if they weren't really powered off, we won't be counting whilst + * disabled. 
+ * + * Update the block state information in the block state accumulator to show this, so that + * in the next dump blocks will have been seen as powered off for some of the time. + */ + kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); + + if (dump_buffer) { + /* In some use-cases, the caller will need the information whilst the counters are + * disabled, but will not be able to call into the backend to dump them. Instead, + * they have an opportunity here to request them to be accumulated into their + * buffer immediately. + * + * This consists of taking a sample of the accumulated block state (as though a + * real dump_get() had happened), then transfer ownership of that to the caller + * (i.e. erasing our copy of it). + */ + kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt, + &backend_csf->accum_all_blk_stt); + kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map, + backend_csf->sampled_all_blk_stt); + /* Now the block state has been passed out into the caller's own accumulation + * buffer, clear our own accumulated and sampled block state - ownership has been + * transferred. + */ + kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + } } /* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */ @@ -1183,6 +1390,16 @@ static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backen backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags); + /* Now that we've completed a sample, also sample+clear the accumulated block state. + * + * This is to ensure that a dump_enable() that happens in between dump_wait() and + * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block + * state is reported at the actual time that counters are being sampled. + */ + kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt, + &backend_csf->accum_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + return errcode; } @@ -1223,8 +1440,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend return -EINVAL; /* Extract elapsed cycle count for each clock domain if enabled. */ - kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) continue; @@ -1238,7 +1454,20 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend * as it is undefined to call this function without a prior succeeding * one to dump_wait(). */ - ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate); + ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, + backend_csf->to_user_block_states, dst_enable_map, + backend_csf->num_l2_slices, + backend_csf->shader_present_bitmap, accumulate); + + /* If no error occurred (zero ret value), then update block state for all blocks in the + * accumulation with the current sample's block state. 
+ */ + if (!ret) { + kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map, + backend_csf->sampled_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + } return ret; } @@ -1269,6 +1498,12 @@ static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *bac kfree(backend_csf->to_user_buf); backend_csf->to_user_buf = NULL; + kfree(backend_csf->block_states); + backend_csf->block_states = NULL; + + kfree(backend_csf->to_user_block_states); + backend_csf->to_user_block_states = NULL; + kfree(backend_csf); } @@ -1285,6 +1520,7 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info * { struct kbase_hwcnt_backend_csf *backend_csf = NULL; int errcode = -ENOMEM; + size_t block_state_bytes; WARN_ON(!csf_info); WARN_ON(!out_backend); @@ -1308,6 +1544,17 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info * if (!backend_csf->to_user_buf) goto err_alloc_user_sample_buf; + /* Allocate space to store block state values for each block */ + block_state_bytes = backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES * + KBASE_HWCNT_BLOCK_STATE_STRIDE; + backend_csf->block_states = kzalloc(block_state_bytes, GFP_KERNEL); + if (!backend_csf->block_states) + goto err_alloc_block_states_buf; + + backend_csf->to_user_block_states = kzalloc(block_state_bytes, GFP_KERNEL); + if (!backend_csf->to_user_block_states) + goto err_alloc_user_block_state_buf; + errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt, &backend_csf->ring_buf_cpu_base, &backend_csf->ring_buf); @@ -1343,6 +1590,8 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info * complete_all(&backend_csf->dump_completed); backend_csf->user_requested = false; backend_csf->watchdog_last_seen_insert_idx = 0; + kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); *out_backend = backend_csf; return 0; @@ -1351,6 +1600,12 @@ err_alloc_workqueue: backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx, backend_csf->ring_buf); err_ring_buf_alloc: + kfree(backend_csf->to_user_block_states); + backend_csf->to_user_block_states = NULL; +err_alloc_user_block_state_buf: + kfree(backend_csf->block_states); + backend_csf->block_states = NULL; +err_alloc_block_states_buf: kfree(backend_csf->to_user_buf); backend_csf->to_user_buf = NULL; err_alloc_user_sample_buf: @@ -1417,7 +1672,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend) if (!backend) return; - kbasep_hwcnt_backend_csf_dump_disable(backend); + kbasep_hwcnt_backend_csf_dump_disable(backend, NULL, NULL); /* Set the backend in csf_info to NULL so we won't handle any external * notification anymore since we are terminating. @@ -1828,7 +2083,21 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface * if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS) return -EIO; + /* We should reject initializing the metadata for any malformed + * firmware size. The legitimate firmware sizes are as follows: + * 1. fw_size == 0 on older GPUs + * 2. fw_size == block_size on GPUs that support FW counters but not CSG counters + * 3. 
fw_size == (1 + #CSG) * block size on GPUs that support CSG counters + */ + if ((csf_info->prfcnt_info.prfcnt_fw_size != 0) && + (csf_info->prfcnt_info.prfcnt_fw_size != csf_info->prfcnt_info.prfcnt_block_size) && + (csf_info->prfcnt_info.prfcnt_fw_size != + ((csf_info->prfcnt_info.csg_count + 1) * csf_info->prfcnt_info.prfcnt_block_size))) + return -EINVAL; + + gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0; gpu_info.l2_count = csf_info->prfcnt_info.l2_count; + gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count; gpu_info.core_mask = csf_info->prfcnt_info.core_mask; gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt; gpu_info.prfcnt_values_per_block = diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h index 9c5a5c9..2487db2 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,6 +31,8 @@ #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h" #include "hwcnt/mali_kbase_hwcnt_watchdog_if.h" +struct kbase_hwcnt_physical_enable_map; + /** * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend * interface. @@ -114,6 +116,28 @@ void kbase_hwcnt_backend_csf_on_unrecoverable_error(struct kbase_hwcnt_backend_i */ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interface *iface); +/** + * kbase_hwcnt_backend_csf_set_hw_availability() - CSF HWC backend function to + * set current HW configuration. + * HWC must be disabled before + * this function is called. + * @iface: Non-NULL pointer to HWC backend interface. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + */ +void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface, + size_t num_l2_slices, + uint64_t shader_present_bitmap); + +/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to + * guarantee headers are + * enabled if any counter is + * required. + * @phys_enable_map: HWC physical enable map to be processed. + */ +void kbasep_hwcnt_backend_csf_process_enable_map( + struct kbase_hwcnt_physical_enable_map *phys_enable_map); + /** * kbase_hwcnt_backend_csf_on_prfcnt_sample() - CSF performance counter sample * complete interrupt handler. diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h index 382a3ad..65bb965 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -39,6 +39,8 @@ struct kbase_hwcnt_backend_csf_if_ring_buf; * @shader_bm: Shader counters selection bitmask. 
* @tiler_bm: Tiler counters selection bitmask. * @mmu_l2_bm: MMU_L2 counters selection bitmask. + * @fw_bm: FW counters selection bitmask + * @csg_bm: FW CSG counters selection bitmask. * @counter_set: The performance counter set to enable. * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle * counter for a given clock domain. @@ -48,6 +50,8 @@ struct kbase_hwcnt_backend_csf_if_enable { u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; + u32 fw_bm; + u32 csg_bm; u8 counter_set; u64 clk_enable_map; }; @@ -63,6 +67,7 @@ struct kbase_hwcnt_backend_csf_if_enable { * counter dump. dump_bytes = prfcnt_hw_size + prfcnt_fw_size. * @prfcnt_block_size: Bytes of each performance counter block. * @l2_count: The MMU L2 cache count. + * @csg_count: The total number of CSGs in the system * @core_mask: Shader core mask. * @clk_cnt: Clock domain count in the system. * @clearing_samples: Indicates whether counters are cleared after each sample @@ -74,6 +79,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { size_t dump_bytes; size_t prfcnt_block_size; size_t l2_count; + u32 csg_count; u64 core_mask; u8 clk_cnt; bool clearing_samples; @@ -85,8 +91,8 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info { * held. * @ctx: Non-NULL pointer to a CSF context. */ -typedef void -kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void (*kbase_hwcnt_backend_csf_if_assert_lock_held_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_lock_fn - Acquire backend spinlock. @@ -95,8 +101,8 @@ kbase_hwcnt_backend_csf_if_assert_lock_held_fn(struct kbase_hwcnt_backend_csf_if * @flags: Pointer to the memory location that would store the previous * interrupt state. */ -typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - unsigned long *flags); +typedef void (*kbase_hwcnt_backend_csf_if_lock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long *flags); /** * typedef kbase_hwcnt_backend_csf_if_unlock_fn - Release backend spinlock. @@ -105,8 +111,8 @@ typedef void kbase_hwcnt_backend_csf_if_lock_fn(struct kbase_hwcnt_backend_csf_i * @flags: Previously stored interrupt state when Scheduler interrupt * spinlock was acquired. */ -typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - unsigned long flags); +typedef void (*kbase_hwcnt_backend_csf_if_unlock_fn)(struct kbase_hwcnt_backend_csf_if_ctx *ctx, + unsigned long flags); /** * typedef kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn - Get performance @@ -115,7 +121,7 @@ typedef void kbase_hwcnt_backend_csf_if_unlock_fn(struct kbase_hwcnt_backend_csf * @prfcnt_info: Non-NULL pointer to struct where performance counter * information should be stored. */ -typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( +typedef void (*kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn)( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info); @@ -135,10 +141,9 @@ typedef void kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn( * * Return: 0 on success, else error code. 
*/ -typedef int -kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u32 buf_count, void **cpu_dump_base, - struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); +typedef int (*kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 buf_count, void **cpu_dump_base, + struct kbase_hwcnt_backend_csf_if_ring_buf **ring_buf); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_sync_fn - Sync HWC dump buffers @@ -157,10 +162,10 @@ kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn(struct kbase_hwcnt_backend_csf_if_c * Flush cached HWC dump buffer data to ensure that all writes from GPU and CPU * are correctly observed. */ -typedef void -kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - u32 buf_index_first, u32 buf_index_last, bool for_cpu); +typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_sync_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, u32 buf_index_first, + u32 buf_index_last, bool for_cpu); /** * typedef kbase_hwcnt_backend_csf_if_ring_buf_free_fn - Free a ring buffer for @@ -169,9 +174,9 @@ kbase_hwcnt_backend_csf_if_ring_buf_sync_fn(struct kbase_hwcnt_backend_csf_if_ct * @ctx: Non-NULL pointer to a CSF interface context. * @ring_buf: Non-NULL pointer to the ring buffer which to be freed. */ -typedef void -kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); +typedef void (*kbase_hwcnt_backend_csf_if_ring_buf_free_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf); /** * typedef kbase_hwcnt_backend_csf_if_timestamp_ns_fn - Get the current @@ -181,7 +186,8 @@ kbase_hwcnt_backend_csf_if_ring_buf_free_fn(struct kbase_hwcnt_backend_csf_if_ct * * Return: CSF interface timestamp in nanoseconds. */ -typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef u64 (*kbase_hwcnt_backend_csf_if_timestamp_ns_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_enable_fn - Setup and enable hardware @@ -192,10 +198,10 @@ typedef u64 kbase_hwcnt_backend_csf_if_timestamp_ns_fn(struct kbase_hwcnt_backen * * Requires lock to be taken before calling. */ -typedef void -kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, - struct kbase_hwcnt_backend_csf_if_enable *enable); +typedef void (*kbase_hwcnt_backend_csf_if_dump_enable_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, + struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf, + struct kbase_hwcnt_backend_csf_if_enable *enable); /** * typedef kbase_hwcnt_backend_csf_if_dump_disable_fn - Disable hardware counter @@ -204,7 +210,8 @@ kbase_hwcnt_backend_csf_if_dump_enable_fn(struct kbase_hwcnt_backend_csf_if_ctx * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void (*kbase_hwcnt_backend_csf_if_dump_disable_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_dump_request_fn - Request a HWC dump. @@ -213,7 +220,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_disable_fn(struct kbase_hwcnt_backe * * Requires lock to be taken before calling. 
*/ -typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx); +typedef void (*kbase_hwcnt_backend_csf_if_dump_request_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx); /** * typedef kbase_hwcnt_backend_csf_if_get_indexes_fn - Get current extract and @@ -226,8 +234,8 @@ typedef void kbase_hwcnt_backend_csf_if_dump_request_fn(struct kbase_hwcnt_backe * * Requires lock to be taken before calling. */ -typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u32 *extract_index, u32 *insert_index); +typedef void (*kbase_hwcnt_backend_csf_if_get_indexes_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 *extract_index, u32 *insert_index); /** * typedef kbase_hwcnt_backend_csf_if_set_extract_index_fn - Update the extract @@ -239,9 +247,8 @@ typedef void kbase_hwcnt_backend_csf_if_get_indexes_fn(struct kbase_hwcnt_backen * * Requires lock to be taken before calling. */ -typedef void -kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u32 extract_index); +typedef void (*kbase_hwcnt_backend_csf_if_set_extract_index_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u32 extract_index); /** * typedef kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn - Get the current @@ -255,9 +262,8 @@ kbase_hwcnt_backend_csf_if_set_extract_index_fn(struct kbase_hwcnt_backend_csf_i * * Requires lock to be taken before calling. */ -typedef void -kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf_if_ctx *ctx, - u64 *cycle_counts, u64 clk_enable_map); +typedef void (*kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn)( + struct kbase_hwcnt_backend_csf_if_ctx *ctx, u64 *cycle_counts, u64 clk_enable_map); /** * struct kbase_hwcnt_backend_csf_if - Hardware counter backend CSF virtual @@ -283,20 +289,20 @@ kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn(struct kbase_hwcnt_backend_csf */ struct kbase_hwcnt_backend_csf_if { struct kbase_hwcnt_backend_csf_if_ctx *ctx; - kbase_hwcnt_backend_csf_if_assert_lock_held_fn *assert_lock_held; - kbase_hwcnt_backend_csf_if_lock_fn *lock; - kbase_hwcnt_backend_csf_if_unlock_fn *unlock; - kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn *get_prfcnt_info; - kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn *ring_buf_alloc; - kbase_hwcnt_backend_csf_if_ring_buf_sync_fn *ring_buf_sync; - kbase_hwcnt_backend_csf_if_ring_buf_free_fn *ring_buf_free; - kbase_hwcnt_backend_csf_if_timestamp_ns_fn *timestamp_ns; - kbase_hwcnt_backend_csf_if_dump_enable_fn *dump_enable; - kbase_hwcnt_backend_csf_if_dump_disable_fn *dump_disable; - kbase_hwcnt_backend_csf_if_dump_request_fn *dump_request; - kbase_hwcnt_backend_csf_if_get_indexes_fn *get_indexes; - kbase_hwcnt_backend_csf_if_set_extract_index_fn *set_extract_index; - kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn *get_gpu_cycle_count; + kbase_hwcnt_backend_csf_if_assert_lock_held_fn assert_lock_held; + kbase_hwcnt_backend_csf_if_lock_fn lock; + kbase_hwcnt_backend_csf_if_unlock_fn unlock; + kbase_hwcnt_backend_csf_if_get_prfcnt_info_fn get_prfcnt_info; + kbase_hwcnt_backend_csf_if_ring_buf_alloc_fn ring_buf_alloc; + kbase_hwcnt_backend_csf_if_ring_buf_sync_fn ring_buf_sync; + kbase_hwcnt_backend_csf_if_ring_buf_free_fn ring_buf_free; + kbase_hwcnt_backend_csf_if_timestamp_ns_fn timestamp_ns; + kbase_hwcnt_backend_csf_if_dump_enable_fn dump_enable; + kbase_hwcnt_backend_csf_if_dump_disable_fn dump_disable; + kbase_hwcnt_backend_csf_if_dump_request_fn dump_request; + 
kbase_hwcnt_backend_csf_if_get_indexes_fn get_indexes; + kbase_hwcnt_backend_csf_if_set_extract_index_fn set_extract_index; + kbase_hwcnt_backend_csf_if_get_gpu_cycle_count_fn get_gpu_cycle_count; }; #endif /* #define _KBASE_HWCNT_BACKEND_CSF_IF_H_ */ diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index c8cf934..1b7a116 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include "hwcnt/mali_kbase_hwcnt_gpu.h" #include "hwcnt/mali_kbase_hwcnt_types.h" @@ -39,7 +39,6 @@ #include #include "mali_kbase_ccswe.h" - /* Ring buffer virtual address start at 4GB */ #define KBASE_HWC_CSF_RING_BUFFER_VA_START (1ull << 32) @@ -206,6 +205,20 @@ kbasep_hwcnt_backend_csf_if_fw_cc_disable(struct kbase_hwcnt_backend_csf_if_fw_c kbase_clk_rate_trace_manager_unsubscribe(rtm, &fw_ctx->rate_listener); } +#if !IS_ENABLED(CONFIG_MALI_NO_MALI) +/** + * kbasep_hwcnt_backend_csf_core_mask() - Obtain Core Mask - MAX Core ID + * + * @gpu_props: gpu_props structure + * + * Return: calculated core mask (maximum Core ID) + */ +static u64 kbasep_hwcnt_backend_csf_core_mask(struct kbase_gpu_props *gpu_props) +{ + return gpu_props->coherency_info.group.core_mask; +} +#endif + static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( struct kbase_hwcnt_backend_csf_if_ctx *ctx, struct kbase_hwcnt_backend_csf_if_prfcnt_info *prfcnt_info) @@ -234,6 +247,8 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( u32 prfcnt_size; u32 prfcnt_hw_size; u32 prfcnt_fw_size; + u32 csg_count; + u32 fw_block_count = 0; u32 prfcnt_block_size = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK * KBASE_HWCNT_VALUE_HW_BYTES; @@ -242,28 +257,41 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info( fw_ctx = (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; kbdev = fw_ctx->kbdev; + csg_count = kbdev->csf.global_iface.group_num; prfcnt_size = kbdev->csf.global_iface.prfcnt_size; prfcnt_hw_size = GLB_PRFCNT_SIZE_HARDWARE_SIZE_GET(prfcnt_size); prfcnt_fw_size = GLB_PRFCNT_SIZE_FIRMWARE_SIZE_GET(prfcnt_size); - fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; /* Read the block size if the GPU has the register PRFCNT_FEATURES * which was introduced in architecture version 11.x.7. */ - if ((kbdev->gpu_props.props.raw_props.gpu_id & GPU_ID2_PRODUCT_MODEL) >= - GPU_ID2_PRODUCT_TTUX) { - prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET( - kbase_reg_read(kbdev, GPU_CONTROL_REG(PRFCNT_FEATURES))) + if (kbase_reg_is_valid(kbdev, GPU_CONTROL_ENUM(PRFCNT_FEATURES))) { + prfcnt_block_size = PRFCNT_FEATURES_COUNTER_BLOCK_SIZE_GET(KBASE_REG_READ( + kbdev, GPU_CONTROL_ENUM(PRFCNT_FEATURES))) << 8; } + /* Extra sanity check to ensure that we support two different configurations: + * a global FW block without CSG blocks and a global FW block with CSG blocks. 
+ */ + if (!prfcnt_fw_size) + fw_block_count = 0; + else if (prfcnt_fw_size == prfcnt_block_size) + fw_block_count = 1; + else if (prfcnt_fw_size == ((1 + csg_count) * prfcnt_block_size)) + fw_block_count = 1 + csg_count; + else + WARN_ON_ONCE(true); + + fw_ctx->buf_bytes = prfcnt_hw_size + prfcnt_fw_size; *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){ .prfcnt_hw_size = prfcnt_hw_size, .prfcnt_fw_size = prfcnt_fw_size, .dump_bytes = fw_ctx->buf_bytes, .prfcnt_block_size = prfcnt_block_size, - .l2_count = kbdev->gpu_props.props.l2_props.num_l2_slices, - .core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask, + .l2_count = kbdev->gpu_props.num_l2_slices, + .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props), + .csg_count = fw_block_count > 1 ? csg_count : 0, .clk_cnt = fw_ctx->clk_cnt, .clearing_samples = true, }; @@ -284,7 +312,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( struct page **page_list; void *cpu_addr; int ret; - int i; + size_t i; size_t num_pages; u64 flags; struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf; @@ -330,7 +358,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Get physical page for the buffer */ ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], num_pages, phys, false, NULL); - if (ret != num_pages) + if ((size_t)ret != num_pages) goto phys_mem_pool_alloc_error; /* Get the CPU virtual address */ @@ -342,7 +370,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( goto vmap_error; flags = KBASE_REG_GPU_WR | KBASE_REG_GPU_NX | - KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE); + KBASE_REG_MEMATTR_INDEX(KBASE_MEMATTR_INDEX_NON_CACHEABLE); /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, @@ -508,6 +536,7 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx (struct kbase_hwcnt_backend_csf_if_fw_ctx *)ctx; struct kbase_hwcnt_backend_csf_if_fw_ring_buf *fw_ring_buf = (struct kbase_hwcnt_backend_csf_if_fw_ring_buf *)ring_buf; + u32 csg_mask; WARN_ON(!ctx); WARN_ON(!ring_buf); @@ -516,6 +545,7 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx kbdev = fw_ctx->kbdev; global_iface = &kbdev->csf.global_iface; + csg_mask = (1 << kbdev->csf.global_iface.group_num) - 1; /* Configure */ prfcnt_config = GLB_PRFCNT_CONFIG_SIZE_SET(0, fw_ring_buf->buf_count); @@ -536,6 +566,12 @@ kbasep_hwcnt_backend_csf_if_fw_dump_enable(struct kbase_hwcnt_backend_csf_if_ctx kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_SHADER_EN, enable->shader_bm); kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_MMU_L2_EN, enable->mmu_l2_bm); kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_TILER_EN, enable->tiler_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_FW_EN, enable->fw_bm); + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSG_EN, enable->csg_bm); + + /* Enable all of the CSGs by default. 
*/ + kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CSG_SELECT, csg_mask); + /* Configure the HWC set and buffer size */ kbase_csf_firmware_global_input(global_iface, GLB_PRFCNT_CONFIG, prfcnt_config); diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c index 8b3caac..4df7dd4 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -80,30 +80,40 @@ struct kbase_hwcnt_jm_physical_layout { /** * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend. - * @info: Info used to create the backend. - * @kctx: KBase context used for GPU memory allocation and - * counter dumping. - * @gpu_dump_va: GPU hardware counter dump buffer virtual address. - * @cpu_dump_va: CPU mapping of gpu_dump_va. - * @vmap: Dump buffer vmap. - * @to_user_buf: HWC sample buffer for client user, size - * metadata.dump_buf_bytes. - * @enabled: True if dumping has been enabled, else false. - * @pm_core_mask: PM state sync-ed shaders core mask for the enabled - * dumping. - * @curr_config: Current allocated hardware resources to correctly map the - * source raw dump buffer to the destination dump buffer. - * @clk_enable_map: The enable map specifying enabled clock domains. - * @cycle_count_elapsed: - * Cycle count elapsed for a given sample period. - * The top clock cycle, index 0, is read directly from - * hardware, but the other clock domains need to be - * calculated with software estimation. - * @prev_cycle_count: Previous cycle count to calculate the cycle count for - * sample period. - * @rate_listener: Clock rate listener callback state. - * @ccswe_shader_cores: Shader cores cycle count software estimator. - * @phys_layout: Physical memory layout information of HWC sample buffer. + * @info: Info used to create the backend. + * @kctx: KBase context used for GPU memory allocation and + * counter dumping. + * @gpu_dump_va: GPU hardware counter dump buffer virtual address. + * @cpu_dump_va: CPU mapping of gpu_dump_va. + * @vmap: Dump buffer vmap. + * @to_user_buf: HWC sample buffer for client user, size + * metadata.dump_buf_bytes. + * @enabled: True if dumping has been enabled, else false. + * @accum_all_blk_stt: Block State to accumulate on next sample, for all types + * of block. + * @sampled_all_blk_stt: Block State to accumulate into the current sample, for + * all types of block. + * @debug_core_mask: User-set mask of shader cores that can be used. + * @pm_core_mask: PM state sync-ed shaders core mask for the enabled + * dumping. + * @curr_config: Current allocated hardware resources to correctly map the + * source raw dump buffer to the destination dump buffer. + * @max_core_mask: Core mask of all cores allocated to the GPU (non + * virtualized platforms) or resource group (virtualized + * platforms). + * @max_l2_slices: Maximum number of L2 slices allocated to the GPU (non + * virtualized platforms) or resource group (virtualized + * platforms). + * @clk_enable_map: The enable map specifying enabled clock domains. + * @cycle_count_elapsed: Cycle count elapsed for a given sample period. 
+ * The top clock cycle, index 0, is read directly from + * hardware, but the other clock domains need to be + * calculated with software estimation. + * @prev_cycle_count: Previous cycle count to calculate the cycle count for + * sample period. + * @rate_listener: Clock rate listener callback state. + * @ccswe_shader_cores: Shader cores cycle count software estimator. + * @phys_layout: Physical memory layout information of HWC sample buffer. */ struct kbase_hwcnt_backend_jm { const struct kbase_hwcnt_backend_jm_info *info; @@ -113,8 +123,13 @@ struct kbase_hwcnt_backend_jm { struct kbase_vmap_struct *vmap; u64 *to_user_buf; bool enabled; + blk_stt_t accum_all_blk_stt; + blk_stt_t sampled_all_blk_stt; + u64 debug_core_mask; u64 pm_core_mask; struct kbase_hwcnt_curr_config curr_config; + u64 max_core_mask; + size_t max_l2_slices; u64 clk_enable_map; u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS]; u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS]; @@ -136,26 +151,22 @@ struct kbase_hwcnt_backend_jm { static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev, struct kbase_hwcnt_gpu_info *info) { - size_t clk; + size_t clk, l2_count, core_mask; if (!kbdev || !info) return -EINVAL; #if IS_ENABLED(CONFIG_MALI_NO_MALI) - info->l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; - info->core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; - info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; -#else /* CONFIG_MALI_NO_MALI */ - { - const struct base_gpu_props *props = &kbdev->gpu_props.props; - const size_t l2_count = props->l2_props.num_l2_slices; - const size_t core_mask = props->coherency_info.group[0].core_mask; + l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS; + core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1; +#else + l2_count = kbdev->gpu_props.num_l2_slices; + core_mask = kbdev->gpu_props.coherency_info.group.core_mask; +#endif - info->l2_count = l2_count; - info->core_mask = core_mask; - info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; - } -#endif /* CONFIG_MALI_NO_MALI */ + info->l2_count = l2_count; + info->core_mask = core_mask; + info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK; /* Determine the number of available clock domains. 
*/ for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) { @@ -353,9 +364,9 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; struct kbase_context *kctx; struct kbase_device *kbdev; - struct kbase_hwcnt_physical_enable_map phys_enable_map; + struct kbase_hwcnt_physical_enable_map phys_enable_map = { 0 }; enum kbase_hwcnt_physical_set phys_counter_set; - struct kbase_instr_hwcnt_enable enable; + struct kbase_instr_hwcnt_enable enable = { 0 }; u64 timestamp_ns; if (!backend_jm || !enable_map || backend_jm->enabled || @@ -371,18 +382,21 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set); - enable.fe_bm = phys_enable_map.fe_bm; - enable.shader_bm = phys_enable_map.shader_bm; - enable.tiler_bm = phys_enable_map.tiler_bm; - enable.mmu_l2_bm = phys_enable_map.mmu_l2_bm; - enable.counter_set = phys_counter_set; + enable = (struct kbase_instr_hwcnt_enable) + { + .fe_bm = phys_enable_map.fe_bm, + .shader_bm = phys_enable_map.shader_bm, + .tiler_bm = phys_enable_map.tiler_bm, + .mmu_l2_bm = phys_enable_map.mmu_l2_bm, + .counter_set = phys_counter_set, #if IS_ENABLED(CONFIG_MALI_NO_MALI) - /* The dummy model needs the CPU mapping. */ - enable.dump_buffer = (uintptr_t)backend_jm->cpu_dump_va; + /* The dummy model needs the CPU mapping. */ + .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va, #else - enable.dump_buffer = backend_jm->gpu_dump_va; + .dump_buffer = backend_jm->gpu_dump_va, #endif /* CONFIG_MALI_NO_MALI */ - enable.dump_buffer_bytes = backend_jm->info->dump_bytes; + .dump_buffer_bytes = backend_jm->info->dump_bytes, + }; timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); @@ -395,9 +409,24 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend, if (errcode) goto error; + backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev); + backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count; + backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask; + backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev); backend_jm->enabled = true; + /* Enabling counters is an indication that the power may have previously been off for all + * blocks. + * + * In any case, the counters would not have been counting recently, so an 'off' block state + * is an approximation for this. + * + * This will be transferred to the dump only after a dump_wait(), or dump_disable() in + * cases where the caller requested such information. This is to handle when a + * dump_enable() happens in between dump_wait() and dump_get(). 
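The block-state bookkeeping used here relies on blk_stt_t behaving as a small bitmask of KBASE_HWCNT_STATE_* flags, where "append" accumulates by OR and "set" overwrites. That is an assumption inferred from how the helpers are called in this patch; the real helpers live in the hwcnt types header and may differ in detail. A self-contained sketch of that model:

/* Sketch under the assumption that blk_stt_t is a flag bitmask. */
typedef u32 example_blk_stt_t;

#define EXAMPLE_STATE_UNKNOWN ((example_blk_stt_t)0)
#define EXAMPLE_STATE_ON      ((example_blk_stt_t)1 << 0)
#define EXAMPLE_STATE_OFF     ((example_blk_stt_t)1 << 1)

static inline void example_block_state_set(example_blk_stt_t *stt, example_blk_stt_t state)
{
	*stt = state;
}

static inline void example_block_state_append(example_blk_stt_t *stt, example_blk_stt_t extra)
{
	/* Accumulate: a block that was both on and off during the period keeps
	 * both flags until the next sample consumes them.
	 */
	*stt |= extra;
}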
+ */ + kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns); @@ -430,12 +459,20 @@ static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backe } /* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */ -static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend) +static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *enable_map) { int errcode; struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; - if (WARN_ON(!backend_jm) || !backend_jm->enabled) + if (WARN_ON(!backend_jm || + (dump_buffer && (backend_jm->info->metadata != dump_buffer->metadata)) || + (enable_map && (backend_jm->info->metadata != enable_map->metadata)) || + (dump_buffer && !enable_map))) + return; + /* No WARN needed here, but still return early if backend is already disabled */ + if (!backend_jm->enabled) return; kbasep_hwcnt_backend_jm_cc_disable(backend_jm); @@ -443,6 +480,42 @@ static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *bac errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx); WARN_ON(errcode); + /* Disabling HWCNT is an indication that blocks have been powered off. This is important to + * know for L2 and Tiler blocks, as this is currently the only way a backend can know if + * they are being powered off. + * + * In any case, even if they weren't really powered off, we won't be counting whilst + * disabled. + * + * Update the block state information in the block state accumulator to show this, so that + * in the next dump blocks will have been seen as powered off for some of the time. + */ + kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF); + + if (dump_buffer) { + /* In some use-cases, the caller will need the information whilst the counters are + * disabled, but will not be able to call into the backend to dump them. Instead, + * they have an opportunity here to request them to be accumulated into their + * buffer immediately. + * + * This consists of taking a sample of the accumulated block state (as though a + * real dump_get() had happened), then transfer ownership of that to the caller + * (i.e. erasing our copy of it). + */ + kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt, + &backend_jm->accum_all_blk_stt); + kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map, + backend_jm->sampled_all_blk_stt); + /* Now the block state has been passed out into the caller's own accumulation + * buffer, clear our own accumulated and sampled block state - ownership has been + * transferred. 
+ */ + kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); + } + backend_jm->enabled = false; } @@ -480,8 +553,7 @@ static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *back *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend); ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx); - kbase_hwcnt_metadata_for_each_clock(metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk)) continue; @@ -514,12 +586,27 @@ static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *back /* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */ static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend) { + int errcode; struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend; if (!backend_jm || !backend_jm->enabled) return -EINVAL; - return kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); + errcode = kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx); + if (errcode) + return errcode; + + /* Now that we've completed a sample, also sample+clear the accumulated block state. + * + * This is to ensure that a dump_enable() that happens in between dump_wait() and + * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block + * state is reported at the actual time that counters are being sampled. + */ + kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt, + &backend_jm->accum_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + + return errcode; } /* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */ @@ -533,8 +620,8 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, #if IS_ENABLED(CONFIG_MALI_NO_MALI) struct kbase_device *kbdev; unsigned long flags; - int errcode; #endif /* CONFIG_MALI_NO_MALI */ + int errcode; if (!backend_jm || !dst || !dst_enable_map || (backend_jm->info->metadata != dst->metadata) || @@ -548,8 +635,7 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, kbasep_hwcnt_backend_jm_dump_sample(backend_jm); /* Extract elapsed cycle count for each clock domain if enabled. 
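Further down, the elapsed-cycle extraction walks every clock domain described in the metadata and only copies values for domains present in the caller's enable map. A condensed sketch of that pattern follows; the destination array name is invented for illustration and locking is omitted.

/* Illustrative only: mirrors the per-clock-domain filtering used by dump_get().
 * 'dst_cycle_counts' is a hypothetical output array, one slot per clock domain.
 */
static void example_copy_enabled_cycle_counts(const struct kbase_hwcnt_backend_jm *backend_jm,
					      const struct kbase_hwcnt_enable_map *dst_enable_map,
					      u64 *dst_cycle_counts)
{
	size_t clk;

	kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
		/* Skip clock domains the caller did not ask for. */
		if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
			continue;

		dst_cycle_counts[clk] = backend_jm->cycle_count_elapsed[clk];
	}
}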
*/ - kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) { if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) continue; @@ -572,9 +658,18 @@ static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend, if (errcode) return errcode; #endif /* CONFIG_MALI_NO_MALI */ - return kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, - backend_jm->pm_core_mask, &backend_jm->curr_config, - accumulate); + errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map, + backend_jm->pm_core_mask, backend_jm->debug_core_mask, + backend_jm->max_core_mask, backend_jm->max_l2_slices, + &backend_jm->curr_config, accumulate); + + if (errcode) + return errcode; + + kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map, + backend_jm->sampled_all_blk_stt); + kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + return errcode; } /** @@ -705,6 +800,8 @@ static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_in kbase_ccswe_init(&backend->ccswe_shader_cores); backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change; + kbase_hwcnt_block_state_set(&backend->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + kbase_hwcnt_block_state_set(&backend->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); *out_backend = backend; return 0; @@ -752,7 +849,7 @@ static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend) if (!backend) return; - kbasep_hwcnt_backend_jm_dump_disable(backend); + kbasep_hwcnt_backend_jm_dump_disable(backend, NULL, NULL); kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend); } diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c index a8654ea..1b54151 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -268,9 +268,9 @@ kbasep_hwcnt_backend_jm_watchdog_info_create(struct kbase_hwcnt_backend_interfac if (!info) return NULL; - *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ .jm_backend_iface = backend_iface, - .dump_watchdog_iface = - watchdog_iface }; + *info = (struct kbase_hwcnt_backend_jm_watchdog_info){ + .jm_backend_iface = backend_iface, .dump_watchdog_iface = watchdog_iface + }; return info; } @@ -443,7 +443,8 @@ static int kbasep_hwcnt_backend_jm_watchdog_dump_enable_common( spin_unlock_irqrestore(&wd_backend->locked.watchdog_lock, flags); } else /*Reverting the job manager backend back to disabled*/ - wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); + wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend, + NULL, NULL); } return errcode; @@ -472,7 +473,10 @@ kbasep_hwcnt_backend_jm_watchdog_dump_enable_nolock(struct kbase_hwcnt_backend * } /* Job manager watchdog backend, implementation of dump_disable */ -static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend) +static void +kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_backend *backend, + struct kbase_hwcnt_dump_buffer *dump_buffer, + const struct kbase_hwcnt_enable_map *buf_enable_map) { struct kbase_hwcnt_backend_jm_watchdog *const wd_backend = (void *)backend; unsigned long flags; @@ -497,7 +501,8 @@ static void kbasep_hwcnt_backend_jm_watchdog_dump_disable(struct kbase_hwcnt_bac wd_backend->info->dump_watchdog_iface->disable( wd_backend->info->dump_watchdog_iface->timer); - wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend); + wd_backend->info->jm_backend_iface->dump_disable(wd_backend->jm_backend, dump_buffer, + buf_enable_map); } /* Job manager watchdog backend, implementation of dump_clear */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt.c b/mali_kbase/hwcnt/mali_kbase_hwcnt.c index 34deb5d..8b1de2e 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt.c +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt.c @@ -292,7 +292,8 @@ static void kbasep_hwcnt_accumulator_disable(struct kbase_hwcnt_context *hctx, b accum->accumulated = true; disable: - hctx->iface->dump_disable(accum->backend); + hctx->iface->dump_disable(accum->backend, (accum->accumulated) ? &accum->accum_buf : NULL, + &accum->enable_map); /* Regardless of any errors during the accumulate, put the accumulator * in the disabled state. @@ -453,8 +454,20 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 * */ if ((state == ACCUM_STATE_ENABLED) && new_map) { /* Backend is only enabled if there were any enabled counters */ - if (cur_map_any_enabled) - hctx->iface->dump_disable(accum->backend); + if (cur_map_any_enabled) { + /* In this case we do *not* want to have the buffer updated with extra + * block state, it should instead remain in the backend until the next dump + * happens, hence supplying NULL as the dump_buffer parameter here. + * + * Attempting to take ownership of backend-accumulated block state at this + * point will instead give inaccurate information. For example the dump + * buffer for 'set_counters' operation might be dumping a period that + * should've been entirely in the 'ON' state, but would report it as + * partially in the 'OFF' state. Instead, that 'OFF' state should be + * reported in the _next_ dump. 
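With this change the accumulator has two distinct ways of calling the backend's dump_disable(), depending on whether it wants to claim the backend's accumulated block state immediately or leave it for the next dump. A compressed illustration of the two call shapes is below; the wrapper function and its flag are hypothetical, and the accumulator field layout is assumed from the surrounding hunks.

/* Illustration of the two dump_disable() call shapes introduced by this patch;
 * 'take_block_state' is a hypothetical flag, not a real field.
 */
static void example_disable_backend(struct kbase_hwcnt_context *hctx,
				    struct kbase_hwcnt_accumulator *accum,
				    bool take_block_state)
{
	if (take_block_state) {
		/* Fold the backend's accumulated block state into the caller's
		 * buffer now, e.g. when the accumulator itself is disabled.
		 */
		hctx->iface->dump_disable(accum->backend,
					  accum->accumulated ? &accum->accum_buf : NULL,
					  &accum->enable_map);
	} else {
		/* Leave block state in the backend so it is reported with the
		 * next dump, e.g. when only the enable map is being changed.
		 */
		hctx->iface->dump_disable(accum->backend, NULL, NULL);
	}
}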
+			 */
+			hctx->iface->dump_disable(accum->backend, NULL, NULL);
+		}
 
 		/* (Re-)enable the backend if the new map has enabled counters.
 		 * No need to acquire the spinlock, as concurrent enable while
@@ -481,9 +494,15 @@ static int kbasep_hwcnt_accumulator_dump(struct kbase_hwcnt_context *hctx, u64 *
 
 		/* If we've not written anything into the dump buffer so far, it
 		 * means there was nothing to write. Zero any enabled counters.
+		 *
+		 * In this state, the blocks are likely to be off (and at the very least, not
+		 * counting), so write in the 'off' block state.
 		 */
-		if (!dump_written)
+		if (!dump_written) {
 			kbase_hwcnt_dump_buffer_zero(dump_buf, cur_map);
+			kbase_hwcnt_dump_buffer_block_state_update(dump_buf, cur_map,
+								   KBASE_HWCNT_STATE_OFF);
+		}
 	}
 
 	/* Write out timestamps */
@@ -498,8 +517,13 @@ error:
 	/* An error was only physically possible if the backend was enabled */
 	WARN_ON(state != ACCUM_STATE_ENABLED);
 
-	/* Disable the backend, and transition to the error state */
-	hctx->iface->dump_disable(accum->backend);
+	/* Disable the backend, and transition to the error state. In this case, we can try to save
+	 * the block state into the accumulated buffer, but there's no guarantee we'll have one, so
+	 * this is more of a 'best effort' for error cases. There would be a suitable block
+	 * state recorded on the next dump_enable() anyway.
+	 */
+	hctx->iface->dump_disable(accum->backend, (accum->accumulated) ? &accum->accum_buf : NULL,
+				  cur_map);
 
 	spin_lock_irqsave(&hctx->state_lock, flags);
 	accum->state = ACCUM_STATE_ERROR;
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
index 74916da..5da5645 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,11 @@ * */ +#include #include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_types.h" #include +#include /** enum enable_map_idx - index into a block enable map that spans multiple u64 array elements */ @@ -32,78 +33,107 @@ enum enable_map_idx { EM_COUNT, }; -static void kbasep_get_fe_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) +static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_fe_block_type(enum kbase_hwcnt_set counter_set, + bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE; case KBASE_HWCNT_SET_SECONDARY: if (is_csf) - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; case KBASE_HWCNT_SET_TERTIARY: if (is_csf) - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; default: - WARN_ON(true); + WARN(true, "Invalid counter set for FE block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED; } } -static void kbasep_get_tiler_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) +static enum kbase_hwcnt_gpu_v5_block_type +kbasep_get_tiler_block_type(enum kbase_hwcnt_set counter_set) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER; case KBASE_HWCNT_SET_SECONDARY: case KBASE_HWCNT_SET_TERTIARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; default: - WARN_ON(true); + WARN(true, "Invalid counter set for tiler block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED; } } -static void kbasep_get_sc_block_type(u64 *dst, enum kbase_hwcnt_set counter_set, bool is_csf) +static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_sc_block_type(enum kbase_hwcnt_set counter_set, + bool is_csf) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC; case KBASE_HWCNT_SET_SECONDARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2; case KBASE_HWCNT_SET_TERTIARY: if (is_csf) - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3; else - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; default: - WARN_ON(true); + WARN(true, "Invalid counter set for shader core block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED; } } -static void kbasep_get_memsys_block_type(u64 *dst, enum kbase_hwcnt_set counter_set) + +static enum kbase_hwcnt_gpu_v5_block_type +kbasep_get_memsys_block_type(enum kbase_hwcnt_set counter_set) { switch (counter_set) { case KBASE_HWCNT_SET_PRIMARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS; case 
KBASE_HWCNT_SET_SECONDARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2; case KBASE_HWCNT_SET_TERTIARY: - *dst = KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; - break; + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; default: - WARN_ON(true); + WARN(true, "Invalid counter set for Memsys block type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED; + } +} + +static enum kbase_hwcnt_gpu_v5_block_type kbasep_get_fw_block_type(enum kbase_hwcnt_set counter_set) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW; + case KBASE_HWCNT_SET_SECONDARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2; + case KBASE_HWCNT_SET_TERTIARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3; + default: + WARN(true, "Invalid counter set for FW type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED; + } +} + +static enum kbase_hwcnt_gpu_v5_block_type +kbasep_get_csg_block_type(enum kbase_hwcnt_set counter_set) +{ + switch (counter_set) { + case KBASE_HWCNT_SET_PRIMARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG; + case KBASE_HWCNT_SET_SECONDARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2; + case KBASE_HWCNT_SET_TERTIARY: + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3; + default: + WARN(true, "Invalid counter set for CSG type: %d", counter_set); + return KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED; } } @@ -124,49 +154,89 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu const struct kbase_hwcnt_metadata **metadata) { struct kbase_hwcnt_description desc; - struct kbase_hwcnt_group_description group; - struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; - size_t non_sc_block_count; + struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT] = {}; + size_t non_core_block_count; + size_t core_block_count; size_t sc_block_count; + size_t blk_idx = 0; - WARN_ON(!gpu_info); - WARN_ON(!metadata); + if (WARN_ON(!gpu_info)) + return -EINVAL; - /* Calculate number of block instances that aren't shader cores */ - non_sc_block_count = 2 + gpu_info->l2_count; + if (WARN_ON(!metadata)) + return -EINVAL; + + /* Calculate number of block instances that aren't cores */ + non_core_block_count = 2 + gpu_info->l2_count; /* Calculate number of block instances that are shader cores */ - sc_block_count = fls64(gpu_info->core_mask); + sc_block_count = (size_t)fls64(gpu_info->core_mask); + /* Determine the total number of cores */ + core_block_count = sc_block_count; + + + if (gpu_info->has_fw_counters) + non_core_block_count += 1 + gpu_info->csg_cnt; /* - * A system can have up to 64 shader cores, but the 64-bit - * availability mask can't physically represent that many cores as well - * as the other hardware blocks. - * Error out if there are more blocks than our implementation can + * Check we have enough bits to represent the number of cores that + * exist in the system. Error-out if there are more blocks than our implementation can * support. */ - if ((sc_block_count + non_sc_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) + if ((core_block_count + non_core_block_count) > KBASE_HWCNT_AVAIL_MASK_BITS) return -EINVAL; + /* The dump starts with, on supporting systems, the FW blocks, and as such, + * they should be taken into account first. 
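With firmware counters present, the metadata's block list is assembled in a fixed order: the single FW block, one block per CSG, then FE, Tiler, the Memsys blocks, and finally the shader cores. A worked illustration for a hypothetical configuration is given below; the numbers are examples chosen for the sketch, not values taken from the patch.

/* Hypothetical example of the resulting block order for:
 *   has_fw_counters = true, csg_cnt = 2, l2_count = 2, core_mask = 0b1011
 *
 *   blks[0] FW      x1   (kbasep_get_fw_block_type)
 *   blks[1] CSG     x2   (kbasep_get_csg_block_type, one descriptor, 2 instances)
 *   blks[2] FE      x1
 *   blks[3] Tiler   x1
 *   blks[4] Memsys  x2   (one per L2 slice)
 *   blks[5] SC      x4   (fls64(0b1011) = 4, the core mask may be sparse)
 *
 *   non_core_block_count = 2 + l2_count + 1 + csg_cnt = 7
 *   sc_block_count       = fls64(core_mask)           = 4
 */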
+ */ + if (gpu_info->has_fw_counters) { + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_fw_block_type(counter_set), + .inst_cnt = 1, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + } + + /* Some systems may support FW counters but not CSG counters, so the + * two are handled differently. + */ + if (gpu_info->csg_cnt > 0) { + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_csg_block_type(counter_set), + .inst_cnt = gpu_info->csg_cnt, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - + KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + } + /* One Front End block */ - kbasep_get_fe_block_type(&blks[0].type, counter_set, is_csf); - blks[0].inst_cnt = 1; - blks[0].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[0].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_fe_block_type(counter_set, is_csf), + .inst_cnt = 1, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; /* One Tiler block */ - kbasep_get_tiler_block_type(&blks[1].type, counter_set); - blks[1].inst_cnt = 1; - blks[1].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[1].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_tiler_block_type(counter_set), + .inst_cnt = 1, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; /* l2_count memsys blks */ - kbasep_get_memsys_block_type(&blks[2].type, counter_set); - blks[2].inst_cnt = gpu_info->l2_count; - blks[2].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[2].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_memsys_block_type(counter_set), + .inst_cnt = gpu_info->l2_count, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; /* - * There are as many shader cores in the system as there are bits set in + * There are as many cores in the system as there are bits set in * the core mask. However, the dump buffer memory requirements need to * take into account the fact that the core mask may be non-contiguous. * @@ -179,27 +249,36 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu * * We find the core mask's last set bit to determine the memory * requirements, and embed the core mask into the availability mask so - * we can determine later which shader cores physically exist. + * we can determine later which cores physically exist. 
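The availability mask is built in two pieces: the non-core blocks are marked always-available, and the (possibly sparse) core mask is copied in directly after them, so a hole in the core mask becomes a hole in block availability. The patch itself uses the new multi-word kbase_hwcnt_set_avail_mask_bits() helpers; the sketch below is a simplified single-word illustration of the same packing, assuming the total block count fits in one 64-bit word.

/* Sketch only, not the real helper: pack a sparse core mask into the tail of a
 * simple u64 availability word, for non_core_block_count = 7.
 */
static inline u64 example_build_avail_bits(u64 core_mask, size_t non_core_block_count)
{
	/* Non-core blocks (FW, CSGs, FE, Tiler, Memsys) are always present. */
	u64 avail = (1ULL << non_core_block_count) - 1;

	/* Core availability follows directly, holes and all:
	 * core_mask = 0b1011 -> SC instance 2 has buffer space reserved but is
	 * marked unavailable.
	 */
	avail |= core_mask << non_core_block_count;
	return avail;
}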
*/ - kbasep_get_sc_block_type(&blks[3].type, counter_set, is_csf); - blks[3].inst_cnt = sc_block_count; - blks[3].hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK; - blks[3].ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK; + blks[blk_idx++] = (struct kbase_hwcnt_block_description){ + .type = kbasep_get_sc_block_type(counter_set, is_csf), + .inst_cnt = sc_block_count, + .hdr_cnt = KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + .ctr_cnt = gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK, + }; + - WARN_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 4); + /* Currently, we're only handling a maximum of seven blocks, and this needs + * to be changed whenever the number of blocks increases + */ + BUILD_BUG_ON(KBASE_HWCNT_V5_BLOCK_TYPE_COUNT != 7); - group.type = KBASE_HWCNT_GPU_GROUP_TYPE_V5; - group.blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; - group.blks = blks; + /* After assembling the block list in the code above, we should not end up with more + * elements than KBASE_HWCNT_V5_BLOCK_TYPE_COUNT. + */ + WARN_ON(blk_idx > KBASE_HWCNT_V5_BLOCK_TYPE_COUNT); - desc.grp_cnt = 1; - desc.grps = &group; + desc.blk_cnt = blk_idx; + desc.blks = blks; desc.clk_cnt = gpu_info->clk_cnt; /* The JM, Tiler, and L2s are always available, and are before cores */ - desc.avail_mask = (1ull << non_sc_block_count) - 1; - /* Embed the core mask directly in the availability mask */ - desc.avail_mask |= (gpu_info->core_mask << non_sc_block_count); + kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0); + kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX); + kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count, + gpu_info->core_mask); + return kbase_hwcnt_metadata_create(&desc, metadata); } @@ -215,7 +294,7 @@ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in { WARN_ON(!gpu_info); - return (2 + gpu_info->l2_count + fls64(gpu_info->core_mask)) * + return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) * gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES; } @@ -248,7 +327,10 @@ int kbase_hwcnt_jm_metadata_create(const struct kbase_hwcnt_gpu_info *gpu_info, * metadata since physical HW uses 32-bit per value but metadata * specifies 64-bit per value. 
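The size check here relies on the hardware dump using 32-bit counter values while the metadata describes 64-bit values, so the metadata buffer must be exactly twice the raw dump size. A worked example with assumed numbers (2 L2 slices, core mask 0xF, 64 values per block):

/* Worked example with assumed values; KBASE_HWCNT_VALUE_HW_BYTES is sizeof(u32) = 4.
 *
 *   blocks     = 2 + l2_count + fls64(core_mask)
 *              = 2 + 2 + 4                        = 8
 *   dump_bytes = blocks * values_per_block * 4
 *              = 8 * 64 * 4                       = 2048 bytes (raw, u32 values)
 *   metadata->dump_buf_bytes = 2 * dump_bytes     = 4096 bytes (u64 values)
 */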
*/ - WARN_ON(dump_bytes * 2 != metadata->dump_buf_bytes); + if (WARN(dump_bytes * 2 != metadata->dump_buf_bytes, + "Dump buffer size expected to be %zu, instead is %zu", dump_bytes * 2, + metadata->dump_buf_bytes)) + return -EINVAL; *out_metadata = metadata; *out_dump_bytes = dump_bytes; @@ -291,72 +373,76 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat kbase_hwcnt_metadata_destroy(metadata); } -static bool is_block_type_shader(const u64 grp_type, const u64 blk_type, const size_t blk) +bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type blk_type) { - bool is_shader = false; - - /* Warn on unknown group type */ - if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) - return false; - if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2 || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3 || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED) - is_shader = true; + return true; - return is_shader; + return false; } -static bool is_block_type_l2_cache(const u64 grp_type, const u64 blk_type) +bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type) { - bool is_l2_cache = false; - - switch (grp_type) { - case KBASE_HWCNT_GPU_GROUP_TYPE_V5: - if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || - blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 || - blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED) - is_l2_cache = true; - break; - default: - /* Warn on unknown group type */ - WARN_ON(true); - } + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2 || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED) + return true; + + return false; +} + +bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type) +{ + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED) + return true; + + return false; +} - return is_l2_cache; +bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type) +{ + if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2 || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3 || + blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED) + return true; + + return false; } int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_enable_map *dst_enable_map, u64 pm_core_mask, + u64 debug_core_mask, u64 max_core_mask, size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; const u64 *dump_src = src; size_t src_offset = 0; u64 core_mask = pm_core_mask; + u64 shader_present = curr_config->shader_present; /* Variables to deal with the current configuration */ - int l2_count = 0; + size_t l2_count = 0; if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - const size_t ctr_cnt = - kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); - const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - const bool is_shader_core = 
is_block_type_shader( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type, blk); - const bool is_l2_cache = is_block_type_l2_cache( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); - const bool is_undefined = kbase_hwcnt_is_block_type_undefined( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk); + const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + const bool is_shader_core = kbase_hwcnt_is_block_type_shader(blk_type); + const bool is_l2_cache = kbase_hwcnt_is_block_type_memsys(blk_type); + const bool is_undefined = kbase_hwcnt_is_block_type_undefined(blk_type); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); bool hw_res_available = true; /* @@ -383,45 +469,107 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* * Skip block if no values in the destination block are enabled. */ - if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { - u64 *dst_blk = - kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; - bool blk_powered; + bool blk_valid = (!is_undefined && hw_res_available); + + if (blk_valid) { + bool blk_powered; + blk_stt_t current_block_state = 0; + + if (!is_shader_core) { + /* The L2 block must be available at this point, or handled + * differently below. + * Every partition must have a FE and a tiler, so they + * must be implicitly available as part of the current + * configuration. + */ + blk_powered = true; + current_block_state |= KBASE_HWCNT_STATE_AVAILABLE; + } else { + /* Check the PM core mask to see if the shader core is + * powered up. + */ + blk_powered = core_mask & 1; + + /* Set availability bits based on whether the core is + * present in both the shader_present AND the core + * mask in sysFS. The core masks are shifted to the + * right at the end of the loop so always check the + * rightmost bit. + */ + if ((shader_present & debug_core_mask) & 0x1) + current_block_state |= KBASE_HWCNT_STATE_AVAILABLE; + else { + /* If this branch is taken, the shader core may + * be: + * * in the max configuration, but not enabled + * through the sysFS core mask + * * in the max configuration, but not in the + * current configuration + * * physically not present + */ + current_block_state |= + KBASE_HWCNT_STATE_UNAVAILABLE; + } + } - if (!is_shader_core) { - /* Under the current PM system, counters will - * only be enabled after all non shader core - * blocks are powered up. - */ - blk_powered = true; - } else { - /* Check the PM core mask to see if the shader - * core is powered up. + /* Note: KBASE_HWCNT_STATE_OFF for non-shader cores (L2, Tiler, JM) + * is handled on this backend's dump_disable function (since + * they are considered to always be powered here). */ - blk_powered = core_mask & 1; - } + current_block_state |= (blk_powered) ? KBASE_HWCNT_STATE_ON : + KBASE_HWCNT_STATE_OFF; - if (blk_powered && !is_undefined && hw_res_available) { - /* Only powered and defined blocks have valid data. 
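For shader cores the new path derives two independent facts per instance, whether the core was powered (from the PM-synchronised core mask) and whether it is available (present in both shader_present and the user-set sysfs mask), and folds both into the destination block state. A stripped-down sketch of that per-core decision, with all buffer handling removed and the function name invented for illustration:

/* Sketch of the per-shader-core state decision; the masks are assumed to have
 * already been shifted so that bit 0 is the core currently being processed.
 */
static blk_stt_t example_shader_core_state(u64 pm_core_mask, u64 shader_present,
					   u64 debug_core_mask)
{
	blk_stt_t state = 0;

	/* Powered if the PM-synchronised mask says the core was up. */
	state |= (pm_core_mask & 1) ? KBASE_HWCNT_STATE_ON : KBASE_HWCNT_STATE_OFF;

	/* Available only if the core is both physically present and allowed by
	 * the user-set sysfs core mask.
	 */
	state |= ((shader_present & debug_core_mask) & 1) ? KBASE_HWCNT_STATE_AVAILABLE :
							    KBASE_HWCNT_STATE_UNAVAILABLE;

	return state;
}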
*/ if (accumulate) { - kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, - hdr_cnt, ctr_cnt); + /* Only update existing counter values if block was powered + * and valid + */ + if (blk_powered) + kbase_hwcnt_dump_buffer_block_accumulate( + dst_blk, src_blk, hdr_cnt, ctr_cnt); + + kbase_hwcnt_block_state_append(dst_blk_stt, + current_block_state); } else { - kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, - (hdr_cnt + ctr_cnt)); + if (blk_powered) { + kbase_hwcnt_dump_buffer_block_copy( + dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + } else { + /* src is garbage, so zero the dst */ + kbase_hwcnt_dump_buffer_block_zero( + dst_blk, (hdr_cnt + ctr_cnt)); + } + + kbase_hwcnt_block_state_set(dst_blk_stt, + current_block_state); + } + } else if (is_l2_cache && !is_undefined) { + /* Defined L2 can only reach here when the partition does not + * own it. Check that the L2 count is within the resource + * group or whole GPU's max L2 count, and if so, + * mark it as unavailable. + */ + if (l2_count <= max_l2_slices) { + kbase_hwcnt_block_state_set( + dst_blk_stt, KBASE_HWCNT_STATE_OFF | + KBASE_HWCNT_STATE_UNAVAILABLE); } + kbase_hwcnt_dump_buffer_block_zero(dst_blk, (hdr_cnt + ctr_cnt)); } else { - /* Even though the block might be undefined, the - * user has enabled counter collection for it. - * We should not propagate garbage data. + /* Even though the block is undefined, the user has + * enabled counter collection for it. We should not propagate + * garbage data, or copy/accumulate the block states. */ if (accumulate) { /* No-op to preserve existing values */ } else { - /* src is garbage, so zero the dst */ + /* src is garbage, so zero the dst and reset block state */ kbase_hwcnt_dump_buffer_block_zero(dst_blk, (hdr_cnt + ctr_cnt)); + kbase_hwcnt_block_state_set(dst_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); } } } @@ -429,66 +577,79 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, /* Just increase the src_offset if the HW is available */ if (hw_res_available) src_offset += (hdr_cnt + ctr_cnt); - if (is_shader_core) - core_mask = core_mask >> 1; + if (is_shader_core) { + /* Shift each core mask right by 1 */ + core_mask >>= 1; + debug_core_mask >>= 1; + max_core_mask >>= 1; + shader_present >>= 1; + } } return 0; } int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate) + blk_stt_t *src_block_stt, + const struct kbase_hwcnt_enable_map *dst_enable_map, + size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate) { const struct kbase_hwcnt_metadata *metadata; const u64 *dump_src = src; size_t src_offset = 0; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; + size_t blk_inst_count = 0; - if (!dst || !src || !dst_enable_map || (dst_enable_map->metadata != dst->metadata)) + if (!dst || !src || !src_block_stt || !dst_enable_map || + (dst_enable_map->metadata != dst->metadata)) return -EINVAL; metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - const size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - const size_t ctr_cnt = - kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); - const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - const bool is_undefined = kbase_hwcnt_is_block_type_undefined( - kbase_hwcnt_metadata_group_type(metadata, grp), blk_type); + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + const size_t hdr_cnt = 
kbase_hwcnt_metadata_block_headers_count(metadata, blk); + const size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); + const uint64_t blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + const bool is_undefined = kbase_hwcnt_is_block_type_undefined(blk_type); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); /* * Skip block if no values in the destination block are enabled. */ - if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) { - u64 *dst_blk = - kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); const u64 *src_blk = dump_src + src_offset; if (!is_undefined) { if (accumulate) { kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt); + kbase_hwcnt_block_state_append( + dst_blk_stt, src_block_stt[blk_inst_count]); } else { kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, (hdr_cnt + ctr_cnt)); + kbase_hwcnt_block_state_set(dst_blk_stt, + src_block_stt[blk_inst_count]); } } else { - /* Even though the block might be undefined, the - * user has enabled counter collection for it. - * We should not propagate garbage data. + /* Even though the block might be undefined, the user has enabled + * counter collection for it. We should not propagate garbage + * data, or copy/accumulate the block states. */ if (accumulate) { /* No-op to preserve existing values */ } else { - /* src is garbage, so zero the dst */ + /* src is garbage, so zero the dst and reset block state */ kbase_hwcnt_dump_buffer_block_zero(dst_blk, (hdr_cnt + ctr_cnt)); + kbase_hwcnt_block_state_set(dst_blk_stt, + KBASE_HWCNT_STATE_UNKNOWN); } } } - + blk_inst_count++; src_offset += (hdr_cnt + ctr_cnt); } @@ -541,58 +702,79 @@ void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_m u64 shader_bm[EM_COUNT] = { 0 }; u64 tiler_bm[EM_COUNT] = { 0 }; u64 mmu_l2_bm[EM_COUNT] = { 0 }; - size_t grp, blk, blk_inst; + u64 fw_bm[EM_COUNT] = { 0 }; + u64 csg_bm[EM_COUNT] = { 0 }; + size_t blk, blk_inst; if (WARN_ON(!src) || WARN_ON(!dst)) return; metadata = src->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, grp, blk, blk_inst); - - if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { - const size_t map_stride = - kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); - size_t map_idx; - - for (map_idx = 0; map_idx < map_stride; ++map_idx) { - if (WARN_ON(map_idx >= EM_COUNT)) - break; - - switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: - /* Nothing to do in this case. 
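In the physical-map conversion below, each block class (FE, Tiler, SC, Memsys, and now FW and CSG) has its enable bits ORed together across all instances before the wide map is squashed into the 32-bit physical register format. A condensed sketch of that aggregation for a single class is shown here; it assumes the named helper returns the 32-bit physical mask, as its other uses in this file suggest, and EM_LO/EM_HI/EM_COUNT are the indices from the enum at the top of the file.

/* Sketch: OR together the low/high enable words of every instance of one block
 * class, then convert to the 32-bit physical form.
 */
static u32 example_class_to_physical(const u64 (*blk_maps)[EM_COUNT], size_t inst_cnt)
{
	u64 bm[EM_COUNT] = { 0 };
	size_t i;

	for (i = 0; i < inst_cnt; i++) {
		bm[EM_LO] |= blk_maps[i][EM_LO];
		bm[EM_HI] |= blk_maps[i][EM_HI];
	}

	return kbase_hwcnt_backend_gpu_block_map_to_physical(bm[EM_LO], bm[EM_HI]);
}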
*/ - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: - fe_bm[map_idx] |= blk_map[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - tiler_bm[map_idx] |= blk_map[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: - shader_bm[map_idx] |= blk_map[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - mmu_l2_bm[map_idx] |= blk_map[map_idx]; - break; - default: - WARN_ON(true); - } + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + const u64 *blk_map = kbase_hwcnt_enable_map_block_instance(src, blk, blk_inst); + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk); + size_t map_idx; + + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; + + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: + /* Nothing to do in this case. */ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + fe_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + tiler_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + shader_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + mmu_l2_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: + fw_bm[map_idx] |= blk_map[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: + csg_bm[map_idx] |= blk_map[map_idx]; + break; + default: + WARN(true, "Unknown block type %llu", blk_type); } - } else { - WARN_ON(true); } } @@ -603,6 +785,8 @@ void kbase_hwcnt_gpu_enable_map_to_physical(struct kbase_hwcnt_physical_enable_m kbase_hwcnt_backend_gpu_block_map_to_physical(tiler_bm[EM_LO], tiler_bm[EM_HI]); dst->mmu_l2_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(mmu_l2_bm[EM_LO], mmu_l2_bm[EM_HI]); + dst->fw_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(fw_bm[EM_LO], fw_bm[EM_HI]); + dst->csg_bm = kbase_hwcnt_backend_gpu_block_map_to_physical(csg_bm[EM_LO], csg_bm[EM_HI]); } void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kbase_hwcnt_set src) @@ -625,72 +809,102 @@ void kbase_hwcnt_gpu_set_to_physical(enum kbase_hwcnt_physical_set *dst, enum kb void 
kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst, const struct kbase_hwcnt_physical_enable_map *src) { - const struct kbase_hwcnt_metadata *metadata; + struct kbase_hwcnt_enable_cm cm = {}; - u64 fe_bm[EM_COUNT] = { 0 }; - u64 shader_bm[EM_COUNT] = { 0 }; - u64 tiler_bm[EM_COUNT] = { 0 }; - u64 mmu_l2_bm[EM_COUNT] = { 0 }; - size_t grp, blk, blk_inst; + if (WARN_ON(!src) || WARN_ON(!dst)) + return; + + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &cm.fe_bm[EM_LO], + &cm.fe_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &cm.shader_bm[EM_LO], + &cm.shader_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &cm.tiler_bm[EM_LO], + &cm.tiler_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &cm.mmu_l2_bm[EM_LO], + &cm.mmu_l2_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fw_bm, &cm.fw_bm[EM_LO], + &cm.fw_bm[EM_HI]); + kbasep_hwcnt_backend_gpu_block_map_from_physical(src->csg_bm, &cm.csg_bm[EM_LO], + &cm.csg_bm[EM_HI]); + + kbase_hwcnt_gpu_enable_map_from_cm(dst, &cm); +} + +void kbase_hwcnt_gpu_enable_map_from_cm(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_cm *src) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t blk, blk_inst; if (WARN_ON(!src) || WARN_ON(!dst)) return; metadata = dst->metadata; - kbasep_hwcnt_backend_gpu_block_map_from_physical(src->fe_bm, &fe_bm[EM_LO], &fe_bm[EM_HI]); - kbasep_hwcnt_backend_gpu_block_map_from_physical(src->shader_bm, &shader_bm[EM_LO], - &shader_bm[EM_HI]); - kbasep_hwcnt_backend_gpu_block_map_from_physical(src->tiler_bm, &tiler_bm[EM_LO], - &tiler_bm[EM_HI]); - kbasep_hwcnt_backend_gpu_block_map_from_physical(src->mmu_l2_bm, &mmu_l2_bm[EM_LO], - &mmu_l2_bm[EM_HI]); - - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); - const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, grp, blk); - u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); - - if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { - const size_t map_stride = - kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); - size_t map_idx; - - for (map_idx = 0; map_idx < map_stride; ++map_idx) { - if (WARN_ON(map_idx >= EM_COUNT)) - break; - - switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: - /* Nothing to do in this case. 
*/ - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: - blk_map[map_idx] = fe_bm[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: - blk_map[map_idx] = tiler_bm[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: - blk_map[map_idx] = shader_bm[map_idx]; - break; - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: - case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: - blk_map[map_idx] = mmu_l2_bm[map_idx]; - break; - default: - WARN_ON(true); - } + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + const u64 blk_type = kbase_hwcnt_metadata_block_type(metadata, blk); + u64 *blk_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst); + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk); + size_t map_idx; + + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; + + switch ((enum kbase_hwcnt_gpu_v5_block_type)blk_type) { + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: + /* Nothing to do in this case. */ + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3: + blk_map[map_idx] = src->fe_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER: + blk_map[map_idx] = src->tiler_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3: + blk_map[map_idx] = src->shader_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: + blk_map[map_idx] = src->mmu_l2_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: + blk_map[map_idx] = src->fw_bm[map_idx]; + break; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: + fallthrough; + case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: + blk_map[map_idx] = src->csg_bm[map_idx]; + break; + default: + WARN(true, "Invalid block type %llu", blk_type); } - } else { - WARN_ON(true); } } } @@ -699,40 +913,34 @@ void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, const struct kbase_hwcnt_enable_map *enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!buf) || WARN_ON(!enable_map) || WARN_ON(buf->metadata != enable_map->metadata)) return; metadata = buf->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - const u64 grp_type = kbase_hwcnt_metadata_group_type(metadata, grp); - u64 *buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + u64 
*buf_blk = kbase_hwcnt_dump_buffer_block_instance(buf, blk, blk_inst); const u64 *blk_map = - kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); + kbase_hwcnt_enable_map_block_instance(enable_map, blk, blk_inst); - if ((enum kbase_hwcnt_gpu_group_type)grp_type == KBASE_HWCNT_GPU_GROUP_TYPE_V5) { - const size_t map_stride = - kbase_hwcnt_metadata_block_enable_map_stride(metadata, grp, blk); - u64 prfcnt_bm[EM_COUNT] = { 0 }; - u32 prfcnt_en = 0; - size_t map_idx; + const size_t map_stride = + kbase_hwcnt_metadata_block_enable_map_stride(metadata, blk); + u64 prfcnt_bm[EM_COUNT] = { 0 }; + u32 prfcnt_en = 0; + size_t map_idx; - for (map_idx = 0; map_idx < map_stride; ++map_idx) { - if (WARN_ON(map_idx >= EM_COUNT)) - break; + for (map_idx = 0; map_idx < map_stride; ++map_idx) { + if (WARN_ON(map_idx >= EM_COUNT)) + break; - prfcnt_bm[map_idx] = blk_map[map_idx]; - } + prfcnt_bm[map_idx] = blk_map[map_idx]; + } - prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO], - prfcnt_bm[EM_HI]); + prfcnt_en = kbase_hwcnt_backend_gpu_block_map_to_physical(prfcnt_bm[EM_LO], + prfcnt_bm[EM_HI]); - buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; - } else { - WARN_ON(true); - } + buf_blk[KBASE_HWCNT_V5_PRFCNT_EN_HEADER] = prfcnt_en; } } diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h index a49c31e..4339fdd 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,6 +22,8 @@ #ifndef _KBASE_HWCNT_GPU_H_ #define _KBASE_HWCNT_GPU_H_ +#include "hwcnt/mali_kbase_hwcnt_types.h" + #include #include @@ -31,10 +33,10 @@ struct kbase_hwcnt_enable_map; struct kbase_hwcnt_dump_buffer; /* Hardware counter version 5 definitions, V5 is the only supported version. */ -#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 4 +#define KBASE_HWCNT_V5_BLOCK_TYPE_COUNT 7 #define KBASE_HWCNT_V5_HEADERS_PER_BLOCK 4 #define KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK 60 -#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ +#define KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK \ (KBASE_HWCNT_V5_HEADERS_PER_BLOCK + KBASE_HWCNT_V5_DEFAULT_COUNTERS_PER_BLOCK) /* FrontEnd block count in V5 GPU hardware counter. */ @@ -48,15 +50,6 @@ struct kbase_hwcnt_dump_buffer; /* Number of bytes for each counter value in hardware. */ #define KBASE_HWCNT_VALUE_HW_BYTES (sizeof(u32)) -/** - * enum kbase_hwcnt_gpu_group_type - GPU hardware counter group types, used to - * identify metadata groups. - * @KBASE_HWCNT_GPU_GROUP_TYPE_V5: GPU V5 group type. - */ -enum kbase_hwcnt_gpu_group_type { - KBASE_HWCNT_GPU_GROUP_TYPE_V5, -}; - /** * enum kbase_hwcnt_gpu_v5_block_type - GPU V5 hardware counter block types, * used to identify metadata blocks. @@ -79,6 +72,14 @@ enum kbase_hwcnt_gpu_group_type { * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS: Memsys block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2: Secondary Memsys block. * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED: Undefined Memsys block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW: FW block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2: Secondary FW block. 
+ * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3: Tertiary FW block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED: Undefined FW block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG: CSG block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2: Secondary CSG block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3: Tertiary CSG block. + * @KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED: Undefined CSG block. */ enum kbase_hwcnt_gpu_v5_block_type { KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE, @@ -94,6 +95,14 @@ enum kbase_hwcnt_gpu_v5_block_type { KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2, KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FW_UNDEFINED, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG2, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG3, + KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_CSG_UNDEFINED, }; /** @@ -117,12 +126,34 @@ enum kbase_hwcnt_set { * @shader_bm: Shader counters selection bitmask. * @tiler_bm: Tiler counters selection bitmask. * @mmu_l2_bm: MMU_L2 counters selection bitmask. + * @fw_bm: CSF firmware counters selection bitmask. + * @csg_bm: CSF CSG counters selection bitmask. */ struct kbase_hwcnt_physical_enable_map { u32 fe_bm; u32 shader_bm; u32 tiler_bm; u32 mmu_l2_bm; + u32 fw_bm; + u32 csg_bm; +}; + +/** + * struct kbase_hwcnt_enable_cm - 128-bit enable counter masks. + * @fe_bm: Front end (JM/CSHW) counters selection bitmask. + * @shader_bm: Shader counters selection bitmask. + * @tiler_bm: Tiler counters selection bitmask. + * @mmu_l2_bm: MMU_L2 counters selection bitmask. + * @fw_bm: CSF firmware counters selection bitmask. + * @csg_bm: CSF CSG counters selection bitmask. + */ +struct kbase_hwcnt_enable_cm { + u64 fe_bm[2]; + u64 shader_bm[2]; + u64 tiler_bm[2]; + u64 mmu_l2_bm[2]; + u64 fw_bm[2]; + u64 csg_bm[2]; }; /* @@ -140,14 +171,18 @@ enum kbase_hwcnt_physical_set { * @l2_count: L2 cache count. * @core_mask: Shader core mask. May be sparse. * @clk_cnt: Number of clock domains available. + * @csg_cnt: Number of CSGs available. * @prfcnt_values_per_block: Total entries (header + counters) of performance * counter per block. + * @has_fw_counters: Whether the GPU has FW counters available. */ struct kbase_hwcnt_gpu_info { size_t l2_count; u64 core_mask; u8 clk_cnt; + u8 csg_cnt; size_t prfcnt_values_per_block; + bool has_fw_counters; }; /** @@ -197,18 +232,12 @@ struct kbase_hwcnt_curr_config { /** * kbase_hwcnt_is_block_type_undefined() - Check if a block type is undefined. * - * @grp_type: Hardware counter group type. * @blk_type: Hardware counter block type. * * Return: true if the block type is undefined, else false. 
*/ -static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t grp_type, - const uint64_t blk_type) +static inline bool kbase_hwcnt_is_block_type_undefined(const uint64_t blk_type) { - /* Warn on unknown group type */ - if (WARN_ON(grp_type != KBASE_HWCNT_GPU_GROUP_TYPE_V5)) - return false; - return (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED || blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED || @@ -264,16 +293,23 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat * kbase_hwcnt_jm_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer * abstraction in dst. - * @dst: Non-NULL pointer to destination dump buffer. - * @src: Non-NULL pointer to source raw dump buffer, of same length - * as dump_buf_bytes in the metadata of destination dump - * buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @pm_core_mask: PM state synchronized shaders core mask with the dump. - * @curr_config: Current allocated hardware resources to correctly map the - * source raw dump buffer to the destination dump buffer. - * @accumulate: True if counters in source should be accumulated into - * destination, rather than copied. + * @dst: Non-NULL pointer to destination dump buffer. + * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of destination dump + * buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @pm_core_mask: PM state synchronized shaders core mask with the dump. + * @debug_core_mask: User-set mask of cores to be used by the GPU. + * @max_core_mask: Core mask of all cores allocated to the GPU (non + * virtualized platforms) or resource group (virtualized + * platforms). + * @max_l2_slices: Maximum number of L2 slices allocated to the GPU (non + * virtualised platforms) or resource group (virtualized + * platforms). + * @curr_config: Current allocated hardware resources to correctly map the + * source raw dump buffer to the destination dump buffer. + * @accumulate: True if counters in source should be accumulated into + * destination, rather than copied. * * The dst and dst_enable_map MUST have been created from the same metadata as * returned from the call to kbase_hwcnt_jm_metadata_create as was used to get @@ -283,19 +319,23 @@ void kbase_hwcnt_csf_metadata_destroy(const struct kbase_hwcnt_metadata *metadat */ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, const struct kbase_hwcnt_enable_map *dst_enable_map, - const u64 pm_core_mask, - const struct kbase_hwcnt_curr_config *curr_config, bool accumulate); + const u64 pm_core_mask, u64 debug_core_mask, u64 max_core_mask, + size_t max_l2_slices, const struct kbase_hwcnt_curr_config *curr_config, + bool accumulate); /** * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw * dump buffer in src into the dump buffer * abstraction in dst. - * @dst: Non-NULL pointer to destination dump buffer. - * @src: Non-NULL pointer to source raw dump buffer, of same length - * as dump_buf_bytes in the metadata of dst dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * @accumulate: True if counters in src should be accumulated into - * destination, rather than copied. + * @dst: Non-NULL pointer to destination dump buffer. 
+ * @src: Non-NULL pointer to source raw dump buffer, of same length + * as dump_buf_bytes in the metadata of dst dump buffer. + * @src_block_stt: Non-NULL pointer to source block state buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @num_l2_slices: Current number of L2 slices allocated to the GPU. + * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU. + * @accumulate: True if counters in src should be accumulated into + * destination, rather than copied. * * The dst and dst_enable_map MUST have been created from the same metadata as * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get @@ -304,7 +344,9 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, * Return: 0 on success, else error code. */ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src, - const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate); + blk_stt_t *src_block_stt, + const struct kbase_hwcnt_enable_map *dst_enable_map, + size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate); /** * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block @@ -404,4 +446,23 @@ void kbase_hwcnt_gpu_enable_map_from_physical(struct kbase_hwcnt_enable_map *dst void kbase_hwcnt_gpu_patch_dump_headers(struct kbase_hwcnt_dump_buffer *buf, const struct kbase_hwcnt_enable_map *enable_map); +bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type blk_type); + +bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type); + +bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type); + +bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type); +/** + * kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from + * counter selection bitmasks. + * @dst: Non-NULL pointer to destination enable map abstraction. + * @src: Non-NULL pointer to source counter selection bitmasks. + * + * The dst must have been created from a metadata returned from a call to + * kbase_hwcnt_jm_metadata_create or kbase_hwcnt_csf_metadata_create. + */ +void kbase_hwcnt_gpu_enable_map_from_cm(struct kbase_hwcnt_enable_map *dst, + const struct kbase_hwcnt_enable_cm *src); + #endif /* _KBASE_HWCNT_GPU_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c deleted file mode 100644 index 0cf2f94..0000000 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.c +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note -/* - * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#include "hwcnt/mali_kbase_hwcnt_gpu.h" -#include "hwcnt/mali_kbase_hwcnt_gpu_narrow.h" - -#include -#include -#include - -int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, - const struct kbase_hwcnt_metadata *src_md) -{ - struct kbase_hwcnt_description desc; - struct kbase_hwcnt_group_description group; - struct kbase_hwcnt_block_description blks[KBASE_HWCNT_V5_BLOCK_TYPE_COUNT]; - size_t prfcnt_values_per_block; - size_t blk; - int err; - struct kbase_hwcnt_metadata_narrow *metadata_narrow; - - if (!dst_md_narrow || !src_md || !src_md->grp_metadata || - !src_md->grp_metadata[0].blk_metadata) - return -EINVAL; - - /* Only support 1 group count and KBASE_HWCNT_V5_BLOCK_TYPE_COUNT block - * count in the metadata. - */ - if ((kbase_hwcnt_metadata_group_count(src_md) != 1) || - (kbase_hwcnt_metadata_block_count(src_md, 0) != KBASE_HWCNT_V5_BLOCK_TYPE_COUNT)) - return -EINVAL; - - /* Get the values count in the first block. */ - prfcnt_values_per_block = kbase_hwcnt_metadata_block_values_count(src_md, 0, 0); - - /* check all blocks should have same values count. */ - for (blk = 1; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { - size_t val_cnt = kbase_hwcnt_metadata_block_values_count(src_md, 0, blk); - if (val_cnt != prfcnt_values_per_block) - return -EINVAL; - } - - /* Only support 64 and 128 entries per block. */ - if ((prfcnt_values_per_block != 64) && (prfcnt_values_per_block != 128)) - return -EINVAL; - - metadata_narrow = kmalloc(sizeof(*metadata_narrow), GFP_KERNEL); - if (!metadata_narrow) - return -ENOMEM; - - /* Narrow to 64 entries per block to keep API backward compatibility. */ - prfcnt_values_per_block = 64; - - for (blk = 0; blk < KBASE_HWCNT_V5_BLOCK_TYPE_COUNT; blk++) { - size_t blk_hdr_cnt = kbase_hwcnt_metadata_block_headers_count(src_md, 0, blk); - blks[blk] = (struct kbase_hwcnt_block_description){ - .type = kbase_hwcnt_metadata_block_type(src_md, 0, blk), - .inst_cnt = kbase_hwcnt_metadata_block_instance_count(src_md, 0, blk), - .hdr_cnt = blk_hdr_cnt, - .ctr_cnt = prfcnt_values_per_block - blk_hdr_cnt, - }; - } - - group = (struct kbase_hwcnt_group_description){ - .type = kbase_hwcnt_metadata_group_type(src_md, 0), - .blk_cnt = KBASE_HWCNT_V5_BLOCK_TYPE_COUNT, - .blks = blks, - }; - - desc = (struct kbase_hwcnt_description){ - .grp_cnt = kbase_hwcnt_metadata_group_count(src_md), - .avail_mask = src_md->avail_mask, - .clk_cnt = src_md->clk_cnt, - .grps = &group, - }; - - err = kbase_hwcnt_metadata_create(&desc, &metadata_narrow->metadata); - if (!err) { - /* Narrow down the buffer size to half as the narrowed metadata - * only supports 32-bit but the created metadata uses 64-bit for - * block entry. 
- */ - metadata_narrow->dump_buf_bytes = metadata_narrow->metadata->dump_buf_bytes >> 1; - *dst_md_narrow = metadata_narrow; - } else { - kfree(metadata_narrow); - } - - return err; -} - -void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow) -{ - if (!md_narrow) - return; - - kbase_hwcnt_metadata_destroy(md_narrow->metadata); - kfree(md_narrow); -} - -int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, - struct kbase_hwcnt_dump_buffer_narrow *dump_buf) -{ - size_t dump_buf_bytes; - size_t clk_cnt_buf_bytes; - u8 *buf; - - if (!md_narrow || !dump_buf) - return -EINVAL; - - dump_buf_bytes = md_narrow->dump_buf_bytes; - clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * md_narrow->metadata->clk_cnt; - - /* Make a single allocation for both dump_buf and clk_cnt_buf. */ - buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - *dump_buf = (struct kbase_hwcnt_dump_buffer_narrow){ - .md_narrow = md_narrow, - .dump_buf = (u32 *)buf, - .clk_cnt_buf = (u64 *)(buf + dump_buf_bytes), - }; - - return 0; -} - -void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf_narrow) -{ - if (!dump_buf_narrow) - return; - - kfree(dump_buf_narrow->dump_buf); - *dump_buf_narrow = (struct kbase_hwcnt_dump_buffer_narrow){ .md_narrow = NULL, - .dump_buf = NULL, - .clk_cnt_buf = NULL }; -} - -int kbase_hwcnt_dump_buffer_narrow_array_alloc( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) -{ - struct kbase_hwcnt_dump_buffer_narrow *buffers; - size_t buf_idx; - unsigned int order; - unsigned long addr; - size_t dump_buf_bytes; - size_t clk_cnt_buf_bytes; - size_t total_dump_buf_size; - - if (!md_narrow || !dump_bufs) - return -EINVAL; - - dump_buf_bytes = md_narrow->dump_buf_bytes; - clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * md_narrow->metadata->clk_cnt; - - /* Allocate memory for the dump buffer struct array */ - buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); - if (!buffers) - return -ENOMEM; - - /* Allocate pages for the actual dump buffers, as they tend to be fairly - * large. 
- */ - order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - - if (!addr) { - kfree(buffers); - return -ENOMEM; - } - - *dump_bufs = (struct kbase_hwcnt_dump_buffer_narrow_array){ - .page_addr = addr, - .page_order = order, - .buf_cnt = n, - .bufs = buffers, - }; - - total_dump_buf_size = dump_buf_bytes * n; - /* Set the buffer of each dump buf */ - for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t dump_buf_offset = dump_buf_bytes * buf_idx; - const size_t clk_cnt_buf_offset = - total_dump_buf_size + (clk_cnt_buf_bytes * buf_idx); - - buffers[buf_idx] = (struct kbase_hwcnt_dump_buffer_narrow){ - .md_narrow = md_narrow, - .dump_buf = (u32 *)(addr + dump_buf_offset), - .clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset), - }; - } - - return 0; -} - -void kbase_hwcnt_dump_buffer_narrow_array_free( - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs) -{ - if (!dump_bufs) - return; - - kfree(dump_bufs->bufs); - free_pages(dump_bufs->page_addr, dump_bufs->page_order); - memset(dump_bufs, 0, sizeof(*dump_bufs)); -} - -void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, - const u64 *blk_em, size_t val_cnt) -{ - size_t val; - - for (val = 0; val < val_cnt; val++) { - bool val_enabled = kbase_hwcnt_enable_map_block_value_enabled(blk_em, val); - u32 src_val = (src_blk[val] > U32_MAX) ? U32_MAX : (u32)src_blk[val]; - - dst_blk[val] = val_enabled ? src_val : 0; - } -} - -void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map) -{ - const struct kbase_hwcnt_metadata_narrow *metadata_narrow; - size_t grp; - size_t clk; - - if (WARN_ON(!dst_narrow) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || - WARN_ON(dst_narrow->md_narrow->metadata == src->metadata) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_cnt != src->metadata->grp_cnt) || - WARN_ON(src->metadata->grp_cnt != 1) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != - src->metadata->grp_metadata[0].blk_cnt) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_cnt != - KBASE_HWCNT_V5_BLOCK_TYPE_COUNT) || - WARN_ON(dst_narrow->md_narrow->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt > - src->metadata->grp_metadata[0].blk_metadata[0].ctr_cnt)) - return; - - /* Don't use src metadata since src buffer is bigger than dst buffer. */ - metadata_narrow = dst_narrow->md_narrow; - - for (grp = 0; grp < kbase_hwcnt_metadata_narrow_group_count(metadata_narrow); grp++) { - size_t blk; - size_t blk_cnt = kbase_hwcnt_metadata_narrow_block_count(metadata_narrow, grp); - - for (blk = 0; blk < blk_cnt; blk++) { - size_t blk_inst; - size_t blk_inst_cnt = kbase_hwcnt_metadata_narrow_block_instance_count( - metadata_narrow, grp, blk); - - for (blk_inst = 0; blk_inst < blk_inst_cnt; blk_inst++) { - /* The narrowed down buffer is only 32-bit. 
*/ - u32 *dst_blk = kbase_hwcnt_dump_buffer_narrow_block_instance( - dst_narrow, grp, blk, blk_inst); - const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance( - src, grp, blk, blk_inst); - const u64 *blk_em = kbase_hwcnt_enable_map_block_instance( - dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_narrow_block_values_count( - metadata_narrow, grp, blk); - /* Align upwards to include padding bytes */ - val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( - val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / - KBASE_HWCNT_VALUE_BYTES)); - - kbase_hwcnt_dump_buffer_block_copy_strict_narrow(dst_blk, src_blk, - blk_em, val_cnt); - } - } - } - - for (clk = 0; clk < metadata_narrow->metadata->clk_cnt; clk++) { - bool clk_enabled = - kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); - - dst_narrow->clk_cnt_buf[clk] = clk_enabled ? src->clk_cnt_buf[clk] : 0; - } -} diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h deleted file mode 100644 index afd236d..0000000 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu_narrow.h +++ /dev/null @@ -1,330 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -#ifndef _KBASE_HWCNT_GPU_NARROW_H_ -#define _KBASE_HWCNT_GPU_NARROW_H_ - -#include "hwcnt/mali_kbase_hwcnt_types.h" -#include - -struct kbase_device; -struct kbase_hwcnt_metadata; -struct kbase_hwcnt_enable_map; -struct kbase_hwcnt_dump_buffer; - -/** - * struct kbase_hwcnt_metadata_narrow - Narrow metadata describing the physical - * layout of narrow dump buffers. - * For backward compatibility, the narrow - * metadata only supports 64 counters per - * block and 32-bit per block entry. - * @metadata: Non-NULL pointer to the metadata before narrow down to - * 32-bit per block entry, it has 64 counters per block and - * 64-bit per value. - * @dump_buf_bytes: The size in bytes after narrow 64-bit to 32-bit per block - * entry. - */ -struct kbase_hwcnt_metadata_narrow { - const struct kbase_hwcnt_metadata *metadata; - size_t dump_buf_bytes; -}; - -/** - * struct kbase_hwcnt_dump_buffer_narrow - Hardware counter narrow dump buffer. - * @md_narrow: Non-NULL pointer to narrow metadata used to identify, and to - * describe the layout of the narrow dump buffer. - * @dump_buf: Non-NULL pointer to an array of u32 values, the array size - * is md_narrow->dump_buf_bytes. - * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed - * for each clock domain. - */ -struct kbase_hwcnt_dump_buffer_narrow { - const struct kbase_hwcnt_metadata_narrow *md_narrow; - u32 *dump_buf; - u64 *clk_cnt_buf; -}; - -/** - * struct kbase_hwcnt_dump_buffer_narrow_array - Hardware counter narrow dump - * buffer array. 
- * @page_addr: Address of first allocated page. A single allocation is used for - * all narrow dump buffers in the array. - * @page_order: The allocation order of the pages, the order is on a logarithmic - * scale. - * @buf_cnt: The number of allocated dump buffers. - * @bufs: Non-NULL pointer to the array of narrow dump buffer descriptors. - */ -struct kbase_hwcnt_dump_buffer_narrow_array { - unsigned long page_addr; - unsigned int page_order; - size_t buf_cnt; - struct kbase_hwcnt_dump_buffer_narrow *bufs; -}; - -/** - * kbase_hwcnt_metadata_narrow_group_count() - Get the number of groups from - * narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * - * Return: Number of hardware counter groups described by narrow metadata. - */ -static inline size_t -kbase_hwcnt_metadata_narrow_group_count(const struct kbase_hwcnt_metadata_narrow *md_narrow) -{ - return kbase_hwcnt_metadata_group_count(md_narrow->metadata); -} - -/** - * kbase_hwcnt_metadata_narrow_group_type() - Get the arbitrary type of a group - * from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * - * Return: Type of the group grp. - */ -static inline u64 -kbase_hwcnt_metadata_narrow_group_type(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp) -{ - return kbase_hwcnt_metadata_group_type(md_narrow->metadata, grp); -} - -/** - * kbase_hwcnt_metadata_narrow_block_count() - Get the number of blocks in a - * group from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * - * Return: Number of blocks in group grp. - */ -static inline size_t -kbase_hwcnt_metadata_narrow_block_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp) -{ - return kbase_hwcnt_metadata_block_count(md_narrow->metadata, grp); -} - -/** - * kbase_hwcnt_metadata_narrow_block_instance_count() - Get the number of - * instances of a block - * from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of instances of block blk in group grp. - */ -static inline size_t kbase_hwcnt_metadata_narrow_block_instance_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_block_instance_count(md_narrow->metadata, grp, blk); -} - -/** - * kbase_hwcnt_metadata_narrow_block_headers_count() - Get the number of counter - * headers from narrow - * metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of counter headers in each instance of block blk in group grp. - */ -static inline size_t -kbase_hwcnt_metadata_narrow_block_headers_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_block_headers_count(md_narrow->metadata, grp, blk); -} - -/** - * kbase_hwcnt_metadata_narrow_block_counters_count() - Get the number of - * counters from narrow - * metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of counters in each instance of block blk in group grp. 
- */ -static inline size_t kbase_hwcnt_metadata_narrow_block_counters_count( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_block_counters_count(md_narrow->metadata, grp, blk); -} - -/** - * kbase_hwcnt_metadata_narrow_block_values_count() - Get the number of values - * from narrow metadata. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * - * Return: Number of headers plus counters in each instance of block blk - * in group grp. - */ -static inline size_t -kbase_hwcnt_metadata_narrow_block_values_count(const struct kbase_hwcnt_metadata_narrow *md_narrow, - size_t grp, size_t blk) -{ - return kbase_hwcnt_metadata_narrow_block_counters_count(md_narrow, grp, blk) + - kbase_hwcnt_metadata_narrow_block_headers_count(md_narrow, grp, blk); -} - -/** - * kbase_hwcnt_dump_buffer_narrow_block_instance() - Get the pointer to a - * narrowed block instance's - * dump buffer. - * @buf: Non-NULL pointer to narrow dump buffer. - * @grp: Index of the group in the narrow metadata. - * @blk: Index of the block in the group. - * @blk_inst: Index of the block instance in the block. - * - * Return: u32* to the dump buffer for the block instance. - */ -static inline u32 * -kbase_hwcnt_dump_buffer_narrow_block_instance(const struct kbase_hwcnt_dump_buffer_narrow *buf, - size_t grp, size_t blk, size_t blk_inst) -{ - return buf->dump_buf + buf->md_narrow->metadata->grp_metadata[grp].dump_buf_index + - buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + - (buf->md_narrow->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * - blk_inst); -} - -/** - * kbase_hwcnt_gpu_metadata_narrow_create() - Create HWC metadata with HWC - * entries per block truncated to - * 64 entries and block entry size - * narrowed down to 32-bit. - * - * @dst_md_narrow: Non-NULL pointer to where created narrow metadata is stored - * on success. - * @src_md: Non-NULL pointer to the HWC metadata used as the source to - * create dst_md_narrow. - * - * For backward compatibility of the interface to user clients, a new metadata - * with entries per block truncated to 64 and block entry size narrowed down - * to 32-bit will be created for dst_md_narrow. - * The total entries per block in src_md must be 64 or 128, if it's other - * values, function returns error since it's not supported. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_gpu_metadata_narrow_create(const struct kbase_hwcnt_metadata_narrow **dst_md_narrow, - const struct kbase_hwcnt_metadata *src_md); - -/** - * kbase_hwcnt_gpu_metadata_narrow_destroy() - Destroy a hardware counter narrow - * metadata object. - * @md_narrow: Pointer to hardware counter narrow metadata. - */ -void kbase_hwcnt_gpu_metadata_narrow_destroy(const struct kbase_hwcnt_metadata_narrow *md_narrow); - -/** - * kbase_hwcnt_dump_buffer_narrow_alloc() - Allocate a narrow dump buffer. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @dump_buf: Non-NULL pointer to narrow dump buffer to be initialised. Will be - * initialised to undefined values, so must be used as a copy - * destination, or cleared before use. - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_dump_buffer_narrow_alloc(const struct kbase_hwcnt_metadata_narrow *md_narrow, - struct kbase_hwcnt_dump_buffer_narrow *dump_buf); - -/** - * kbase_hwcnt_dump_buffer_narrow_free() - Free a narrow dump buffer. 
- * @dump_buf: Dump buffer to be freed. - * - * Can be safely called on an all-zeroed narrow dump buffer structure, or on an - * already freed narrow dump buffer. - */ -void kbase_hwcnt_dump_buffer_narrow_free(struct kbase_hwcnt_dump_buffer_narrow *dump_buf); - -/** - * kbase_hwcnt_dump_buffer_narrow_array_alloc() - Allocate an array of narrow - * dump buffers. - * @md_narrow: Non-NULL pointer to narrow metadata. - * @n: Number of narrow dump buffers to allocate - * @dump_bufs: Non-NULL pointer to a kbase_hwcnt_dump_buffer_narrow_array - * object to be initialised. - * - * A single zeroed contiguous page allocation will be used for all of the - * buffers inside the object, where: - * dump_bufs->bufs[n].dump_buf == page_addr + n * md_narrow.dump_buf_bytes - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_dump_buffer_narrow_array_alloc( - const struct kbase_hwcnt_metadata_narrow *md_narrow, size_t n, - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_narrow_array_free() - Free a narrow dump buffer - * array. - * @dump_bufs: Narrow Dump buffer array to be freed. - * - * Can be safely called on an all-zeroed narrow dump buffer array structure, or - * on an already freed narrow dump buffer array. - */ -void kbase_hwcnt_dump_buffer_narrow_array_free( - struct kbase_hwcnt_dump_buffer_narrow_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_block_copy_strict_narrow() - Copy all enabled block - * values from source to - * destination. - * @dst_blk: Non-NULL pointer to destination block obtained from a call to - * kbase_hwcnt_dump_buffer_narrow_block_instance. - * @src_blk: Non-NULL pointer to source block obtained from a call to - * kbase_hwcnt_dump_buffer_block_instance. - * @blk_em: Non-NULL pointer to the block bitfield(s) obtained from a call to - * kbase_hwcnt_enable_map_block_instance. - * @val_cnt: Number of values in the block. - * - * After the copy, any disabled values in destination will be zero, the enabled - * values in destination will be saturated at U32_MAX if the corresponding - * source value is bigger than U32_MAX, or copy the value from source if the - * corresponding source value is less than or equal to U32_MAX. - */ -void kbase_hwcnt_dump_buffer_block_copy_strict_narrow(u32 *dst_blk, const u64 *src_blk, - const u64 *blk_em, size_t val_cnt); - -/** - * kbase_hwcnt_dump_buffer_copy_strict_narrow() - Copy all enabled values to a - * narrow dump buffer. - * @dst_narrow: Non-NULL pointer to destination dump buffer. - * @src: Non-NULL pointer to source dump buffer. - * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. - * - * After the operation, all non-enabled values (including padding bytes) will be - * zero. Slower than the non-strict variant. - * - * The enabled values in dst_narrow will be saturated at U32_MAX if the - * corresponding source value is bigger than U32_MAX, or copy the value from - * source if the corresponding source value is less than or equal to U32_MAX. 
- */ -void kbase_hwcnt_dump_buffer_copy_strict_narrow(struct kbase_hwcnt_dump_buffer_narrow *dst_narrow, - const struct kbase_hwcnt_dump_buffer *src, - const struct kbase_hwcnt_enable_map *dst_enable_map); - -#endif /* _KBASE_HWCNT_GPU_NARROW_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c index 763eb31..3d0ad5a 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -27,15 +27,15 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, const struct kbase_hwcnt_metadata **out_metadata) { char *buf; + size_t blk; struct kbase_hwcnt_metadata *metadata; - struct kbase_hwcnt_group_metadata *grp_mds; - size_t grp; - size_t enable_map_count; /* Number of u64 bitfields (inc padding) */ - size_t dump_buf_count; /* Number of u64 values (inc padding) */ - size_t avail_mask_bits; /* Number of availability mask bits */ - - size_t size; + struct kbase_hwcnt_block_metadata *blk_mds; + size_t enable_map_count = 0; /* Number of u64 bitfields (inc padding) */ + size_t dump_buf_count = 0; /* Number of u64 values (inc padding) */ + size_t avail_mask_bits = 0; + size_t state_count = 0; size_t offset; + size_t size; if (!desc || !out_metadata) return -EINVAL; @@ -50,13 +50,8 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, size = 0; size += sizeof(struct kbase_hwcnt_metadata); - /* Group metadata */ - size += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; - /* Block metadata */ - for (grp = 0; grp < desc->grp_cnt; grp++) { - size += sizeof(struct kbase_hwcnt_block_metadata) * desc->grps[grp].blk_cnt; - } + size += sizeof(struct kbase_hwcnt_block_metadata) * desc->blk_cnt; /* Single allocation for the entire metadata */ buf = kmalloc(size, GFP_KERNEL); @@ -70,79 +65,59 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, metadata = (struct kbase_hwcnt_metadata *)(buf + offset); offset += sizeof(struct kbase_hwcnt_metadata); - /* Bump allocate the group metadata */ - grp_mds = (struct kbase_hwcnt_group_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_group_metadata) * desc->grp_cnt; - - enable_map_count = 0; - dump_buf_count = 0; - avail_mask_bits = 0; - - for (grp = 0; grp < desc->grp_cnt; grp++) { - size_t blk; - - const struct kbase_hwcnt_group_description *grp_desc = desc->grps + grp; - struct kbase_hwcnt_group_metadata *grp_md = grp_mds + grp; - - size_t group_enable_map_count = 0; - size_t group_dump_buffer_count = 0; - size_t group_avail_mask_bits = 0; - - /* Bump allocate this group's block metadata */ - struct kbase_hwcnt_block_metadata *blk_mds = - (struct kbase_hwcnt_block_metadata *)(buf + offset); - offset += sizeof(struct kbase_hwcnt_block_metadata) * grp_desc->blk_cnt; - - /* Fill in each block in the group's information */ - for (blk = 0; blk < grp_desc->blk_cnt; blk++) { - const struct kbase_hwcnt_block_description *blk_desc = grp_desc->blks + blk; - struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; - const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; - - blk_md->type = blk_desc->type; - blk_md->inst_cnt = 
blk_desc->inst_cnt; - blk_md->hdr_cnt = blk_desc->hdr_cnt; - blk_md->ctr_cnt = blk_desc->ctr_cnt; - blk_md->enable_map_index = group_enable_map_count; - blk_md->enable_map_stride = kbase_hwcnt_bitfield_count(n_values); - blk_md->dump_buf_index = group_dump_buffer_count; - blk_md->dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( + /* Bump allocate the block metadata */ + blk_mds = (struct kbase_hwcnt_block_metadata *)(buf + offset); + offset += sizeof(struct kbase_hwcnt_block_metadata) * desc->blk_cnt; + + /* Fill in each block */ + for (blk = 0; blk < desc->blk_cnt; blk++) { + const struct kbase_hwcnt_block_description *blk_desc = desc->blks + blk; + struct kbase_hwcnt_block_metadata *blk_md = blk_mds + blk; + const size_t n_values = blk_desc->hdr_cnt + blk_desc->ctr_cnt; + + *blk_md = (struct kbase_hwcnt_block_metadata){ + .type = blk_desc->type, + .inst_cnt = blk_desc->inst_cnt, + .hdr_cnt = blk_desc->hdr_cnt, + .ctr_cnt = blk_desc->ctr_cnt, + .enable_map_index = enable_map_count, + .enable_map_stride = kbase_hwcnt_bitfield_count(n_values), + .dump_buf_index = dump_buf_count, + .dump_buf_stride = KBASE_HWCNT_ALIGN_UPWARDS( n_values, - (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); - blk_md->avail_mask_index = group_avail_mask_bits; - - group_enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt; - group_dump_buffer_count += blk_md->dump_buf_stride * blk_md->inst_cnt; - group_avail_mask_bits += blk_md->inst_cnt; - } - - /* Fill in the group's information */ - grp_md->type = grp_desc->type; - grp_md->blk_cnt = grp_desc->blk_cnt; - grp_md->blk_metadata = blk_mds; - grp_md->enable_map_index = enable_map_count; - grp_md->dump_buf_index = dump_buf_count; - grp_md->avail_mask_index = avail_mask_bits; - - enable_map_count += group_enable_map_count; - dump_buf_count += group_dump_buffer_count; - avail_mask_bits += group_avail_mask_bits; + (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)), + .avail_mask_index = avail_mask_bits, + .blk_stt_index = state_count, + .blk_stt_stride = KBASE_HWCNT_BLOCK_STATE_STRIDE, + }; + + enable_map_count += blk_md->enable_map_stride * blk_md->inst_cnt; + dump_buf_count += blk_md->dump_buf_stride * blk_md->inst_cnt; + avail_mask_bits += blk_md->inst_cnt; + state_count += blk_md->inst_cnt * blk_md->blk_stt_stride; } /* Fill in the top level metadata's information */ - metadata->grp_cnt = desc->grp_cnt; - metadata->grp_metadata = grp_mds; - metadata->enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES; - metadata->dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES; - metadata->avail_mask = desc->avail_mask; - metadata->clk_cnt = desc->clk_cnt; - - WARN_ON(size != offset); + *metadata = (struct kbase_hwcnt_metadata){ + .blk_cnt = desc->blk_cnt, + .blk_metadata = blk_mds, + .enable_map_bytes = enable_map_count * KBASE_HWCNT_BITFIELD_BYTES, + .dump_buf_bytes = dump_buf_count * KBASE_HWCNT_VALUE_BYTES, + .blk_stt_bytes = state_count * KBASE_HWCNT_BLOCK_STATE_BYTES, + .clk_cnt = desc->clk_cnt, + }; + + kbase_hwcnt_cp_avail_mask(&metadata->avail_mask, &desc->avail_mask); + + if (WARN_ON(size != offset)) + return -EINVAL; + /* Due to the block alignment, there should be exactly one enable map * bit per 4 bytes in the dump buffer. 
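With groups removed, a counter system is now described as a flat array of block descriptions fed straight into the rewritten kbase_hwcnt_metadata_create(). A minimal sketch follows; the example_* helper name and the block type values are placeholders (real callers use enum kbase_hwcnt_gpu_v5_block_type entries), the header/counter counts simply mirror the V5 defaults, and error handling is reduced to the essentials.

#include "hwcnt/mali_kbase_hwcnt_types.h"

/* Describe one front-end-style block and four shader-style block instances,
 * create metadata for them, then allocate a matching dump buffer. A single
 * allocation now backs dump_buf, clk_cnt_buf and blk_stt_buf.
 */
static int example_create_flat_metadata(const struct kbase_hwcnt_metadata **out_md,
					struct kbase_hwcnt_dump_buffer *dump_buf)
{
	const struct kbase_hwcnt_block_description blks[] = {
		{ .type = 0, .inst_cnt = 1, .hdr_cnt = 4, .ctr_cnt = 60 },
		{ .type = 1, .inst_cnt = 4, .hdr_cnt = 4, .ctr_cnt = 60 },
	};
	struct kbase_hwcnt_description desc = {
		.blk_cnt = ARRAY_SIZE(blks),
		.blks = blks,
		.clk_cnt = 1,
	};
	int err;

	/* All five block instances (bits 0..4) are physically present. */
	kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0x1F, 0);

	err = kbase_hwcnt_metadata_create(&desc, out_md);
	if (err)
		return err;

	err = kbase_hwcnt_dump_buffer_alloc(*out_md, dump_buf);
	if (err)
		kbase_hwcnt_metadata_destroy(*out_md);

	return err;
}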
*/ - WARN_ON(metadata->dump_buf_bytes != - (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES)); + if (WARN_ON(metadata->dump_buf_bytes != + (metadata->enable_map_bytes * BITS_PER_BYTE * KBASE_HWCNT_VALUE_BYTES))) + return -EINVAL; *out_metadata = metadata; return 0; @@ -189,6 +164,7 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, { size_t dump_buf_bytes; size_t clk_cnt_buf_bytes; + size_t block_state_bytes; u8 *buf; if (!metadata || !dump_buf) @@ -196,15 +172,17 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, dump_buf_bytes = metadata->dump_buf_bytes; clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt; + block_state_bytes = metadata->blk_stt_bytes; - /* Make a single allocation for both dump_buf and clk_cnt_buf. */ - buf = kmalloc(dump_buf_bytes + clk_cnt_buf_bytes, GFP_KERNEL); + /* Make a single allocation for dump_buf, clk_cnt_buf and block_state_buf. */ + buf = kzalloc(dump_buf_bytes + clk_cnt_buf_bytes + block_state_bytes, GFP_KERNEL); if (!buf) return -ENOMEM; dump_buf->metadata = metadata; dump_buf->dump_buf = (u64 *)buf; dump_buf->clk_cnt_buf = (u64 *)(buf + dump_buf_bytes); + dump_buf->blk_stt_buf = (blk_stt_t *)(buf + dump_buf_bytes + clk_cnt_buf_bytes); return 0; } @@ -218,72 +196,11 @@ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf) memset(dump_buf, 0, sizeof(*dump_buf)); } -int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs) -{ - struct kbase_hwcnt_dump_buffer *buffers; - size_t buf_idx; - unsigned int order; - unsigned long addr; - size_t dump_buf_bytes; - size_t clk_cnt_buf_bytes; - - if (!metadata || !dump_bufs) - return -EINVAL; - - dump_buf_bytes = metadata->dump_buf_bytes; - clk_cnt_buf_bytes = sizeof(*dump_bufs->bufs->clk_cnt_buf) * metadata->clk_cnt; - - /* Allocate memory for the dump buffer struct array */ - buffers = kmalloc_array(n, sizeof(*buffers), GFP_KERNEL); - if (!buffers) - return -ENOMEM; - - /* Allocate pages for the actual dump buffers, as they tend to be fairly - * large. 
- */ - order = get_order((dump_buf_bytes + clk_cnt_buf_bytes) * n); - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - - if (!addr) { - kfree(buffers); - return -ENOMEM; - } - - dump_bufs->page_addr = addr; - dump_bufs->page_order = order; - dump_bufs->buf_cnt = n; - dump_bufs->bufs = buffers; - - /* Set the buffer of each dump buf */ - for (buf_idx = 0; buf_idx < n; buf_idx++) { - const size_t dump_buf_offset = dump_buf_bytes * buf_idx; - const size_t clk_cnt_buf_offset = - (dump_buf_bytes * n) + (clk_cnt_buf_bytes * buf_idx); - - buffers[buf_idx].metadata = metadata; - buffers[buf_idx].dump_buf = (u64 *)(addr + dump_buf_offset); - buffers[buf_idx].clk_cnt_buf = (u64 *)(addr + clk_cnt_buf_offset); - } - - return 0; -} - -void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs) -{ - if (!dump_bufs) - return; - - kfree(dump_bufs->bufs); - free_pages(dump_bufs->page_addr, dump_bufs->page_order); - memset(dump_bufs, 0, sizeof(*dump_bufs)); -} - void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || WARN_ON(dst->metadata != dst_enable_map->metadata)) @@ -291,21 +208,21 @@ void kbase_hwcnt_dump_buffer_zero(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { u64 *dst_blk; size_t val_cnt; - if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); } memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * metadata->clk_cnt); + memset(dst->blk_stt_buf, 0, metadata->blk_stt_bytes); } void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) @@ -314,15 +231,15 @@ void kbase_hwcnt_dump_buffer_zero_strict(struct kbase_hwcnt_dump_buffer *dst) return; memset(dst->dump_buf, 0, dst->metadata->dump_buf_bytes); - memset(dst->clk_cnt_buf, 0, sizeof(*dst->clk_cnt_buf) * dst->metadata->clk_cnt); + memset(dst->blk_stt_buf, 0, dst->metadata->blk_stt_bytes); } void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || WARN_ON(dst->metadata != dst_enable_map->metadata)) @@ -330,23 +247,29 @@ void kbase_hwcnt_dump_buffer_zero_non_enabled(struct kbase_hwcnt_dump_buffer *ds metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); const u64 *blk_em = - 
kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); /* Align upwards to include padding bytes */ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); - if (kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst)) { + if (kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst)) { /* Block available, so only zero non-enabled values */ kbase_hwcnt_dump_buffer_block_zero_non_enabled(dst_blk, blk_em, val_cnt); + + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); + } else { /* Block not available, so zero the entire thing */ kbase_hwcnt_dump_buffer_block_zero(dst_blk, val_cnt); + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } } } @@ -356,7 +279,7 @@ void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -366,24 +289,27 @@ void kbase_hwcnt_dump_buffer_copy(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { u64 *dst_blk; const u64 *src_blk; + blk_stt_t *dst_blk_stt; + const blk_stt_t *src_blk_stt; size_t val_cnt; - if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); - val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); + val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); + dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + src_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); kbase_hwcnt_dump_buffer_block_copy(dst_blk, src_blk, val_cnt); + kbase_hwcnt_block_state_copy(dst_blk_stt, src_blk_stt); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) dst->clk_cnt_buf[clk] = src->clk_cnt_buf[clk]; } @@ -394,7 +320,7 @@ void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -404,23 +330,30 @@ void kbase_hwcnt_dump_buffer_copy_strict(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, 
blk_inst); - const u64 *src_blk = - kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + const blk_stt_t *src_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); const u64 *blk_em = - kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); - size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, grp, blk); + kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst); + size_t val_cnt = kbase_hwcnt_metadata_block_values_count(metadata, blk); + /* Align upwards to include padding bytes */ val_cnt = KBASE_HWCNT_ALIGN_UPWARDS( val_cnt, (KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT / KBASE_HWCNT_VALUE_BYTES)); kbase_hwcnt_dump_buffer_block_copy_strict(dst_blk, src_blk, blk_em, val_cnt); + + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) + kbase_hwcnt_block_state_copy(dst_blk_stt, src_blk_stt); + else + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { bool clk_enabled = kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk); @@ -433,7 +366,7 @@ void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -443,26 +376,29 @@ void kbase_hwcnt_dump_buffer_accumulate(struct kbase_hwcnt_dump_buffer *dst, metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { u64 *dst_blk; const u64 *src_blk; + blk_stt_t *dst_blk_stt; + const blk_stt_t *src_blk_stt; size_t hdr_cnt; size_t ctr_cnt; - if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, grp, blk, blk_inst)) + if (!kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) continue; - dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - src_blk = kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); - hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); + dst_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + src_blk_stt = kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); + hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk); + ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); kbase_hwcnt_dump_buffer_block_accumulate(dst_blk, src_blk, hdr_cnt, ctr_cnt); + kbase_hwcnt_block_state_accumulate(dst_blk_stt, src_blk_stt); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; } @@ -473,7 +409,7 
@@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d const struct kbase_hwcnt_enable_map *dst_enable_map) { const struct kbase_hwcnt_metadata *metadata; - size_t grp, blk, blk_inst; + size_t blk, blk_inst; size_t clk; if (WARN_ON(!dst) || WARN_ON(!src) || WARN_ON(!dst_enable_map) || WARN_ON(dst == src) || @@ -483,15 +419,19 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d metadata = dst->metadata; - kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) - { - u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst); - const u64 *src_blk = - kbase_hwcnt_dump_buffer_block_instance(src, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst); + const u64 *src_blk = kbase_hwcnt_dump_buffer_block_instance(src, blk, blk_inst); const u64 *blk_em = - kbase_hwcnt_enable_map_block_instance(dst_enable_map, grp, blk, blk_inst); - size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); - size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk); + kbase_hwcnt_enable_map_block_instance(dst_enable_map, blk, blk_inst); + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + const blk_stt_t *src_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(src, blk, blk_inst); + + size_t hdr_cnt = kbase_hwcnt_metadata_block_headers_count(metadata, blk); + size_t ctr_cnt = kbase_hwcnt_metadata_block_counters_count(metadata, blk); + /* Align upwards to include padding bytes */ ctr_cnt = KBASE_HWCNT_ALIGN_UPWARDS( hdr_cnt + ctr_cnt, @@ -499,13 +439,41 @@ void kbase_hwcnt_dump_buffer_accumulate_strict(struct kbase_hwcnt_dump_buffer *d kbase_hwcnt_dump_buffer_block_accumulate_strict(dst_blk, src_blk, blk_em, hdr_cnt, ctr_cnt); + + if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) + kbase_hwcnt_block_state_accumulate(dst_blk_stt, src_blk_stt); + else + kbase_hwcnt_block_state_set(dst_blk_stt, KBASE_HWCNT_STATE_UNKNOWN); } - kbase_hwcnt_metadata_for_each_clock(metadata, clk) - { + kbase_hwcnt_metadata_for_each_clock(metadata, clk) { if (kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk)) dst->clk_cnt_buf[clk] += src->clk_cnt_buf[clk]; else dst->clk_cnt_buf[clk] = 0; } } + +void kbase_hwcnt_dump_buffer_block_state_update(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + blk_stt_t blk_stt_val) +{ + const struct kbase_hwcnt_metadata *metadata; + size_t blk, blk_inst; + + if (WARN_ON(!dst) || WARN_ON(!dst_enable_map) || + WARN_ON(dst->metadata != dst_enable_map->metadata)) + return; + + metadata = dst->metadata; + + kbase_hwcnt_metadata_for_each_block(metadata, blk, blk_inst) { + if (kbase_hwcnt_metadata_block_instance_avail(metadata, blk, blk_inst) && + kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) { + blk_stt_t *dst_blk_stt = + kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst); + + *dst_blk_stt |= blk_stt_val; + } + } +} diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h index 5c5ada4..c7afe17 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_types.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. 
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -34,12 +34,8 @@ * Terminology: * * Hardware Counter System: - * A collection of hardware counter groups, making a full hardware counter + * A collection of hardware counter blocks, making a full hardware counter * system. - * Hardware Counter Group: - * A group of Hardware Counter Blocks (e.g. a t62x might have more than one - * core group, so has one counter group per core group, where each group - * may have a different number and layout of counter blocks). * Hardware Counter Block: * A block of hardware counters (e.g. shader block, tiler block). * Hardware Counter Block Instance: @@ -59,10 +55,16 @@ * * Enable Map: * An array of u64 bitfields, where each bit either enables exactly one - * block value, or is unused (padding). + * block value, or is unused (padding). Note that this is derived from + * the client configuration, and is not obtained from the hardware. * Dump Buffer: * An array of u64 values, where each u64 corresponds either to one block * value, or is unused (padding). + * Block State Buffer: + * An array of blk_stt_t values, where each blk_stt_t corresponds to one block + * instance and is used to track the on/off power state transitions, as well has + * hardware resource availability, and whether the block was operating + * in normal or protected mode. * Availability Mask: * A bitfield, where each bit corresponds to whether a block instance is * physically available (e.g. an MP3 GPU may have a sparse core mask of @@ -74,7 +76,6 @@ * Metadata: * Structure describing the physical layout of the enable map and dump buffers * for a specific hardware counter system. - * */ #ifndef _KBASE_HWCNT_TYPES_H_ @@ -98,10 +99,14 @@ */ #define KBASE_HWCNT_VALUE_BYTES (sizeof(u64)) +/* Number of elements in the avail_mask aray, in kbase_hwcnt_metadata */ +#define KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT 2 + /* Number of bits in an availability mask (i.e. max total number of block * instances supported in a Hardware Counter System) */ -#define KBASE_HWCNT_AVAIL_MASK_BITS (sizeof(u64) * BITS_PER_BYTE) +#define KBASE_HWCNT_AVAIL_MASK_BITS \ + (sizeof(u64) * KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT * BITS_PER_BYTE) /* Minimum alignment of each block of hardware counters */ #define KBASE_HWCNT_BLOCK_BYTE_ALIGNMENT (KBASE_HWCNT_BITFIELD_BITS * KBASE_HWCNT_VALUE_BYTES) @@ -114,9 +119,60 @@ * Return: Input value if already aligned to the specified boundary, or next * (incrementing upwards) aligned value. */ -#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ +#define KBASE_HWCNT_ALIGN_UPWARDS(value, alignment) \ (value + ((alignment - (value % alignment)) % alignment)) +typedef u8 blk_stt_t; + +/* Number of bytes storing the per-block state transition information. */ +#define KBASE_HWCNT_BLOCK_STATE_BYTES (sizeof(blk_stt_t)) + +/* Number of entries of blk_stt_t used to store the block state. */ +#define KBASE_HWCNT_BLOCK_STATE_STRIDE (1) + +/* Block state indicating that the hardware block state was indeterminable + * or not set during the sampling period. + */ +#define KBASE_HWCNT_STATE_UNKNOWN ((blk_stt_t)(0)) + +/* Block state indicating that the hardware block was on or transitioned to on + * during the sampling period. + */ +#define KBASE_HWCNT_STATE_ON ((blk_stt_t)(1u << 0)) + +/* Block state indicating that the hardware block was off or transitioned to off + * during the sampling period. 
+ */ +#define KBASE_HWCNT_STATE_OFF ((blk_stt_t)(1u << 1)) + +/* Block state indicating that the hardware block was available to the current + * VM for some portion of the sampling period. + */ +#define KBASE_HWCNT_STATE_AVAILABLE ((blk_stt_t)(1u << 2)) + +/* Block state indicating that the hardware block was unavailable to the current + * VM for some portion of the sampling period. + */ +#define KBASE_HWCNT_STATE_UNAVAILABLE ((blk_stt_t)(1u << 3)) + +/* Block state indicating that the hardware block was operating in normal mode + * for some portion of the sampling period. + */ +#define KBASE_HWCNT_STATE_NORMAL ((blk_stt_t)(1u << 4)) + +/* Block state indicating that the hardware block was operating in protected mode + * for some portion of the sampling period. + */ +#define KBASE_HWCNT_STATE_PROTECTED ((blk_stt_t)(1u << 5)) + +/* For a valid block state with the above masks, only a maximum of + * KBASE_HWCNT_STATE_BITS can be set. + */ +#define KBASE_HWCNT_STATE_BITS (6) + +/* Mask to detect malformed block state bitmaps. */ +#define KBASE_HWCNT_STATE_MASK ((blk_stt_t)((1u << KBASE_HWCNT_STATE_BITS) - 1)) + /** * struct kbase_hwcnt_block_description - Description of one or more identical, * contiguous, Hardware Counter Blocks. @@ -133,31 +189,25 @@ struct kbase_hwcnt_block_description { }; /** - * struct kbase_hwcnt_group_description - Description of one or more identical, - * contiguous Hardware Counter Groups. - * @type: The arbitrary identifier used to identify the type of the group. - * @blk_cnt: The number of types of Hardware Counter Block in the group. - * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, - * describing each type of Hardware Counter Block in the group. - */ -struct kbase_hwcnt_group_description { - u64 type; - size_t blk_cnt; - const struct kbase_hwcnt_block_description *blks; + * struct kbase_hwcnt_avail_mask - Mask type for HW Counter availablility. + * @mask: Array of bitmask elements. + */ +struct kbase_hwcnt_avail_mask { + u64 mask[KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT]; }; -/** +/* * struct kbase_hwcnt_description - Description of a Hardware Counter System. - * @grp_cnt: The number of Hardware Counter Groups. - * @grps: Non-NULL pointer to an array of grp_cnt group descriptions, - * describing each Hardware Counter Group in the system. + * @blk_cnt: The number of Hardware Counter Blocks. + * @blks: Non-NULL pointer to an array of blk_cnt block descriptions, + * describing each Hardware Counter Blocks in the system. * @avail_mask: Flat Availability Mask for all block instances in the system. * @clk_cnt: The number of clock domains in the system. The maximum is 64. */ struct kbase_hwcnt_description { - size_t grp_cnt; - const struct kbase_hwcnt_group_description *grps; - u64 avail_mask; + size_t blk_cnt; + const struct kbase_hwcnt_block_description *blks; + struct kbase_hwcnt_avail_mask avail_mask; u8 clk_cnt; }; @@ -183,6 +233,12 @@ struct kbase_hwcnt_description { * @avail_mask_index: Index in bits into the parent's Availability Mask where * the Availability Masks of the Block Instances described * by this metadata start. + * @blk_stt_index: Index in bits into the parent's Block State Buffer + * where the Block State Masks of the Block Instances described + * by this metadata start. + * @blk_stt_stride: Stride in the underly block state tracking type between + * the Block State bytes corresponding to each of the + * Block Instances. 
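Taken together, these flags make blk_stt_t an accumulation bitmask rather than a single-valued state: a block that stayed powered, available to this VM and outside protected mode over the period would carry ON | AVAILABLE | NORMAL, and would additionally pick up OFF if it power-cycled mid-sample. A minimal sketch of tagging a finished sample with kbase_hwcnt_dump_buffer_block_state_update(), added to mali_kbase_hwcnt_types.c by this patch, is shown below; the helper name and the chosen flag combination are illustrative.

#include "hwcnt/mali_kbase_hwcnt_types.h"

/* OR the given flags into the block state of every enabled, available block
 * instance in dump_buf. dump_buf and enable_map must share the same metadata.
 */
static void example_mark_sample_block_state(struct kbase_hwcnt_dump_buffer *dump_buf,
					    const struct kbase_hwcnt_enable_map *enable_map)
{
	const blk_stt_t stt = KBASE_HWCNT_STATE_ON | KBASE_HWCNT_STATE_NORMAL |
			      KBASE_HWCNT_STATE_AVAILABLE;

	kbase_hwcnt_dump_buffer_block_state_update(dump_buf, enable_map, stt);
}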
 */
 struct kbase_hwcnt_block_metadata {
 	u64 type;
@@ -194,58 +250,148 @@
 	size_t dump_buf_index;
 	size_t dump_buf_stride;
 	size_t avail_mask_index;
+	size_t blk_stt_index;
+	size_t blk_stt_stride;
 };
 
 /**
- * struct kbase_hwcnt_group_metadata - Metadata describing the physical layout
- *                                     of a group of blocks in a Hardware
- *                                     Counter System's Dump Buffers and Enable
- *                                     Maps.
- * @type: The arbitrary identifier used to identify the type of the
- *        group.
- * @blk_cnt: The number of types of Hardware Counter Block in the
- *           group.
- * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata,
- *                describing the physical layout of each type of Hardware
- *                Counter Block in the group.
- * @enable_map_index: Index in u64s into the parent's Enable Map where the
- *                    Enable Maps of the blocks within the group described by
- *                    this metadata start.
- * @dump_buf_index: Index in u64s into the parent's Dump Buffer where the
- *                  Dump Buffers of the blocks within the group described by
- *                  metadata start.
- * @avail_mask_index: Index in bits into the parent's Availability Mask where
- *                    the Availability Masks of the blocks within the group
- *                    described by this metadata start.
- */
-struct kbase_hwcnt_group_metadata {
-	u64 type;
-	size_t blk_cnt;
-	const struct kbase_hwcnt_block_metadata *blk_metadata;
-	size_t enable_map_index;
-	size_t dump_buf_index;
-	size_t avail_mask_index;
-};
+ * kbase_hwcnt_set_avail_mask() - Set bitfield values into a large bitmask. Convenience function.
+ *
+ * @avail_mask: Pointer to destination HWC mask, which is comprised of an array of u64 elements
+ * @u0: Value of element 0.
+ * @u1: Value of element 1.
+ */
+static inline void kbase_hwcnt_set_avail_mask(struct kbase_hwcnt_avail_mask *avail_mask, u64 u0,
+					      u64 u1)
+{
+	/* If KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT gets updated, we must modify the signature of
+	 * kbase_hwcnt_set_avail_mask() so that all elements continue to be set.
+	 */
+	BUILD_BUG_ON(KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT != 2);
+
+	avail_mask->mask[0] = u0;
+	avail_mask->mask[1] = u1;
+}
+
+/**
+ * kbase_hwcnt_avail_masks_equal() - Compare two HWC availability masks
+ * @avail_mask0: First mask to compare
+ * @avail_mask1: Second mask to compare
+ *
+ * Return: 1 if masks are equal. Otherwise, 0.
+ */
+static inline bool kbase_hwcnt_avail_masks_equal(const struct kbase_hwcnt_avail_mask *avail_mask0,
+						 const struct kbase_hwcnt_avail_mask *avail_mask1)
+{
+	return (!memcmp(avail_mask0, avail_mask1, sizeof(*avail_mask0)));
+}
+
+/**
+ * kbase_hwcnt_avail_masks_equal_values() - Compare two HWC availability masks
+ * @avail_mask: Mask to compare
+ * @u0: First element of mask to compare against
+ * @u1: Second element of mask to compare against
+ *
+ * Return: 1 if masks are equal. Otherwise, 0.
+ */ +static inline bool +kbase_hwcnt_avail_masks_equal_values(const struct kbase_hwcnt_avail_mask *avail_mask, u64 u0, + u64 u1) +{ + BUILD_BUG_ON(KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT != 2); + return ((avail_mask->mask[0] == u0) && (avail_mask->mask[1] == u1)); +} + +/** + * kbase_hwcnt_cp_avail_mask - Copy one avail mask into another + * @dst_avail_mask: Destination mask + * @src_avail_mask: Source Mask + */ +static inline void kbase_hwcnt_cp_avail_mask(struct kbase_hwcnt_avail_mask *dst_avail_mask, + const struct kbase_hwcnt_avail_mask *src_avail_mask) +{ + memcpy(dst_avail_mask, src_avail_mask, sizeof(*dst_avail_mask)); +} + +/** + * kbase_hwcnt_set_avail_mask_bits() - Set a bitfield value into a large bitmask + * + * @avail_mask: Pointer to destination HWC mask, which is comprised of an array of u64 elements + * @offset_in_bits: The offset into which to place the value in the bitmask. The value being + * placed is expected to be fully contained by the array of bitmask elements. + * @length_in_bits: The length of the value being placed in the bitmask. Assumed to be no more + * than 64 bits in length. + * @value: The source value to be written into the bitmask. + */ +static inline void kbase_hwcnt_set_avail_mask_bits(struct kbase_hwcnt_avail_mask *avail_mask, + size_t offset_in_bits, size_t length_in_bits, + u64 value) +{ + size_t arr_offset = offset_in_bits / 64; + size_t bits_set = 0; + + if (!length_in_bits) + return; + + WARN_ON(length_in_bits > 64); + if (WARN_ON((offset_in_bits + length_in_bits) > (KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT << 6))) + return; + + do { + size_t remaining_to_set = length_in_bits - bits_set; + size_t start_dest_bit_in_word = (offset_in_bits + bits_set) - (arr_offset * 64); + size_t bits_that_fit_into_this_word = + min(64 - start_dest_bit_in_word, remaining_to_set); + + uint64_t dest_mask, mask, source_mask; + uint64_t source_fragment; + + if (bits_that_fit_into_this_word == 64) { + mask = U64_MAX; + source_mask = U64_MAX; + dest_mask = U64_MAX; + } else { + mask = (1ULL << bits_that_fit_into_this_word) - 1; + source_mask = ((1ULL << (bits_that_fit_into_this_word)) - 1) << bits_set; + dest_mask = mask << start_dest_bit_in_word; + } + + source_fragment = (value & source_mask) >> bits_set; + + if (WARN_ON(arr_offset >= KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT)) + break; + + avail_mask->mask[arr_offset] &= ~dest_mask; + avail_mask->mask[arr_offset] |= + ((source_fragment & mask) << start_dest_bit_in_word); + + arr_offset++; + bits_set += bits_that_fit_into_this_word; + } while (bits_set < length_in_bits); +} /** * struct kbase_hwcnt_metadata - Metadata describing the memory layout * of Dump Buffers and Enable Maps within a * Hardware Counter System. - * @grp_cnt: The number of Hardware Counter Groups. - * @grp_metadata: Non-NULL pointer to an array of grp_cnt group metadata, + * @blk_cnt: The number of Hardware Counter Blocks + * @blk_metadata: Non-NULL pointer to an array of blk_cnt block metadata, * describing the physical layout of each Hardware Counter - * Group in the system. + * Block in the system. * @enable_map_bytes: The size in bytes of an Enable Map needed for the system. * @dump_buf_bytes: The size in bytes of a Dump Buffer needed for the system. + * @blk_stt_bytes: The size in bytes of a Block State Buffer needed for + * the system. * @avail_mask: The Availability Mask for the system. * @clk_cnt: The number of clock domains in the system. 
*/ struct kbase_hwcnt_metadata { - size_t grp_cnt; - const struct kbase_hwcnt_group_metadata *grp_metadata; + size_t blk_cnt; + const struct kbase_hwcnt_block_metadata *blk_metadata; size_t enable_map_bytes; size_t dump_buf_bytes; - u64 avail_mask; + size_t blk_stt_bytes; + struct kbase_hwcnt_avail_mask avail_mask; u8 clk_cnt; }; @@ -257,7 +403,7 @@ struct kbase_hwcnt_metadata { * @hwcnt_enable_map: Non-NULL pointer of size metadata->enable_map_bytes to an * array of u64 bitfields, each bit of which enables one hardware * counter. - * @clk_enable_map: An array of u64 bitfields, each bit of which enables cycle + * @clk_enable_map: A u64 bitfield, each bit of which enables cycle * counter for a given clock domain. */ struct kbase_hwcnt_enable_map { @@ -274,27 +420,14 @@ struct kbase_hwcnt_enable_map { * metadata->dump_buf_bytes. * @clk_cnt_buf: A pointer to an array of u64 values for cycle count elapsed * for each clock domain. + * @blk_stt_buf: A pointer to an array of blk_stt_t values holding block state + * information for each block. */ struct kbase_hwcnt_dump_buffer { const struct kbase_hwcnt_metadata *metadata; u64 *dump_buf; u64 *clk_cnt_buf; -}; - -/** - * struct kbase_hwcnt_dump_buffer_array - Hardware Counter Dump Buffer array. - * @page_addr: Address of allocated pages. A single allocation is used for all - * Dump Buffers in the array. - * @page_order: The allocation order of the pages, the order is on a logarithmic - * scale. - * @buf_cnt: The number of allocated Dump Buffers. - * @bufs: Non-NULL pointer to the array of Dump Buffers. - */ -struct kbase_hwcnt_dump_buffer_array { - unsigned long page_addr; - unsigned int page_order; - size_t buf_cnt; - struct kbase_hwcnt_dump_buffer *bufs; + blk_stt_t *blk_stt_buf; }; /** @@ -316,232 +449,229 @@ int kbase_hwcnt_metadata_create(const struct kbase_hwcnt_description *desc, void kbase_hwcnt_metadata_destroy(const struct kbase_hwcnt_metadata *metadata); /** - * kbase_hwcnt_metadata_group_count() - Get the number of groups. - * @metadata: Non-NULL pointer to metadata. - * - * Return: Number of hardware counter groups described by metadata. + * kbase_hwcnt_block_state_set() - Set one or more block states + * for a block instance. + * @blk_stt: Pointer to destination block state instance + * @stt: Block state bitmask */ -static inline size_t kbase_hwcnt_metadata_group_count(const struct kbase_hwcnt_metadata *metadata) +static inline void kbase_hwcnt_block_state_set(blk_stt_t *blk_stt, blk_stt_t stt) { - if (WARN_ON(!metadata)) - return 0; + if (WARN_ON(stt & ~KBASE_HWCNT_STATE_MASK)) + return; - return metadata->grp_cnt; + *blk_stt = stt; } /** - * kbase_hwcnt_metadata_group_type() - Get the arbitrary type of a group. - * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * - * Return: Type of the group grp. + * kbase_hwcnt_block_state_append() - Adds one or more block states + * onto a block instance. + * @blk_stt: Pointer to destination block state instance + * @stt: Block state bitmask */ -static inline u64 kbase_hwcnt_metadata_group_type(const struct kbase_hwcnt_metadata *metadata, - size_t grp) +static inline void kbase_hwcnt_block_state_append(blk_stt_t *blk_stt, blk_stt_t stt) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) - return 0; + if (WARN_ON(stt & ~KBASE_HWCNT_STATE_MASK)) + return; - return metadata->grp_metadata[grp].type; + *blk_stt |= stt; +} + +/** + * kbase_hwcnt_block_state_copy() - Copy block state between two block + * state instances. 
+ * @dst_blk_stt: Pointer to destination block state instance + * @src_blk_stt: Pointer to source block state instance. + */ +static inline void kbase_hwcnt_block_state_copy(blk_stt_t *dst_blk_stt, + const blk_stt_t *src_blk_stt) +{ + kbase_hwcnt_block_state_set(dst_blk_stt, *src_blk_stt); +} + +/** + * kbase_hwcnt_block_state_accumulate() - Accumulate block state between two block + * state instances. + * @dst_blk_stt: Pointer to destination block state instance + * @src_blk_stt: Pointer to source block state instance. + */ +static inline void kbase_hwcnt_block_state_accumulate(blk_stt_t *dst_blk_stt, + const blk_stt_t *src_blk_stt) +{ + kbase_hwcnt_block_state_append(dst_blk_stt, *src_blk_stt); } /** - * kbase_hwcnt_metadata_block_count() - Get the number of blocks in a group. + * kbase_hwcnt_metadata_block_count() - Get the number of blocks in the metadata. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. * - * Return: Number of blocks in group grp. + * Return: Number of blocks in the metadata. */ -static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata, - size_t grp) +static inline size_t kbase_hwcnt_metadata_block_count(const struct kbase_hwcnt_metadata *metadata) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt)) + if (WARN_ON(!metadata)) return 0; - return metadata->grp_metadata[grp].blk_cnt; + return metadata->blk_cnt; } /** * kbase_hwcnt_metadata_block_type() - Get the arbitrary type of a block. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block. * - * Return: Type of the block blk in group grp. + * Return: Type of the block blk. */ static inline u64 kbase_hwcnt_metadata_block_type(const struct kbase_hwcnt_metadata *metadata, - size_t grp, size_t blk) + size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].type; + return metadata->blk_metadata[blk].type; } /** * kbase_hwcnt_metadata_block_instance_count() - Get the number of instances of * a block. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: Number of instances of block blk in group grp. + * Return: Number of instances of block blk. */ static inline size_t -kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_instance_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt; + return metadata->blk_metadata[blk].inst_cnt; } /** * kbase_hwcnt_metadata_block_headers_count() - Get the number of counter * headers. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: Number of counter headers in each instance of block blk in group grp. + * Return: Number of counter headers in each instance of block blk. 
*/ static inline size_t -kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_headers_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].hdr_cnt; + return metadata->blk_metadata[blk].hdr_cnt; } /** * kbase_hwcnt_metadata_block_counters_count() - Get the number of counters. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: Number of counters in each instance of block blk in group grp. + * Return: Number of counters in each instance of block blk. */ static inline size_t -kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_counters_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].ctr_cnt; + return metadata->blk_metadata[blk].ctr_cnt; } /** * kbase_hwcnt_metadata_block_enable_map_stride() - Get the enable map stride. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * - * Return: enable map stride in each instance of block blk in group grp. + * Return: enable map stride in each instance of block blk. */ static inline size_t kbase_hwcnt_metadata_block_enable_map_stride(const struct kbase_hwcnt_metadata *metadata, - size_t grp, size_t blk) + size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride; + return metadata->blk_metadata[blk].enable_map_stride; } /** * kbase_hwcnt_metadata_block_values_count() - Get the number of values. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * * Return: Number of headers plus counters in each instance of block blk - * in group grp. + * in the metadata. */ static inline size_t -kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_values_count(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return kbase_hwcnt_metadata_block_counters_count(metadata, grp, blk) + - kbase_hwcnt_metadata_block_headers_count(metadata, grp, blk); + return kbase_hwcnt_metadata_block_counters_count(metadata, blk) + + kbase_hwcnt_metadata_block_headers_count(metadata, blk); } /** * kbase_hwcnt_metadata_for_each_block() - Iterate over each block instance in * the metadata. 
* @md: Non-NULL pointer to metadata. - * @grp: size_t variable used as group iterator. * @blk: size_t variable used as block iterator. * @blk_inst: size_t variable used as block instance iterator. * - * Iteration order is group, then block, then block instance (i.e. linearly - * through memory). + * Iteration order is block, then block instance (i.e. linearly through memory). */ -#define kbase_hwcnt_metadata_for_each_block(md, grp, blk, blk_inst) \ - for ((grp) = 0; (grp) < kbase_hwcnt_metadata_group_count((md)); (grp)++) \ - for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md), (grp)); (blk)++) \ - for ((blk_inst) = 0; \ - (blk_inst) < \ - kbase_hwcnt_metadata_block_instance_count((md), (grp), (blk)); \ - (blk_inst)++) +#define kbase_hwcnt_metadata_for_each_block(md, blk, blk_inst) \ + for ((blk) = 0; (blk) < kbase_hwcnt_metadata_block_count((md)); (blk)++) \ + for ((blk_inst) = 0; \ + (blk_inst) < kbase_hwcnt_metadata_block_instance_count((md), (blk)); \ + (blk_inst)++) /** * kbase_hwcnt_metadata_block_avail_bit() - Get the bit index into the avail * mask corresponding to the block. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * * Return: The bit index into the avail mask for the block. */ static inline size_t -kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk) +kbase_hwcnt_metadata_block_avail_bit(const struct kbase_hwcnt_metadata *metadata, size_t blk) { - if (WARN_ON(!metadata) || WARN_ON(grp >= metadata->grp_cnt) || - WARN_ON(blk >= metadata->grp_metadata[grp].blk_cnt)) + if (WARN_ON(!metadata) || WARN_ON(blk >= metadata->blk_cnt)) return 0; - return metadata->grp_metadata[grp].avail_mask_index + - metadata->grp_metadata[grp].blk_metadata[blk].avail_mask_index; + return metadata->blk_metadata[blk].avail_mask_index; } /** * kbase_hwcnt_metadata_block_instance_avail() - Check if a block instance is * available. * @metadata: Non-NULL pointer to metadata. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: true if the block instance is available, else false. */ static inline bool -kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t grp, - size_t blk, size_t blk_inst) +kbase_hwcnt_metadata_block_instance_avail(const struct kbase_hwcnt_metadata *metadata, size_t blk, + size_t blk_inst) { size_t bit; + size_t mask_index; u64 mask; if (WARN_ON(!metadata)) return false; - bit = kbase_hwcnt_metadata_block_avail_bit(metadata, grp, blk) + blk_inst; - mask = 1ull << bit; + bit = kbase_hwcnt_metadata_block_avail_bit(metadata, blk) + blk_inst; + mask_index = bit >> 6; + mask = 1ull << (bit & 0x3f); - return (metadata->avail_mask & mask) != 0; + return (metadata->avail_mask.mask[mask_index] & mask) != 0; } /** @@ -568,31 +698,28 @@ void kbase_hwcnt_enable_map_free(struct kbase_hwcnt_enable_map *enable_map); * kbase_hwcnt_enable_map_block_instance() - Get the pointer to a block * instance's enable map. * @map: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: u64* to the bitfield(s) used as the enable map for the * block instance. 
*/ static inline u64 *kbase_hwcnt_enable_map_block_instance(const struct kbase_hwcnt_enable_map *map, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { if (WARN_ON(!map) || WARN_ON(!map->hwcnt_enable_map)) return NULL; - if (WARN_ON(!map->metadata) || WARN_ON(grp >= map->metadata->grp_cnt) || - WARN_ON(blk >= map->metadata->grp_metadata[grp].blk_cnt) || - WARN_ON(blk_inst >= map->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) + if (WARN_ON(!map->metadata) || WARN_ON(blk >= map->metadata->blk_cnt) || + WARN_ON(blk_inst >= map->metadata->blk_metadata[blk].inst_cnt)) return map->hwcnt_enable_map; - return map->hwcnt_enable_map + map->metadata->grp_metadata[grp].enable_map_index + - map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_index + - (map->metadata->grp_metadata[grp].blk_metadata[blk].enable_map_stride * blk_inst); + return map->hwcnt_enable_map + map->metadata->blk_metadata[blk].enable_map_index + + (map->metadata->blk_metadata[blk].enable_map_stride * blk_inst); } /** - * kbase_hwcnt_bitfield_count() - Calculate the number of u64 bitfields required + * kbase_hwcnt_bitfield_count - Calculate the number of u64 bitfields required * to have at minimum one bit per value. * @val_cnt: Number of values. * @@ -604,24 +731,22 @@ static inline size_t kbase_hwcnt_bitfield_count(size_t val_cnt) } /** - * kbase_hwcnt_enable_map_block_disable_all() - Disable all values in a block. + * kbase_hwcnt_enable_map_block_disable_all - Disable all values in a block. * @dst: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. */ static inline void kbase_hwcnt_enable_map_block_disable_all(struct kbase_hwcnt_enable_map *dst, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { size_t val_cnt; size_t bitfld_cnt; - u64 *const block_enable_map = - kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + u64 *const block_enable_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst); if (WARN_ON(!dst)) return; - val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); + val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, blk); bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); memset(block_enable_map, 0, bitfld_cnt * KBASE_HWCNT_BITFIELD_BYTES); @@ -645,23 +770,21 @@ static inline void kbase_hwcnt_enable_map_disable_all(struct kbase_hwcnt_enable_ /** * kbase_hwcnt_enable_map_block_enable_all() - Enable all values in a block. * @dst: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. 
*/ static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_enable_map *dst, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { size_t val_cnt; size_t bitfld_cnt; - u64 *const block_enable_map = - kbase_hwcnt_enable_map_block_instance(dst, grp, blk, blk_inst); + u64 *const block_enable_map = kbase_hwcnt_enable_map_block_instance(dst, blk, blk_inst); size_t bitfld_idx; if (WARN_ON(!dst)) return; - val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, grp, blk); + val_cnt = kbase_hwcnt_metadata_block_values_count(dst->metadata, blk); bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { @@ -682,13 +805,13 @@ static inline void kbase_hwcnt_enable_map_block_enable_all(struct kbase_hwcnt_en */ static inline void kbase_hwcnt_enable_map_enable_all(struct kbase_hwcnt_enable_map *dst) { - size_t grp, blk, blk_inst; + size_t blk, blk_inst; if (WARN_ON(!dst) || WARN_ON(!dst->metadata)) return; - kbase_hwcnt_metadata_for_each_block(dst->metadata, grp, blk, blk_inst) - kbase_hwcnt_enable_map_block_enable_all(dst, grp, blk, blk_inst); + kbase_hwcnt_metadata_for_each_block(dst->metadata, blk, blk_inst) + kbase_hwcnt_enable_map_block_enable_all(dst, blk, blk_inst); dst->clk_enable_map = (1ull << dst->metadata->clk_cnt) - 1; } @@ -751,27 +874,26 @@ static inline void kbase_hwcnt_enable_map_union(struct kbase_hwcnt_enable_map *d * kbase_hwcnt_enable_map_block_enabled() - Check if any values in a block * instance are enabled. * @enable_map: Non-NULL pointer to enable map. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: true if any values in the block are enabled, else false. 
*/ static inline bool -kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t grp, - size_t blk, size_t blk_inst) +kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable_map, size_t blk, + size_t blk_inst) { bool any_enabled = false; size_t val_cnt; size_t bitfld_cnt; const u64 *const block_enable_map = - kbase_hwcnt_enable_map_block_instance(enable_map, grp, blk, blk_inst); + kbase_hwcnt_enable_map_block_instance(enable_map, blk, blk_inst); size_t bitfld_idx; if (WARN_ON(!enable_map)) return false; - val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, grp, blk); + val_cnt = kbase_hwcnt_metadata_block_values_count(enable_map->metadata, blk); bitfld_cnt = kbase_hwcnt_bitfield_count(val_cnt); for (bitfld_idx = 0; bitfld_idx < bitfld_cnt; bitfld_idx++) { @@ -796,7 +918,7 @@ kbase_hwcnt_enable_map_block_enabled(const struct kbase_hwcnt_enable_map *enable static inline bool kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_map) { - size_t grp, blk, blk_inst; + size_t blk, blk_inst; u64 clk_enable_map_mask; if (WARN_ON(!enable_map) || WARN_ON(!enable_map->metadata)) @@ -807,9 +929,8 @@ kbase_hwcnt_enable_map_any_enabled(const struct kbase_hwcnt_enable_map *enable_m if (enable_map->metadata->clk_cnt > 0 && (enable_map->clk_enable_map & clk_enable_map_mask)) return true; - kbase_hwcnt_metadata_for_each_block(enable_map->metadata, grp, blk, blk_inst) - { - if (kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst)) + kbase_hwcnt_metadata_for_each_block(enable_map->metadata, blk, blk_inst) { + if (kbase_hwcnt_enable_map_block_enabled(enable_map, blk, blk_inst)) return true; } @@ -869,9 +990,8 @@ static inline void kbase_hwcnt_enable_map_block_disable_value(u64 *bitfld, size_ /** * kbase_hwcnt_dump_buffer_alloc() - Allocate a dump buffer. * @metadata: Non-NULL pointer to metadata describing the system. - * @dump_buf: Non-NULL pointer to dump buffer to be initialised. Will be - * initialised to undefined values, so must be used as a copy dest, - * or cleared before use. + * @dump_buf: Non-NULL pointer to a zero-initialized dump buffer. + * The memory will be zero allocated * * Return: 0 on success, else error code. */ @@ -887,54 +1007,52 @@ int kbase_hwcnt_dump_buffer_alloc(const struct kbase_hwcnt_metadata *metadata, */ void kbase_hwcnt_dump_buffer_free(struct kbase_hwcnt_dump_buffer *dump_buf); -/** - * kbase_hwcnt_dump_buffer_array_alloc() - Allocate an array of dump buffers. - * @metadata: Non-NULL pointer to metadata describing the system. - * @n: Number of dump buffers to allocate - * @dump_bufs: Non-NULL pointer to dump buffer array to be initialised. - * - * A single zeroed contiguous page allocation will be used for all of the - * buffers inside the array, where: - * dump_bufs[n].dump_buf == page_addr + n * metadata.dump_buf_bytes - * - * Return: 0 on success, else error code. - */ -int kbase_hwcnt_dump_buffer_array_alloc(const struct kbase_hwcnt_metadata *metadata, size_t n, - struct kbase_hwcnt_dump_buffer_array *dump_bufs); - -/** - * kbase_hwcnt_dump_buffer_array_free() - Free a dump buffer array. - * @dump_bufs: Dump buffer array to be freed. - * - * Can be safely called on an all-zeroed dump buffer array structure, or on an - * already freed dump buffer array. - */ -void kbase_hwcnt_dump_buffer_array_free(struct kbase_hwcnt_dump_buffer_array *dump_bufs); - /** * kbase_hwcnt_dump_buffer_block_instance() - Get the pointer to a block * instance's dump buffer. 
* @buf: Non-NULL pointer to dump buffer. - * @grp: Index of the group in the metadata. - * @blk: Index of the block in the group. + * @blk: Index of the block in the metadata. * @blk_inst: Index of the block instance in the block. * * Return: u64* to the dump buffer for the block instance. */ static inline u64 *kbase_hwcnt_dump_buffer_block_instance(const struct kbase_hwcnt_dump_buffer *buf, - size_t grp, size_t blk, size_t blk_inst) + size_t blk, size_t blk_inst) { if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) return NULL; - if (WARN_ON(!buf->metadata) || WARN_ON(grp >= buf->metadata->grp_cnt) || - WARN_ON(blk >= buf->metadata->grp_metadata[grp].blk_cnt) || - WARN_ON(blk_inst >= buf->metadata->grp_metadata[grp].blk_metadata[blk].inst_cnt)) + if (WARN_ON(!buf->metadata) || WARN_ON(blk >= buf->metadata->blk_cnt) || + WARN_ON(blk_inst >= buf->metadata->blk_metadata[blk].inst_cnt)) return buf->dump_buf; - return buf->dump_buf + buf->metadata->grp_metadata[grp].dump_buf_index + - buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_index + - (buf->metadata->grp_metadata[grp].blk_metadata[blk].dump_buf_stride * blk_inst); + return buf->dump_buf + buf->metadata->blk_metadata[blk].dump_buf_index + + (buf->metadata->blk_metadata[blk].dump_buf_stride * blk_inst); +} + +/** + * kbase_hwcnt_dump_buffer_block_state_instance() - Get the pointer to a block + * instance's block state mask. + * @buf: Non-NULL pointer to dump buffer. + * @blk: Index of the block in the metadata. + * @blk_inst: Index of the block instance in the block. + * + * Return: blk_stt_t* to the block state mask of the block instance in the dump + * buffer. + */ +static inline blk_stt_t * +kbase_hwcnt_dump_buffer_block_state_instance(const struct kbase_hwcnt_dump_buffer *buf, size_t blk, + size_t blk_inst) +{ + if (WARN_ON(!buf) || WARN_ON(!buf->dump_buf)) + return NULL; + + if (WARN_ON(!buf->metadata) || WARN_ON(blk >= buf->metadata->blk_cnt) || + WARN_ON(blk_inst >= buf->metadata->blk_metadata[blk].inst_cnt)) + return buf->blk_stt_buf; + + return buf->blk_stt_buf + buf->metadata->blk_metadata[blk].blk_stt_index + + (buf->metadata->blk_metadata[blk].blk_stt_stride * blk_inst); } /** @@ -1228,4 +1346,19 @@ static inline bool kbase_hwcnt_clk_enable_map_enabled(const u64 clk_enable_map, return false; } +/** + * kbase_hwcnt_dump_buffer_block_state_update() - Update the enabled block instances' block states + * in dst. After the operation, all non-enabled or + * unavailable block instances will be unchanged. + * @dst: Non-NULL pointer to dump buffer. + * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values. + * @blk_stt_val: Mask of block states to update. Block states not set in this mask will still be + * preserved in dst. + * + * The dst and dst_enable_map MUST have been created from the same metadata. + */ +void kbase_hwcnt_dump_buffer_block_state_update(struct kbase_hwcnt_dump_buffer *dst, + const struct kbase_hwcnt_enable_map *dst_enable_map, + blk_stt_t blk_stt_val); + #endif /* _KBASE_HWCNT_TYPES_H_ */ diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c index d618764..89cca45 100644 --- a/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c +++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_virtualizer.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -255,7 +255,7 @@ static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *h /* Make the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); /* Set the counters with the new union enable map */ @@ -264,7 +264,7 @@ static int kbasep_hwcnt_virtualizer_client_add(struct kbase_hwcnt_virtualizer *h &hvirt->scratch_buf); /* Accumulate into only existing clients' accumulation bufs */ if (!errcode) - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); } @@ -315,7 +315,7 @@ static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualize struct kbase_hwcnt_virtualizer_client *pos; /* Make the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_disable_all(&hvirt->scratch_map); - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); /* Set the counters with the new union enable map */ errcode = kbase_hwcnt_accumulator_set_counters(hvirt->accum, &hvirt->scratch_map, @@ -323,7 +323,7 @@ static void kbasep_hwcnt_virtualizer_client_remove(struct kbase_hwcnt_virtualize &hvirt->scratch_buf); /* Accumulate into remaining clients' accumulation bufs */ if (!errcode) { - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); @@ -373,7 +373,7 @@ static int kbasep_hwcnt_virtualizer_client_set_counters( /* Make the scratch enable map the union of all enable maps */ kbase_hwcnt_enable_map_copy(&hvirt->scratch_map, enable_map); - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) /* Ignore the enable map of the selected client */ if (pos != hvcli) kbase_hwcnt_enable_map_union(&hvirt->scratch_map, &pos->enable_map); @@ -385,7 +385,7 @@ static int kbasep_hwcnt_virtualizer_client_set_counters( return errcode; /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) if (pos != hvcli) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); @@ -503,7 +503,7 @@ static int kbasep_hwcnt_virtualizer_client_dump(struct kbase_hwcnt_virtualizer * return errcode; /* Accumulate into all accumulation bufs except the selected client's */ - list_for_each_entry (pos, &hvirt->clients, node) + list_for_each_entry(pos, &hvirt->clients, node) if (pos != hvcli) kbasep_hwcnt_virtualizer_client_accumulate(pos, &hvirt->scratch_buf); @@ -724,7 +724,7 @@ void kbase_hwcnt_virtualizer_term(struct kbase_hwcnt_virtualizer *hvirt) if (WARN_ON(hvirt->client_count != 0)) { struct kbase_hwcnt_virtualizer_client *pos, *n; - list_for_each_entry_safe (pos, n, &hvirt->clients, node) + list_for_each_entry_safe(pos, n, &hvirt->clients, node) kbase_hwcnt_virtualizer_client_destroy(pos); } -- cgit v1.2.3
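
The blk_stt_t flags introduced in mali_kbase_hwcnt_types.h are single-bit states that are OR-accumulated over a sampling period, so a block may legitimately report both KBASE_HWCNT_STATE_NORMAL and KBASE_HWCNT_STATE_PROTECTED if it switched modes between dumps. Below is a minimal stand-alone C sketch of that accumulation model: the constants mirror the header above, while block_state_append() and the scenario in main() are invented for illustration and are not driver code.

/* Minimal user-space sketch of how the blk_stt_t flags compose: states are
 * OR-accumulated across a sampling period, and 0 means "unknown".
 * Constants mirror the header; helper and scenario are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint8_t blk_stt_t;

#define KBASE_HWCNT_STATE_UNKNOWN     ((blk_stt_t)0)
#define KBASE_HWCNT_STATE_ON          ((blk_stt_t)(1u << 0))
#define KBASE_HWCNT_STATE_OFF         ((blk_stt_t)(1u << 1))
#define KBASE_HWCNT_STATE_AVAILABLE   ((blk_stt_t)(1u << 2))
#define KBASE_HWCNT_STATE_UNAVAILABLE ((blk_stt_t)(1u << 3))
#define KBASE_HWCNT_STATE_NORMAL      ((blk_stt_t)(1u << 4))
#define KBASE_HWCNT_STATE_PROTECTED   ((blk_stt_t)(1u << 5))
#define KBASE_HWCNT_STATE_MASK        ((blk_stt_t)((1u << 6) - 1))

/* User-space equivalent of kbase_hwcnt_block_state_append(): OR new flags in. */
static void block_state_append(blk_stt_t *blk_stt, blk_stt_t stt)
{
	if (stt & ~KBASE_HWCNT_STATE_MASK)
		return; /* malformed input, ignore (the kernel version WARNs) */
	*blk_stt |= stt;
}

int main(void)
{
	/* Accumulated state for one block instance over a dump interval. */
	blk_stt_t stt = KBASE_HWCNT_STATE_UNKNOWN;

	/* Block was powered on, available and running normally... */
	block_state_append(&stt, KBASE_HWCNT_STATE_ON | KBASE_HWCNT_STATE_NORMAL |
					 KBASE_HWCNT_STATE_AVAILABLE);
	/* ...then entered protected mode before the dump was taken. */
	block_state_append(&stt, KBASE_HWCNT_STATE_PROTECTED);

	/* Both NORMAL and PROTECTED may be set: each flag means "for some
	 * portion of the sampling period", not an exclusive mode.
	 */
	printf("accumulated block state: 0x%02x\n", (unsigned int)stt);
	return 0;
}

Because KBASE_HWCNT_STATE_UNKNOWN is zero, a freshly cleared block state buffer naturally reads as "state not yet known" until the first append.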
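The availability mask is now a two-element u64 array (KBASE_HWCNT_AVAIL_MASK_ELEM_COUNT == 2), so kbase_hwcnt_set_avail_mask_bits() has to split a field that may straddle the boundary between mask[0] and mask[1]. The sketch below re-implements that arithmetic in stand-alone C so the splitting can be checked outside the kernel; set_mask_bits(), ELEM_COUNT and the values in main() are illustrative stand-ins, not the driver's API.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define ELEM_COUNT 2

/* Write a value of 'len' bits (len <= 64) at bit 'offset' of a mask made of
 * ELEM_COUNT u64 words. The value may straddle the word boundary, in which
 * case it is written in two fragments, low bits first.
 */
static void set_mask_bits(uint64_t mask[ELEM_COUNT], size_t offset, size_t len, uint64_t value)
{
	size_t done = 0;

	if (!len || len > 64 || offset + len > 64 * ELEM_COUNT)
		return;

	while (done < len) {
		size_t word = (offset + done) / 64;
		size_t bit = (offset + done) % 64;
		size_t chunk = 64 - bit; /* bits that still fit in this word */

		if (chunk > len - done)
			chunk = len - done;

		uint64_t chunk_mask = (chunk == 64) ? UINT64_MAX : ((1ULL << chunk) - 1);
		uint64_t fragment = (value >> done) & chunk_mask;

		mask[word] &= ~(chunk_mask << bit); /* clear the destination bits */
		mask[word] |= fragment << bit;      /* then place the fragment */
		done += chunk;
	}
}

int main(void)
{
	uint64_t mask[ELEM_COUNT] = { 0, 0 };

	/* Place a 16-bit field so that it straddles the 64-bit boundary. */
	set_mask_bits(mask, 56, 16, 0xABCD);

	printf("mask[0] = 0x%016llx\n", (unsigned long long)mask[0]);
	printf("mask[1] = 0x%016llx\n", (unsigned long long)mask[1]);
	return 0;
}

Running it places the low byte of 0xABCD (0xCD) in the top byte of mask[0] and the high byte (0xAB) in the bottom byte of mask[1], i.e. lower bits of the value land at lower bit offsets, which is the ordering the kernel helper also produces.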
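With the group level removed, every accessor indexes blocks directly: a block instance's enable map, dump buffer and block state entries all live at a per-block base index plus a per-instance stride. The stand-alone sketch below models only that index + stride * blk_inst addressing, as used by kbase_hwcnt_dump_buffer_block_instance() and kbase_hwcnt_dump_buffer_block_state_instance(); struct blk_meta, the helper names and the sizes in main() are hypothetical simplifications, not the real kbase_hwcnt_metadata.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef uint8_t blk_stt_t;

/* Simplified stand-in for the per-block bookkeeping kept in
 * struct kbase_hwcnt_block_metadata: where the block's instances start in the
 * dump buffer and block state buffer, plus a per-instance stride.
 */
struct blk_meta {
	size_t inst_cnt;
	size_t dump_buf_index;  /* in u64 counter values */
	size_t dump_buf_stride;
	size_t blk_stt_index;   /* in blk_stt_t entries */
	size_t blk_stt_stride;
};

/* Analogue of kbase_hwcnt_dump_buffer_block_instance(). */
static uint64_t *block_dump_buf(uint64_t *dump_buf, const struct blk_meta *blk, size_t blk_inst)
{
	return dump_buf + blk->dump_buf_index + blk->dump_buf_stride * blk_inst;
}

/* Analogue of kbase_hwcnt_dump_buffer_block_state_instance(). */
static blk_stt_t *block_state(blk_stt_t *blk_stt_buf, const struct blk_meta *blk, size_t blk_inst)
{
	return blk_stt_buf + blk->blk_stt_index + blk->blk_stt_stride * blk_inst;
}

int main(void)
{
	/* Hypothetical layout: one front-end block followed by 4 shader core
	 * block instances of 64 values each, one blk_stt_t per instance.
	 */
	struct blk_meta shader = {
		.inst_cnt = 4,
		.dump_buf_index = 64, .dump_buf_stride = 64,
		.blk_stt_index = 1, .blk_stt_stride = 1,
	};
	uint64_t dump_buf[64 * 5] = { 0 };
	blk_stt_t blk_stt_buf[5] = { 0 };
	size_t blk_inst;

	/* Iterate instances the way kbase_hwcnt_metadata_for_each_block()
	 * now does: block, then block instance, with no group loop.
	 */
	for (blk_inst = 0; blk_inst < shader.inst_cnt; blk_inst++) {
		uint64_t *cnts = block_dump_buf(dump_buf, &shader, blk_inst);
		blk_stt_t *stt = block_state(blk_stt_buf, &shader, blk_inst);

		cnts[0] = 0xCAFE;            /* first value of the instance */
		*stt |= (blk_stt_t)(1u << 0); /* mark the instance as seen on */
	}

	printf("shader 2 first value: 0x%llx, state: 0x%x\n",
	       (unsigned long long)dump_buf[64 + 2 * 64], (unsigned int)blk_stt_buf[1 + 2]);
	return 0;
}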