Diffstat (limited to 'mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c')
-rw-r--r--	mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c	| 397
1 file changed, 333 insertions(+), 64 deletions(-)
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index 27acfc6..d7911ae 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -44,6 +44,9 @@
 #define HWCNT_BACKEND_WATCHDOG_TIMER_INTERVAL_MS ((u32)1000)
 #endif /* IS_FPGA && !NO_MALI */
 
+/* Used to check for a sample in which all counters in the block are disabled */
+#define HWCNT_BLOCK_EMPTY_SAMPLE (2)
+
 /**
  * enum kbase_hwcnt_backend_csf_dump_state - HWC CSF backend dumping states.
  *
@@ -172,15 +175,16 @@ struct kbase_hwcnt_backend_csf_info {
 
 /**
  * struct kbase_hwcnt_csf_physical_layout - HWC sample memory physical layout
- *                                          information.
- * @hw_block_cnt: Total number of hardware counters blocks. The hw counters blocks are
- *                sub-categorized into 4 classes: front-end, tiler, memory system, and shader.
- *                hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
+ *                                          information, as defined by the spec.
 * @fe_cnt:        Front end block count.
 * @tiler_cnt:     Tiler block count.
 * @mmu_l2_cnt:    Memory system (MMU and L2 cache) block count.
 * @shader_cnt:    Shader Core block count.
- * @fw_block_cnt: Total number of firmware counters blocks.
+ * @fw_block_cnt: Total number of firmware counter blocks, with a single
+ *                global FW block and a block per CSG.
+ * @hw_block_cnt: Total number of hardware counter blocks. The hw counters blocks are
+ *                sub-categorized into 4 classes: front-end, tiler, memory system, and shader.
+ *                hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt.
 * @block_cnt:     Total block count (sum of all counter blocks: hw_block_cnt + fw_block_cnt).
 * @shader_avail_mask: Bitmap of all shader cores in the system.
 * @enable_mask_offset: Offset in array elements of enable mask in each block
@@ -190,12 +194,12 @@ struct kbase_hwcnt_backend_csf_info {
 * @values_per_block: For any block, the number of counters in total (header + payload).
 */
 struct kbase_hwcnt_csf_physical_layout {
-	u8 hw_block_cnt;
 	u8 fe_cnt;
 	u8 tiler_cnt;
 	u8 mmu_l2_cnt;
 	u8 shader_cnt;
 	u8 fw_block_cnt;
+	u8 hw_block_cnt;
 	u8 block_cnt;
 	u64 shader_avail_mask;
 	size_t enable_mask_offset;
@@ -220,6 +224,13 @@ struct kbase_hwcnt_csf_physical_layout {
 * @old_sample_buf:       HWC sample buffer to save the previous values
 *                        for delta calculation, size
 *                        prfcnt_info.dump_bytes.
+ * @block_states:        Pointer to array of block_state values for all
+ *                       blocks.
+ * @to_user_block_states: Block state buffer for client user.
+ * @accum_all_blk_stt:   Block state to accumulate for all known blocks
+ *                       on next sample.
+ * @sampled_all_blk_stt: Block state to accumulate for all known blocks
+ *                       into the current sample.
 * @watchdog_last_seen_insert_idx: The insert index which watchdog has last
 *                                 seen, to check any new firmware automatic
 *                                 samples generated during the watchdog
@@ -243,6 +254,8 @@
 * @hwc_dump_work:        Worker to accumulate samples.
 * @hwc_threshold_work:   Worker for consuming available samples when
 *                        threshold interrupt raised.
+ * @num_l2_slices:       Current number of L2 slices allocated to the GPU.
+ * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
 */
 struct kbase_hwcnt_backend_csf {
 	struct kbase_hwcnt_backend_csf_info *info;
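The reordered fields above keep the documented relationship between the counts intact. As an illustrative sketch only (not part of the patch; the function and parameter names are hypothetical, and the 1 + csg_cnt term follows the new @fw_block_cnt documentation of one global FW block plus one block per CSG):

#include <stddef.h>

/* Sketch only: mirrors the @fw_block_cnt/@hw_block_cnt/@block_cnt docs above. */
static size_t example_total_block_cnt(size_t fe_cnt, size_t tiler_cnt, size_t mmu_l2_cnt,
				      size_t shader_cnt, size_t csg_cnt)
{
	size_t fw_block_cnt = 1 + csg_cnt; /* one global FW block + one per CSG */
	size_t hw_block_cnt = fe_cnt + tiler_cnt + mmu_l2_cnt + shader_cnt;

	return fw_block_cnt + hw_block_cnt; /* block_cnt */
}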
@@ -253,6 +266,10 @@ struct kbase_hwcnt_backend_csf {
 	u64 *to_user_buf;
 	u64 *accum_buf;
 	u32 *old_sample_buf;
+	blk_stt_t *block_states;
+	blk_stt_t *to_user_block_states;
+	blk_stt_t accum_all_blk_stt;
+	blk_stt_t sampled_all_blk_stt;
 	u32 watchdog_last_seen_insert_idx;
 	struct kbase_hwcnt_backend_csf_if_ring_buf *ring_buf;
 	void *ring_buf_cpu_base;
@@ -265,15 +282,45 @@ struct kbase_hwcnt_backend_csf {
 	struct workqueue_struct *hwc_dump_workq;
 	struct work_struct hwc_dump_work;
 	struct work_struct hwc_threshold_work;
+	size_t num_l2_slices;
+	u64 shader_present_bitmap;
 };
 
 static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
 {
-	WARN_ON(!csf_info);
+	if (WARN_ON(!csf_info))
+		return false;
+
 	csf_info->csf_if->assert_lock_held(csf_info->csf_if->ctx);
 	return (csf_info->backend != NULL);
 }
 
+void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
+						 size_t num_l2_slices, u64 shader_present_bitmap)
+{
+	struct kbase_hwcnt_backend_csf_info *csf_info;
+
+	if (!iface)
+		return;
+
+	csf_info = (struct kbase_hwcnt_backend_csf_info *)iface->info;
+
+	/* Early out if the backend does not exist. */
+	if (!csf_info || !csf_info->backend)
+		return;
+
+	if (WARN_ON(csf_info->backend->enable_state != KBASE_HWCNT_BACKEND_CSF_DISABLED))
+		return;
+
+	if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) ||
+	    WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) !=
+		    shader_present_bitmap))
+		return;
+
+	csf_info->backend->num_l2_slices = num_l2_slices;
+	csf_info->backend->shader_present_bitmap = shader_present_bitmap;
+}
+
 /**
  * kbasep_hwcnt_backend_csf_cc_initial_sample() - Initialize cycle count
  *                                                tracking.
@@ -295,8 +342,7 @@ kbasep_hwcnt_backend_csf_cc_initial_sample(struct kbase_hwcnt_backend_csf *backe
 	backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
 						       clk_enable_map);
 
-	kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk)
-	{
+	kbase_hwcnt_metadata_for_each_clock(enable_map->metadata, clk) {
 		if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, clk))
 			backend_csf->prev_cycle_count[clk] = cycle_counts[clk];
 	}
@@ -317,8 +363,7 @@ static void kbasep_hwcnt_backend_csf_cc_update(struct kbase_hwcnt_backend_csf *b
 	backend_csf->info->csf_if->get_gpu_cycle_count(backend_csf->info->csf_if->ctx, cycle_counts,
 						       backend_csf->clk_enable_map);
 
-	kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk)
-	{
+	kbase_hwcnt_metadata_for_each_clock(backend_csf->info->metadata, clk) {
 		if (kbase_hwcnt_clk_enable_map_enabled(backend_csf->clk_enable_map, clk)) {
 			backend_csf->cycle_count_elapsed[clk] =
 				cycle_counts[clk] - backend_csf->prev_cycle_count[clk];
@@ -340,29 +385,29 @@ static u64 kbasep_hwcnt_backend_csf_timestamp_ns(struct kbase_hwcnt_backend *bac
 
 /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
  *                                                  guarantee headers are
- *                                                  enabled if any counter is
- *                                                  required.
+ *                                                  enabled.
 *@phys_enable_map: HWC physical enable map to be processed.
 */
-static void
-kbasep_hwcnt_backend_csf_process_enable_map(struct kbase_hwcnt_physical_enable_map *phys_enable_map)
+void kbasep_hwcnt_backend_csf_process_enable_map(
+	struct kbase_hwcnt_physical_enable_map *phys_enable_map)
 {
 	WARN_ON(!phys_enable_map);
 
-	/* Enable header if any counter is required from user, the header is
-	 * controlled by bit 0 of the enable mask.
+	/* Unconditionally enable each block header and first counter,
+	 * the header is controlled by bit 0 of the enable mask.
 	 */
-	if (phys_enable_map->fe_bm)
-		phys_enable_map->fe_bm |= 1;
+	phys_enable_map->fe_bm |= 3;
 
-	if (phys_enable_map->tiler_bm)
-		phys_enable_map->tiler_bm |= 1;
+	phys_enable_map->tiler_bm |= 3;
 
-	if (phys_enable_map->mmu_l2_bm)
-		phys_enable_map->mmu_l2_bm |= 1;
+	phys_enable_map->mmu_l2_bm |= 3;
+
+	phys_enable_map->shader_bm |= 3;
+
+	phys_enable_map->fw_bm |= 3;
+
+	phys_enable_map->csg_bm |= 3;
 
-	if (phys_enable_map->shader_bm)
-		phys_enable_map->shader_bm |= 1;
 }
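The switch from a conditional |= 1 to an unconditional |= 3 sets bit 0 (the header) and bit 1 (the first counter) in every block's enable mask. That is what makes the new HWCNT_BLOCK_EMPTY_SAMPLE test meaningful later in the accumulation path: per the patch's own comments, a dumped enable mask with bit 1 clear means the block was unavailable or produced no counter data. A minimal userspace sketch of that predicate (illustrative only, not part of the patch):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define HWCNT_BLOCK_EMPTY_SAMPLE (2) /* same value as the new define above */

/* Sketch only: true when a block's dumped enable mask carries counter data. */
static bool example_block_has_data(uint32_t enable_mask)
{
	return (enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE) != 0;
}

int main(void)
{
	assert(example_block_has_data(0x3));  /* header + first counter enabled */
	assert(!example_block_has_data(0x0)); /* block off or empty sample */
	return 0;
}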
@@ -371,32 +416,35 @@ static void kbasep_hwcnt_backend_csf_init_layout(
 {
 	size_t shader_core_cnt;
 	size_t values_per_block;
-	size_t fw_blocks_count;
-	size_t hw_blocks_count;
+	size_t fw_block_cnt;
+	size_t hw_block_cnt;
+	size_t core_cnt;
+
 	WARN_ON(!prfcnt_info);
 	WARN_ON(!phys_layout);
 
-	shader_core_cnt = fls64(prfcnt_info->core_mask);
+	shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask);
 	values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
-	fw_blocks_count = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
-	hw_blocks_count = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
+	fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
+	hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
+
+	core_cnt = shader_core_cnt;
 
 	/* The number of hardware counters reported by the GPU matches the legacy guess-work we
 	 * have done in the past
 	 */
-	WARN_ON(hw_blocks_count != KBASE_HWCNT_V5_FE_BLOCK_COUNT +
-					   KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
-					   prfcnt_info->l2_count + shader_core_cnt);
+	WARN_ON(hw_block_cnt != KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
+					prfcnt_info->l2_count + core_cnt);
 
 	*phys_layout = (struct kbase_hwcnt_csf_physical_layout){
 		.fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
 		.tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
 		.mmu_l2_cnt = prfcnt_info->l2_count,
 		.shader_cnt = shader_core_cnt,
-		.fw_block_cnt = fw_blocks_count,
-		.hw_block_cnt = hw_blocks_count,
-		.block_cnt = fw_blocks_count + hw_blocks_count,
+		.fw_block_cnt = fw_block_cnt,
+		.hw_block_cnt = hw_block_cnt,
+		.block_cnt = fw_block_cnt + hw_block_cnt,
 		.shader_avail_mask = prfcnt_info->core_mask,
 		.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
 		.values_per_block = values_per_block,
@@ -409,10 +457,14 @@ static void
 kbasep_hwcnt_backend_csf_reset_internal_buffers(struct kbase_hwcnt_backend_csf *backend_csf)
 {
 	size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+	size_t block_state_bytes = backend_csf->phys_layout.block_cnt *
+				   KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE;
 
 	memset(backend_csf->to_user_buf, 0, user_buf_bytes);
 	memset(backend_csf->accum_buf, 0, user_buf_bytes);
 	memset(backend_csf->old_sample_buf, 0, backend_csf->info->prfcnt_info.dump_bytes);
+	memset(backend_csf->block_states, 0, block_state_bytes);
+	memset(backend_csf->to_user_block_states, 0, block_state_bytes);
 }
 
 static void
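To make the arithmetic in kbasep_hwcnt_backend_csf_init_layout() and the new block_state_bytes computation concrete, here is a worked example. The 256-byte block size and the block counts are assumptions chosen for illustration, not values taken from the patch:

#include <assert.h>
#include <stddef.h>

int main(void)
{
	/* Illustrative inputs only. */
	size_t prfcnt_block_size = 256; /* bytes per counter block (assumed) */
	size_t value_hw_bytes = 4;	/* stands in for KBASE_HWCNT_VALUE_HW_BYTES */
	size_t prfcnt_fw_size = 2 * prfcnt_block_size;	/* e.g. 1 FW block + 1 CSG block */
	size_t prfcnt_hw_size = 12 * prfcnt_block_size; /* fe + tiler + L2 slices + cores */

	size_t values_per_block = prfcnt_block_size / value_hw_bytes;
	size_t fw_block_cnt = prfcnt_fw_size / prfcnt_block_size;
	size_t hw_block_cnt = prfcnt_hw_size / prfcnt_block_size;
	size_t block_cnt = fw_block_cnt + hw_block_cnt;

	assert(values_per_block == 64);
	assert(block_cnt == 14);
	return 0;
}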
@@ -450,40 +502,130 @@ kbasep_hwcnt_backend_csf_zero_all_prfcnt_en_header(struct kbase_hwcnt_backend_cs
 static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backend_csf *backend_csf)
 {
 	size_t user_buf_bytes = backend_csf->info->metadata->dump_buf_bytes;
+	size_t block_state_bytes = backend_csf->phys_layout.block_cnt *
+				   KBASE_HWCNT_BLOCK_STATE_BYTES * KBASE_HWCNT_BLOCK_STATE_STRIDE;
 
 	/* Copy the data into the sample and wait for the user to get it. */
 	memcpy(backend_csf->to_user_buf, backend_csf->accum_buf, user_buf_bytes);
+	memcpy(backend_csf->to_user_block_states, backend_csf->block_states, block_state_bytes);
 
 	/* After copied data into user sample, clear the accumulator values to
 	 * prepare for the next accumulator, such as the next request or
 	 * threshold.
 	 */
 	memset(backend_csf->accum_buf, 0, user_buf_bytes);
+	memset(backend_csf->block_states, 0, block_state_bytes);
+}
+
+/**
+ * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
+ *                                               information from a sample.
+ * @phys_layout:          Physical memory layout information of HWC
+ *                        sample buffer.
+ * @enable_mask:          Counter enable mask for the block whose state is being updated.
+ * @enable_state:         The CSF backend internal enabled state.
+ * @exiting_protm:        Whether or not the sample is taken when the GPU is exiting
+ *                        protected mode.
+ * @block_idx:            Index of block within the ringbuffer.
+ * @block_state:          Pointer to existing block state of the block whose state is being
+ *                        updated.
+ * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling.
+ */
+static void kbasep_hwcnt_backend_csf_update_block_state(
+	const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask,
+	enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm,
+	size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode)
+{
+	/* Offset of shader core blocks from the start of the HW blocks in the sample */
+	size_t shader_core_block_offset =
+		(size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt);
+	bool is_shader_core_block;
+
+	is_shader_core_block = block_idx >= shader_core_block_offset;
+
+	/* Set power bits for the block state for the block, for the sample */
+	switch (enable_state) {
+	/* Disabled states */
+	case KBASE_HWCNT_BACKEND_CSF_DISABLED:
+	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
+	case KBASE_HWCNT_BACKEND_CSF_DISABLED_WAIT_FOR_WORKER:
+		kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_OFF);
+		break;
+	/* Enabled states */
+	case KBASE_HWCNT_BACKEND_CSF_ENABLED:
+	case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_DISABLED:
+		if (!is_shader_core_block)
+			kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_ON);
+		else if (!exiting_protm) {
+			/* When not exiting protected mode, a zero enable mask on a shader core
+			 * counter block indicates the block was powered off for the sample, and
+			 * a non-zero counter enable mask indicates the block was powered on for
+			 * the sample.
+			 */
+			kbase_hwcnt_block_state_append(block_state,
						       (enable_mask ? KBASE_HWCNT_STATE_ON :
								      KBASE_HWCNT_STATE_OFF));
+		}
+		break;
+	/* Error states */
+	case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR_WAIT_FOR_WORKER:
+	case KBASE_HWCNT_BACKEND_CSF_UNRECOVERABLE_ERROR:
+	default:
+		/* Do nothing */
+		break;
+	}
+
+	/* The following four cases apply to a block state in either normal mode or protected mode:
+	 * 1. GPU executing in normal mode: Only set normal mode bit.
+	 * 2. First sample request after GPU enters protected mode: Set both normal mode and
+	 *    protected mode bit. In this case, there will at least be one sample to accumulate
+	 *    in the ring buffer which was automatically triggered before GPU entered protected
+	 *    mode.
+	 * 3. Subsequent sample requests while GPU remains in protected mode: Only set protected
+	 *    mode bit. In this case, the ring buffer should be empty and dump should return 0s but
+	 *    block state should be updated accordingly. This case is not handled here.
+	 * 4. Samples requested after GPU exits protected mode: Set both protected mode and normal
+	 *    mode bits.
+	 */
+	if (exiting_protm || fw_in_protected_mode)
+		kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_PROTECTED |
+							    KBASE_HWCNT_STATE_NORMAL);
+	else
+		kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL);
 }
 
 static void kbasep_hwcnt_backend_csf_accumulate_sample(
 	const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
-	u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf, bool clearing_samples)
+	u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf,
+	blk_stt_t *const block_states, bool clearing_samples,
+	enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode)
 {
 	size_t block_idx;
 	const u32 *old_block = old_sample_buf;
 	const u32 *new_block = new_sample_buf;
 	u64 *acc_block = accum_buf;
+	/* Flag to indicate whether current sample is exiting protected mode. */
+	bool exiting_protm = false;
 	const size_t values_per_block = phys_layout->values_per_block;
 
-	/* Performance counter blocks for firmware are stored before blocks for hardware.
-	 * We skip over the firmware's performance counter blocks (counters dumping is not
-	 * supported for firmware blocks, only hardware ones).
+	/* The block pointers now point to the first HW block, which is always a CSHW/front-end
+	 * block. The counter enable mask for this block can be checked to determine whether this
+	 * sample is taken after leaving protected mode - this is the only scenario where the CSHW
+	 * block counter enable mask has only the first bit set, and no others. In this case,
+	 * the values in this sample would not be meaningful, so they don't need to be accumulated.
 	 */
-	old_block += values_per_block * phys_layout->fw_block_cnt;
-	new_block += values_per_block * phys_layout->fw_block_cnt;
+	exiting_protm = (new_block[phys_layout->enable_mask_offset] == 1);
 
-	for (block_idx = phys_layout->fw_block_cnt; block_idx < phys_layout->block_cnt;
-	     block_idx++) {
+	for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
 		const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
 		const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
+		/* Update block state with information of the current sample */
+		kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask,
+							    enable_state, exiting_protm, block_idx,
							    &block_states[block_idx],
+							    fw_in_protected_mode);
 
-		if (new_enable_mask == 0) {
+		if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
 			/* Hardware block was unavailable or we didn't turn on
 			 * any counters. Do nothing.
 			 */
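The four protected-mode cases listed in the comment above reduce to a small decision over two booleans: this function implements cases 1, 2 and 4, while case 3 is handled in the empty-ring-buffer path of the accumulation entry point. A reduced sketch of that decision, with stand-in EX_* bit values that are assumptions rather than the driver's real constants:

#include <stdbool.h>
#include <stdint.h>

#define EX_STATE_NORMAL	   (1u << 0) /* stands in for KBASE_HWCNT_STATE_NORMAL */
#define EX_STATE_PROTECTED (1u << 1) /* stands in for KBASE_HWCNT_STATE_PROTECTED */

/* Sketch only: the mode bits appended for a sample. */
static uint32_t example_mode_bits(bool exiting_protm, bool fw_in_protected_mode)
{
	if (exiting_protm || fw_in_protected_mode)
		return EX_STATE_PROTECTED | EX_STATE_NORMAL; /* cases 2 and 4 */
	return EX_STATE_NORMAL; /* case 1 */
}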
@@ -492,7 +634,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
 			 * enabled. We need to update the accumulation buffer.
 			 */
 			size_t ctr_idx;
-
 			/* Unconditionally copy the headers. */
 			for (ctr_idx = 0; ctr_idx < phys_layout->headers_per_block; ctr_idx++) {
 				acc_block[ctr_idx] = new_block[ctr_idx];
@@ -517,8 +658,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
 				 * saturating at their maximum value.
 				 */
 				if (!clearing_samples) {
-					if (old_enable_mask == 0) {
-						/* Hardware block was previously
+					if (!(old_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
+						/* Block was previously
 						 * unavailable. Accumulate the new
 						 * counters only, as we know previous
 						 * values are zeroes.
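The clearing_samples distinction above comes down to whether each sample already holds a delta or a free-running total. A reduced sketch of the non-saturating arithmetic (the real code additionally guards against counters pegged at their maximum; the names here are illustrative):

#include <stdbool.h>
#include <stdint.h>

/* Sketch only: accumulate one counter value from a new sample. */
static uint64_t example_accumulate(uint64_t acc, uint32_t old_v, uint32_t new_v,
				   bool clearing_samples)
{
	if (clearing_samples)
		return acc + new_v; /* each sample is already a delta */

	/* Free-running counter: unsigned wrap-around subtraction gives the delta. */
	return acc + (uint32_t)(new_v - old_v);
}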
@@ -545,15 +686,14 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
 				}
 			}
 		}
+
 		old_block += values_per_block;
 		new_block += values_per_block;
 		acc_block += values_per_block;
 	}
-
 	WARN_ON(old_block != old_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
 	WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
-	WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES) -
-				    (values_per_block * phys_layout->fw_block_cnt));
+	WARN_ON(acc_block != accum_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
 	(void)dump_bytes;
 }
@@ -569,10 +709,23 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
 	bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
 	u32 *old_sample_buf = backend_csf->old_sample_buf;
 	u32 *new_sample_buf = old_sample_buf;
+	const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout;
+
+	if (extract_index_to_start == insert_index_to_stop) {
+		/* No samples to accumulate but block states need to be updated for dump. */
+		size_t block_idx;
 
-	if (extract_index_to_start == insert_index_to_stop)
-		/* No samples to accumulate. Early out. */
+		for (block_idx = 0; block_idx < phys_layout->block_cnt; block_idx++) {
+			/* Set protected mode bit for block state if GPU is in protected mode,
+			 * otherwise set the normal mode bit.
+			 */
+			kbase_hwcnt_block_state_append(&backend_csf->block_states[block_idx],
+						       backend_csf->info->fw_in_protected_mode ?
+							       KBASE_HWCNT_STATE_PROTECTED :
+							       KBASE_HWCNT_STATE_NORMAL);
+		}
 		return;
+	}
 
 	/* Sync all the buffers to CPU side before read the data. */
 	backend_csf->info->csf_if->ring_buf_sync(backend_csf->info->csf_if->ctx,
@@ -587,11 +740,10 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
 		const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
 
 		new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
-
-		kbasep_hwcnt_backend_csf_accumulate_sample(&backend_csf->phys_layout,
-							   buf_dump_bytes, backend_csf->accum_buf,
-							   old_sample_buf, new_sample_buf,
-							   clearing_samples);
+		kbasep_hwcnt_backend_csf_accumulate_sample(
+			phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf,
+			new_sample_buf, backend_csf->block_states, clearing_samples,
+			backend_csf->enable_state, backend_csf->info->fw_in_protected_mode);
 
 		old_sample_buf = new_sample_buf;
 	}
@@ -875,6 +1027,8 @@ kbasep_hwcnt_backend_csf_get_physical_enable(struct kbase_hwcnt_backend_csf *bac
 	enable->shader_bm = phys_enable_map.shader_bm;
 	enable->tiler_bm = phys_enable_map.tiler_bm;
 	enable->mmu_l2_bm = phys_enable_map.mmu_l2_bm;
+	enable->fw_bm = phys_enable_map.fw_bm;
+	enable->csg_bm = phys_enable_map.csg_bm;
 	enable->counter_set = phys_counter_set;
 	enable->clk_enable_map = enable_map->clk_enable_map;
 }
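In the accumulation loop above, buf_idx = raw_idx & (ring_buf_cnt - 1) relies on the ring buffer count being a power of two, so the AND is a cheap modulo on the free-running index. A standalone check of that identity (the values are illustrative):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t ring_buf_cnt = 8; /* assumed power of two */
	uint32_t raw_idx = 13;	   /* free-running extract/insert index */
	uint32_t buf_idx = raw_idx & (ring_buf_cnt - 1);

	assert(buf_idx == raw_idx % ring_buf_cnt);
	return 0;
}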
@@ -893,6 +1047,17 @@ kbasep_hwcnt_backend_csf_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
 
 	backend_csf->info->csf_if->assert_lock_held(backend_csf->info->csf_if->ctx);
 
+	/* Enabling counters is an indication that the power may have previously been off for all
+	 * blocks.
+	 *
+	 * In any case, the counters would not have been counting recently, so an 'off' block state
+	 * is an approximation for this.
+	 *
+	 * This will be transferred to the dump only after a dump_wait(), or dump_disable() in
+	 * cases where the caller requested such information. This is to handle when a
+	 * dump_enable() happens in between dump_wait() and dump_get().
+	 */
+	kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
 
 	kbasep_hwcnt_backend_csf_get_physical_enable(backend_csf, enable_map, &enable);
 
 	/* enable_state should be DISABLED before we transfer it to enabled */
@@ -956,13 +1121,19 @@ static void kbasep_hwcnt_backend_csf_wait_enable_transition_complete(
 }
 
 /* CSF backend implementation of kbase_hwcnt_backend_dump_disable_fn */
-static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend)
+static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *backend,
+						  struct kbase_hwcnt_dump_buffer *dump_buffer,
+						  const struct kbase_hwcnt_enable_map *enable_map)
 {
 	unsigned long flags = 0UL;
 	struct kbase_hwcnt_backend_csf *backend_csf = (struct kbase_hwcnt_backend_csf *)backend;
 	bool do_disable = false;
 
-	WARN_ON(!backend_csf);
+	if (WARN_ON(!backend_csf ||
+		    (dump_buffer && (backend_csf->info->metadata != dump_buffer->metadata)) ||
+		    (enable_map && (backend_csf->info->metadata != enable_map->metadata)) ||
+		    (dump_buffer && !enable_map)))
+		return;
 
 	backend_csf->info->csf_if->lock(backend_csf->info->csf_if->ctx, &flags);
@@ -1048,6 +1219,42 @@ static void kbasep_hwcnt_backend_csf_dump_disable(struct kbase_hwcnt_backend *ba
 	 * for next enable.
 	 */
 	kbasep_hwcnt_backend_csf_reset_internal_buffers(backend_csf);
+
+	/* Disabling HWCNT is an indication that blocks have been powered off. This is important to
+	 * know for L2, CSHW, and Tiler blocks, as this is currently the only way a backend can
+	 * know if they are being powered off.
+	 *
+	 * In any case, even if they weren't really powered off, we won't be counting whilst
+	 * disabled.
+	 *
+	 * Update the block state information in the block state accumulator to show this, so that
+	 * in the next dump blocks will have been seen as powered off for some of the time.
+	 */
+	kbase_hwcnt_block_state_append(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);
+
+	if (dump_buffer) {
+		/* In some use-cases, the caller will need the information whilst the counters are
+		 * disabled, but will not be able to call into the backend to dump them. Instead,
+		 * they have an opportunity here to request them to be accumulated into their
+		 * buffer immediately.
+		 *
+		 * This consists of taking a sample of the accumulated block state (as though a
+		 * real dump_get() had happened), then transfer ownership of that to the caller
+		 * (i.e. erasing our copy of it).
+		 */
+		kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt,
+						   &backend_csf->accum_all_blk_stt);
+		kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map,
+							   backend_csf->sampled_all_blk_stt);
+		/* Now the block state has been passed out into the caller's own accumulation
+		 * buffer, clear our own accumulated and sampled block state - ownership has been
+		 * transferred.
+		 */
+		kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt,
+					    KBASE_HWCNT_STATE_UNKNOWN);
+		kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt,
+					    KBASE_HWCNT_STATE_UNKNOWN);
+	}
 }
 
 /* CSF backend implementation of kbase_hwcnt_backend_dump_request_fn */
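The dump_buffer branch above implements a sample-then-clear hand-over: accumulate into the sampled copy, publish it to the caller, then reset both internal copies to 'unknown' because ownership has moved. Reduced to plain integers (EX_UNKNOWN and the function name are illustrative stand-ins, not the driver's API):

#include <stdint.h>

#define EX_UNKNOWN 0u /* stands in for KBASE_HWCNT_STATE_UNKNOWN */

/* Sketch only: hand accumulated state to the caller and clear our copies. */
static void example_transfer_state(uint32_t *caller_stt, uint32_t *sampled, uint32_t *accum)
{
	*sampled |= *accum;	 /* accumulate into the sampled copy */
	*caller_stt |= *sampled; /* publish combined state to the caller */
	*sampled = EX_UNKNOWN;	 /* ownership transferred ... */
	*accum = EX_UNKNOWN;	 /* ... so erase both internal copies */
}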
@@ -1183,6 +1390,16 @@ static int kbasep_hwcnt_backend_csf_dump_wait(struct kbase_hwcnt_backend *backen
 
 	backend_csf->info->csf_if->unlock(backend_csf->info->csf_if->ctx, flags);
 
+	/* Now that we've completed a sample, also sample+clear the accumulated block state.
+	 *
+	 * This is to ensure that a dump_enable() that happens in between dump_wait() and
+	 * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block
+	 * state is reported at the actual time that counters are being sampled.
+	 */
+	kbase_hwcnt_block_state_accumulate(&backend_csf->sampled_all_blk_stt,
+					   &backend_csf->accum_all_blk_stt);
+	kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+
 	return errcode;
 }
@@ -1223,8 +1440,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
 		return -EINVAL;
 
 	/* Extract elapsed cycle count for each clock domain if enabled. */
-	kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk)
-	{
+	kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
 		if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
 			continue;
@@ -1238,7 +1454,20 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
 	 * as it is undefined to call this function without a prior succeeding
 	 * one to dump_wait().
 	 */
-	ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf, dst_enable_map, accumulate);
+	ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
+				       backend_csf->to_user_block_states, dst_enable_map,
+				       backend_csf->num_l2_slices,
+				       backend_csf->shader_present_bitmap, accumulate);
+
+	/* If no error occurred (zero ret value), then update block state for all blocks in the
+	 * accumulation with the current sample's block state.
+	 */
+	if (!ret) {
+		kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map,
+							   backend_csf->sampled_all_blk_stt);
+		kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt,
+					    KBASE_HWCNT_STATE_UNKNOWN);
+	}
 
 	return ret;
 }
@@ -1269,6 +1498,12 @@ static void kbasep_hwcnt_backend_csf_destroy(struct kbase_hwcnt_backend_csf *bac
 	kfree(backend_csf->to_user_buf);
 	backend_csf->to_user_buf = NULL;
 
+	kfree(backend_csf->block_states);
+	backend_csf->block_states = NULL;
+
+	kfree(backend_csf->to_user_block_states);
+	backend_csf->to_user_block_states = NULL;
+
 	kfree(backend_csf);
 }
@@ -1285,6 +1520,7 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *
 {
 	struct kbase_hwcnt_backend_csf *backend_csf = NULL;
 	int errcode = -ENOMEM;
+	size_t block_state_bytes;
 
 	WARN_ON(!csf_info);
 	WARN_ON(!out_backend);
@@ -1308,6 +1544,17 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *
 	if (!backend_csf->to_user_buf)
 		goto err_alloc_user_sample_buf;
 
+	/* Allocate space to store block state values for each block */
+	block_state_bytes = backend_csf->phys_layout.block_cnt * KBASE_HWCNT_BLOCK_STATE_BYTES *
+			    KBASE_HWCNT_BLOCK_STATE_STRIDE;
+	backend_csf->block_states = kzalloc(block_state_bytes, GFP_KERNEL);
+	if (!backend_csf->block_states)
+		goto err_alloc_block_states_buf;
+
+	backend_csf->to_user_block_states = kzalloc(block_state_bytes, GFP_KERNEL);
+	if (!backend_csf->to_user_block_states)
+		goto err_alloc_user_block_state_buf;
+
 	errcode = csf_info->csf_if->ring_buf_alloc(csf_info->csf_if->ctx, csf_info->ring_buf_cnt,
 						   &backend_csf->ring_buf_cpu_base,
 						   &backend_csf->ring_buf);
@@ -1343,6 +1590,8 @@ static int kbasep_hwcnt_backend_csf_create(struct kbase_hwcnt_backend_csf_info *
 	complete_all(&backend_csf->dump_completed);
 	backend_csf->user_requested = false;
 	backend_csf->watchdog_last_seen_insert_idx = 0;
+	kbase_hwcnt_block_state_set(&backend_csf->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
+	kbase_hwcnt_block_state_set(&backend_csf->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
 
 	*out_backend = backend_csf;
 	return 0;
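The new error labels in the next hunk follow the usual kernel unwind idiom: each goto target frees exactly what was allocated before the failure point, in reverse order, which is why err_alloc_user_block_state_buf sits between the two new kfree() pairs. A minimal shape of the idiom (illustrative only; userspace malloc stands in for kzalloc):

#include <stdlib.h>

/* Sketch only: allocate a then b; on failure, unwind in reverse order. */
static int example_create(void **a, void **b)
{
	*a = malloc(16);
	if (!*a)
		goto err_a;

	*b = malloc(16);
	if (!*b)
		goto err_b;

	return 0;

err_b:
	free(*a);
	*a = NULL;
err_a:
	return -1; /* -ENOMEM in the real code */
}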
@@ -1351,6 +1600,12 @@ err_alloc_workqueue:
 	backend_csf->info->csf_if->ring_buf_free(backend_csf->info->csf_if->ctx,
 						 backend_csf->ring_buf);
 err_ring_buf_alloc:
+	kfree(backend_csf->to_user_block_states);
+	backend_csf->to_user_block_states = NULL;
+err_alloc_user_block_state_buf:
+	kfree(backend_csf->block_states);
+	backend_csf->block_states = NULL;
+err_alloc_block_states_buf:
 	kfree(backend_csf->to_user_buf);
 	backend_csf->to_user_buf = NULL;
 err_alloc_user_sample_buf:
@@ -1417,7 +1672,7 @@ static void kbasep_hwcnt_backend_csf_term(struct kbase_hwcnt_backend *backend)
 	if (!backend)
 		return;
 
-	kbasep_hwcnt_backend_csf_dump_disable(backend);
+	kbasep_hwcnt_backend_csf_dump_disable(backend, NULL, NULL);
 
 	/* Set the backend in csf_info to NULL so we won't handle any external
 	 * notification anymore since we are terminating.
@@ -1828,7 +2083,21 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *
 	if (csf_info->prfcnt_info.clk_cnt > BASE_MAX_NR_CLOCKS_REGULATORS)
 		return -EIO;
 
+	/* We should reject initializing the metadata for any malformed
+	 * firmware size. The legitimate firmware sizes are as follows:
+	 * 1. fw_size == 0 on older GPUs
+	 * 2. fw_size == block_size on GPUs that support FW counters but not CSG counters
+	 * 3. fw_size == (1 + #CSG) * block_size on GPUs that support CSG counters
+	 */
+	if ((csf_info->prfcnt_info.prfcnt_fw_size != 0) &&
+	    (csf_info->prfcnt_info.prfcnt_fw_size != csf_info->prfcnt_info.prfcnt_block_size) &&
+	    (csf_info->prfcnt_info.prfcnt_fw_size !=
+	     ((csf_info->prfcnt_info.csg_count + 1) * csf_info->prfcnt_info.prfcnt_block_size)))
+		return -EINVAL;
+
+	gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0;
 	gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
+	gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count;
 	gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
 	gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
 	gpu_info.prfcnt_values_per_block =
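The firmware-size validation above accepts exactly three shapes of prfcnt_fw_size. As a standalone predicate (illustrative names; csg_count mirrors prfcnt_info.csg_count, and the 256-byte block size in the checks is an assumption):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Sketch only: the three legitimate firmware counter buffer sizes. */
static bool example_fw_size_valid(uint32_t fw_size, uint32_t block_size, uint32_t csg_count)
{
	return fw_size == 0 ||		/* 1. older GPUs, no FW counters */
	       fw_size == block_size || /* 2. FW counters, no CSG counters */
	       fw_size == (csg_count + 1) * block_size; /* 3. global FW + per-CSG */
}

int main(void)
{
	assert(example_fw_size_valid(0, 256, 8));
	assert(example_fw_size_valid(256, 256, 8));
	assert(example_fw_size_valid(9 * 256, 256, 8));
	assert(!example_fw_size_valid(5 * 256, 256, 8));
	return 0;
}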