Diffstat (limited to 'mali_kbase/hwcnt')
7 files changed, 119 insertions, 89 deletions
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index d7911ae..d1290ca 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,6 @@
 
 #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h"
 #include "hwcnt/mali_kbase_hwcnt_gpu.h"
-#include "hwcnt/mali_kbase_hwcnt_types.h"
 
 #include <linux/log2.h>
 #include <linux/kernel.h>
@@ -255,7 +254,8 @@ struct kbase_hwcnt_csf_physical_layout {
  * @hwc_threshold_work:       Worker for consuming available samples when
  *                            threshold interrupt raised.
  * @num_l2_slices:            Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap:    Current shader-present bitmap that is allocated to the GPU.
+ * @powered_shader_core_mask: The common mask between the debug_core_mask
+ *                            and the shader_present_bitmap.
  */
 struct kbase_hwcnt_backend_csf {
         struct kbase_hwcnt_backend_csf_info *info;
@@ -283,7 +283,7 @@ struct kbase_hwcnt_backend_csf {
         struct work_struct hwc_dump_work;
         struct work_struct hwc_threshold_work;
         size_t num_l2_slices;
-        u64 shader_present_bitmap;
+        u64 powered_shader_core_mask;
 };
 
 static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
@@ -296,7 +296,7 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_c
 }
 
 void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
-                                                 size_t num_l2_slices, u64 shader_present_bitmap)
+                                                 size_t num_l2_slices, u64 powered_shader_core_mask)
 {
         struct kbase_hwcnt_backend_csf_info *csf_info;
@@ -313,12 +313,12 @@ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_inte
                 return;
 
         if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) ||
-            WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) !=
-                    shader_present_bitmap))
+            WARN_ON((powered_shader_core_mask & csf_info->backend->phys_layout.shader_avail_mask) !=
+                    powered_shader_core_mask))
                 return;
 
         csf_info->backend->num_l2_slices = num_l2_slices;
-        csf_info->backend->shader_present_bitmap = shader_present_bitmap;
+        csf_info->backend->powered_shader_core_mask = powered_shader_core_mask;
 }
 
 /**
@@ -424,7 +424,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
         WARN_ON(!prfcnt_info);
         WARN_ON(!phys_layout);
 
-        shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask);
+        shader_core_cnt = (size_t)fls64(prfcnt_info->sc_core_mask);
         values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
         fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
         hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
@@ -445,7 +445,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
                 .fw_block_cnt = fw_block_cnt,
                 .hw_block_cnt = hw_block_cnt,
                 .block_cnt = fw_block_cnt + hw_block_cnt,
-                .shader_avail_mask = prfcnt_info->core_mask,
+                .shader_avail_mask = prfcnt_info->sc_core_mask,
                 .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
                 .values_per_block = values_per_block,
                 .counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
@@ -517,34 +517,21 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backe
         memset(backend_csf->block_states, 0, block_state_bytes);
 }
 
-/**
- * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
- *                                               information from a sample.
- * @phys_layout:           Physical memory layout information of HWC
- *                         sample buffer.
- * @enable_mask:           Counter enable mask for the block whose state is being updated.
- * @enable_state:          The CSF backend internal enabled state.
- * @exiting_protm:         Whether or not the sample is taken when the GPU is exiting
- *                         protected mode.
- * @block_idx:             Index of block within the ringbuffer.
- * @block_state:           Pointer to existing block state of the block whose state is being
- *                         updated.
- * @fw_in_protected_mode:  Whether or not GPU is in protected mode during sampling.
- */
-static void kbasep_hwcnt_backend_csf_update_block_state(
-        const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask,
-        enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm,
-        size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode)
+void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend,
+                                                 const u32 enable_mask, bool exiting_protm,
+                                                 size_t block_idx, blk_stt_t *const block_state,
+                                                 bool fw_in_protected_mode)
 {
+        const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
         /* Offset of shader core blocks from the start of the HW blocks in the sample */
         size_t shader_core_block_offset =
-                (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt);
+                (size_t)(phys_layout->block_cnt - phys_layout->shader_cnt);
         bool is_shader_core_block;
 
-        is_shader_core_block = block_idx >= shader_core_block_offset;
+        is_shader_core_block = (block_idx >= shader_core_block_offset);
 
         /* Set power bits for the block state for the block, for the sample */
-        switch (enable_state) {
+        switch (backend->enable_state) {
         /* Disabled states */
         case KBASE_HWCNT_BACKEND_CSF_DISABLED:
         case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
@@ -592,21 +579,45 @@ static void kbasep_hwcnt_backend_csf_update_block_state(
                                                KBASE_HWCNT_STATE_NORMAL);
         else
                 kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL);
+
+        /* powered_shader_core_mask stored in the backend is a combination of
+         * the shader present and the debug core mask, so explicit checking of the
+         * core mask is not required here.
+         */
+        if (is_shader_core_block) {
+                u64 current_shader_core = 1ULL << (block_idx - shader_core_block_offset);
+
+                WARN_ON_ONCE(backend->phys_layout.shader_cnt > 64);
+
+                if (current_shader_core & backend->info->backend->powered_shader_core_mask)
+                        kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
+                else if (current_shader_core & ~backend->info->backend->powered_shader_core_mask)
+                        kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_UNAVAILABLE);
+                else
+                        WARN_ON_ONCE(true);
+        }
+        else
+                kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
 }
 
-static void kbasep_hwcnt_backend_csf_accumulate_sample(
-        const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
-        u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf,
-        blk_stt_t *const block_states, bool clearing_samples,
-        enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode)
+static void kbasep_hwcnt_backend_csf_accumulate_sample(struct kbase_hwcnt_backend_csf *backend,
+                                                       const u32 *old_sample_buf,
+                                                       const u32 *new_sample_buf)
 {
+        const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
+        const size_t dump_bytes = backend->info->prfcnt_info.dump_bytes;
+        const size_t values_per_block = phys_layout->values_per_block;
+        blk_stt_t *const block_states = backend->block_states;
+        const bool fw_in_protected_mode = backend->info->fw_in_protected_mode;
+        const bool clearing_samples = backend->info->prfcnt_info.clearing_samples;
+        u64 *accum_buf = backend->accum_buf;
+        size_t block_idx;
         const u32 *old_block = old_sample_buf;
         const u32 *new_block = new_sample_buf;
         u64 *acc_block = accum_buf;
 
         /* Flag to indicate whether current sample is exiting protected mode. */
         bool exiting_protm = false;
-        const size_t values_per_block = phys_layout->values_per_block;
 
         /* The block pointers now point to the first HW block, which is always a CSHW/front-end
          * block. The counter enable mask for this block can be checked to determine whether this
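The new tail of kbasep_hwcnt_backend_csf_update_block_state() reduces the availability decision for a shader-core block to one bit test against the powered mask; note it also recomputes the shader-core offset from block_cnt rather than hw_block_cnt, which appears to account for the firmware blocks that precede the hardware blocks in the sample. A self-contained restatement of the bit test (the enum and helper below are illustrative stand-ins, not kbase symbols):

#include <stddef.h>
#include <stdint.h>

/* Stand-ins for KBASE_HWCNT_STATE_AVAILABLE / KBASE_HWCNT_STATE_UNAVAILABLE. */
enum blk_avail { BLK_AVAILABLE, BLK_UNAVAILABLE };

/* Shader-core blocks occupy the tail of the sample, starting at
 * shader_core_block_offset == block_cnt - shader_cnt, so the Nth tail block
 * corresponds to bit N of the powered mask.
 */
static enum blk_avail shader_block_availability(size_t block_idx,
                                                size_t shader_core_block_offset,
                                                uint64_t powered_shader_core_mask)
{
        uint64_t core_bit = 1ULL << (block_idx - shader_core_block_offset);

        return (core_bit & powered_shader_core_mask) ? BLK_AVAILABLE : BLK_UNAVAILABLE;
}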
@@ -620,9 +631,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
                 const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
                 const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
                 /* Update block state with information of the current sample */
-                kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask,
-                                                            enable_state, exiting_protm, block_idx,
-                                                            &block_states[block_idx],
+                kbasep_hwcnt_backend_csf_update_block_state(backend, new_enable_mask, exiting_protm,
+                                                            block_idx, &block_states[block_idx],
                                                             fw_in_protected_mode);
 
                 if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
@@ -706,7 +716,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
         u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
         const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
         const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
-        bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
         u32 *old_sample_buf = backend_csf->old_sample_buf;
         u32 *new_sample_buf = old_sample_buf;
         const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout;
@@ -740,10 +749,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
                 const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
 
                 new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
-                kbasep_hwcnt_backend_csf_accumulate_sample(
-                        phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf,
-                        new_sample_buf, backend_csf->block_states, clearing_samples,
-                        backend_csf->enable_state, backend_csf->info->fw_in_protected_mode);
+                kbasep_hwcnt_backend_csf_accumulate_sample(backend_csf, old_sample_buf,
+                                                           new_sample_buf);
 
                 old_sample_buf = new_sample_buf;
         }
@@ -1457,7 +1464,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
         ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
                                        backend_csf->to_user_block_states, dst_enable_map,
                                        backend_csf->num_l2_slices,
-                                       backend_csf->shader_present_bitmap, accumulate);
+                                       backend_csf->powered_shader_core_mask, accumulate);
 
         /* If no error occurred (zero ret value), then update block state for all blocks in the
          * accumulation with the current sample's block state.
@@ -2098,7 +2105,7 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *
         gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0;
         gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
         gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count;
-        gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+        gpu_info.sc_core_mask = csf_info->prfcnt_info.sc_core_mask;
         gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
         gpu_info.prfcnt_values_per_block =
                 csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
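A small aside on the loop that was simplified above: the sample index wrap-around in kbasep_hwcnt_backend_csf_accumulate_samples() uses a mask, which is only valid because the ring buffer count is a power of two (an invariant the masking itself implies). Illustration:

#include <stdint.h>

/* For a power-of-two ring_buf_cnt, masking with (ring_buf_cnt - 1) is the
 * cheap equivalent of raw_idx % ring_buf_cnt; e.g. with ring_buf_cnt == 8,
 * raw_idx 9 -> buf_idx 1, raw_idx 16 -> buf_idx 0.
 */
static uint32_t ring_buf_index(uint32_t raw_idx, uint32_t ring_buf_cnt)
{
        return raw_idx & (ring_buf_cnt - 1);
}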
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
index 2487db2..1b4e16d 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -30,8 +30,10 @@
 #include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
 #include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
 #include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
 
 struct kbase_hwcnt_physical_enable_map;
+struct kbase_hwcnt_backend_csf;
 
 /**
  * kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
@@ -123,11 +125,12 @@ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interfac
  *                         this function is called.
  * @iface:                 Non-NULL pointer to HWC backend interface.
  * @num_l2_slices:         Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
+ * @powered_shader_core_mask: The common mask between the debug_core_mask
+ *                            and the shader_present_bitmap.
  */
 void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
                                                  size_t num_l2_slices,
-                                                 uint64_t shader_present_bitmap);
+                                                 uint64_t powered_shader_core_mask);
 
 /** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
  *                                                  guarantee headers are
@@ -174,4 +177,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interfa
  */
 void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
 
+/**
+ * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
+ *                                               information from a sample.
+ * @backend:               CSF hardware counter backend.
+ * @enable_mask:           Counter enable mask for the block whose state is being updated.
+ * @exiting_protm:         Whether or not the sample is taken when the GPU is exiting
+ *                         protected mode.
+ * @block_idx:             Index of block within the ringbuffer.
+ * @block_state:           Pointer to existing block state of the block whose state is being
+ *                         updated.
+ * @fw_in_protected_mode:  Whether or not GPU is in protected mode during sampling.
+ */
+void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend,
+                                                 const u32 enable_mask, bool exiting_protm,
+                                                 size_t block_idx, blk_stt_t *const block_state,
+                                                 bool fw_in_protected_mode);
+
 #endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
index 65bb965..4ee2c8a 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
@@ -68,7 +68,7 @@ struct kbase_hwcnt_backend_csf_if_enable {
  * @prfcnt_block_size: Bytes of each performance counter block.
  * @l2_count:          The MMU L2 cache count.
  * @csg_count:         The total number of CSGs in the system
- * @core_mask:         Shader core mask.
+ * @sc_core_mask:      Shader core mask.
  * @clk_cnt:           Clock domain count in the system.
  * @clearing_samples:  Indicates whether counters are cleared after each sample
  *                     is taken.
@@ -80,7 +80,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
         size_t prfcnt_block_size;
         size_t l2_count;
         u32 csg_count;
-        u64 core_mask;
+        u64 sc_core_mask;
         u8 clk_cnt;
         bool clearing_samples;
 };
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index 1b7a116..fe81ce1 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -229,7 +229,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
 
         *prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
                 .l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
-                .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
+                .sc_core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
                 .prfcnt_hw_size =
                         KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
                 .prfcnt_fw_size =
@@ -290,12 +290,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
                 .dump_bytes = fw_ctx->buf_bytes,
                 .prfcnt_block_size = prfcnt_block_size,
                 .l2_count = kbdev->gpu_props.num_l2_slices,
-                .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props),
+                .sc_core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props),
                 .csg_count = fw_block_count > 1 ? csg_count : 0,
                 .clk_cnt = fw_ctx->clk_cnt,
                 .clearing_samples = true,
         };
 
+        /* Block size must be multiple of counter size. */
         WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
         /* Total size must be multiple of block size. */
@@ -513,10 +514,15 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
                         fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages,
                         MCU_AS_NR));
 
+                /* Clear the dump ring_buf content to zeros */
+                memset(fw_ring_buf->cpu_dump_base, 0, fw_ring_buf->num_pages * PAGE_SIZE);
                 vunmap(fw_ring_buf->cpu_dump_base);
 
+                /* After zeroing, the ring_buf pages are dirty so need to pass the 'dirty' flag
+                 * as true when freeing the pages to the Global pool.
+                 */
                 kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-                                          fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
+                                          fw_ring_buf->num_pages, fw_ring_buf->phys, true, false);
 
                 kfree(fw_ring_buf->phys);
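The freeing path above now follows a scrub-then-release pattern: zero the buffer through its CPU mapping, drop the mapping, then return the pages with the dirty flag set so the pool does not treat them as clean. Condensed, the three calls from the hunk read:

memset(fw_ring_buf->cpu_dump_base, 0, fw_ring_buf->num_pages * PAGE_SIZE);
vunmap(fw_ring_buf->cpu_dump_base);
kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
                          fw_ring_buf->num_pages, fw_ring_buf->phys,
                          true /* dirty */, false);

The ordering matters: the memset must happen while the vmap is still live, and the dirty flag must reflect the writes the memset just made.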
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
index 4df7dd4..8b337eb 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -165,7 +165,7 @@ static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
 #endif
 
         info->l2_count = l2_count;
-        info->core_mask = core_mask;
+        info->sc_core_mask = core_mask;
         info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
 
         /* Determine the number of available clock domains. */
@@ -186,7 +186,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf
         WARN_ON(!gpu_info);
         WARN_ON(!phys_layout);
 
-        shader_core_cnt = fls64(gpu_info->core_mask);
+        shader_core_cnt = fls64(gpu_info->sc_core_mask);
 
         *phys_layout = (struct kbase_hwcnt_jm_physical_layout){
                 .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
@@ -195,7 +195,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf
                 .shader_cnt = shader_core_cnt,
                 .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
                              gpu_info->l2_count + shader_core_cnt,
-                .shader_avail_mask = gpu_info->core_mask,
+                .shader_avail_mask = gpu_info->sc_core_mask,
                 .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
                 .values_per_block = gpu_info->prfcnt_values_per_block,
                 .counters_per_block =
@@ -384,14 +384,12 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
 
         enable = (struct kbase_instr_hwcnt_enable)
         {
-                .fe_bm = phys_enable_map.fe_bm,
-                .shader_bm = phys_enable_map.shader_bm,
-                .tiler_bm = phys_enable_map.tiler_bm,
-                .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
+                .fe_bm = phys_enable_map.fe_bm, .shader_bm = phys_enable_map.shader_bm,
+                .tiler_bm = phys_enable_map.tiler_bm, .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
                 .counter_set = phys_counter_set,
 #if IS_ENABLED(CONFIG_MALI_NO_MALI)
                 /* The dummy model needs the CPU mapping. */
-                .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
+                .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
 #else
                 .dump_buffer = backend_jm->gpu_dump_va,
 #endif /* CONFIG_MALI_NO_MALI */
@@ -411,7 +409,7 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
 
         backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev);
         backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count;
-        backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask;
+        backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.sc_core_mask;
 
         backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
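Both backends size the shader-core block array with fls64(), which returns the one-based position of the most significant set bit (and 0 for an empty mask). For a sparse core mask this deliberately counts block slots up to the highest-numbered present core, not the number of present cores, so block index equals core index. A worked example:

#include <linux/bitops.h>
#include <linux/types.h>

/* Sparse mask 0b1011: cores 0, 1 and 3 present, core 2 fused off.
 * fls64(0xb) == 4, so four shader-core block instances are laid out and
 * absent core 2 simply occupies an unavailable slot.
 */
static size_t example_shader_core_cnt(void)
{
        const u64 sc_core_mask = 0xb;

        return (size_t)fls64(sc_core_mask); /* == 4 */
}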
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
index 5da5645..ffe8449 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -169,7 +169,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
         /* Calculate number of block instances that aren't cores */
         non_core_block_count = 2 + gpu_info->l2_count;
         /* Calculate number of block instances that are shader cores */
-        sc_block_count = (size_t)fls64(gpu_info->core_mask);
+        sc_block_count = (size_t)fls64(gpu_info->sc_core_mask);
         /* Determine the total number of cores */
         core_block_count = sc_block_count;
 
@@ -277,7 +277,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
         kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0);
         kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX);
         kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count,
-                                        gpu_info->core_mask);
+                                        gpu_info->sc_core_mask);
 
         return kbase_hwcnt_metadata_create(&desc, metadata);
 
@@ -294,7 +294,7 @@ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in
 {
         WARN_ON(!gpu_info);
 
-        return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) *
+        return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->sc_core_mask)) *
                gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
 }
 
@@ -384,6 +384,7 @@ bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type b
         return false;
 }
 
+
 bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
 {
         if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
@@ -466,9 +467,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
         else
                 hw_res_available = true;
 
-        /*
-         * Skip block if no values in the destination block are enabled.
-         */
+        /* Skip block if no values in the destination block are enabled. */
         if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
                 u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
                 const u64 *src_blk = dump_src + src_offset;
@@ -592,7 +591,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
                              blk_stt_t *src_block_stt,
                              const struct kbase_hwcnt_enable_map *dst_enable_map,
-                             size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate)
+                             size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate)
 {
         const struct kbase_hwcnt_metadata *metadata;
         const u64 *dump_src = src;
@@ -614,9 +613,7 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
                 blk_stt_t *dst_blk_stt =
                         kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
 
-                /*
-                 * Skip block if no values in the destination block are enabled.
-                 */
+                /* Skip block if no values in the destination block are enabled. */
                 if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
                         u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
                         const u64 *src_blk = dump_src + src_offset;
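To make the availability-mask construction above concrete: with l2_count == 2, non_core_block_count is 4 (front-end, tiler and two L2 slices, all marked available via U64_MAX), and a sparse sc_core_mask of 0b1011 is then copied in at the next offset. A worked illustration of the two calls, not new code:

/* kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, 4, U64_MAX);
 *   -> bits 0..3 set: the four non-core blocks are always available.
 * kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 4, 4, 0xb);
 *   -> bits 4..7 take the value 0b1011: shader cores 0, 1 and 3 available,
 *      the fused-off core 2 unavailable.
 */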
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
index 4339fdd..570aad7 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -169,7 +169,7 @@ enum kbase_hwcnt_physical_set {
 /**
  * struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
  * @l2_count:                L2 cache count.
- * @core_mask:               Shader core mask. May be sparse.
+ * @sc_core_mask:            Shader core mask. May be sparse.
  * @clk_cnt:                 Number of clock domains available.
  * @csg_cnt:                 Number of CSGs available.
  * @prfcnt_values_per_block: Total entries (header + counters) of performance
@@ -178,7 +178,7 @@ enum kbase_hwcnt_physical_set {
  */
 struct kbase_hwcnt_gpu_info {
         size_t l2_count;
-        u64 core_mask;
+        u64 sc_core_mask;
         u8 clk_cnt;
         u8 csg_cnt;
         size_t prfcnt_values_per_block;
@@ -327,15 +327,16 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
  * kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
  *                              dump buffer in src into the dump buffer
  *                              abstraction in dst.
- * @dst:                   Non-NULL pointer to destination dump buffer.
- * @src:                   Non-NULL pointer to source raw dump buffer, of same length
- *                         as dump_buf_bytes in the metadata of dst dump buffer.
- * @src_block_stt:         Non-NULL pointer to source block state buffer.
- * @dst_enable_map:        Non-NULL pointer to enable map specifying enabled values.
- * @num_l2_slices:         Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
- * @accumulate:            True if counters in src should be accumulated into
- *                         destination, rather than copied.
+ * @dst:                      Non-NULL pointer to destination dump buffer.
+ * @src:                      Non-NULL pointer to source raw dump buffer, of same length
+ *                            as dump_buf_bytes in the metadata of dst dump buffer.
+ * @src_block_stt:            Non-NULL pointer to source block state buffer.
+ * @dst_enable_map:           Non-NULL pointer to enable map specifying enabled values.
+ * @num_l2_slices:            Current number of L2 slices allocated to the GPU.
+ * @powered_shader_core_mask: The common mask between the debug_core_mask
+ *                            and the shader_present_bitmap.
+ * @accumulate:               True if counters in src should be accumulated into
+ *                            destination, rather than copied.
  *
  * The dst and dst_enable_map MUST have been created from the same metadata as
  * returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
@@ -346,7 +347,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
 int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
                              blk_stt_t *src_block_stt,
                              const struct kbase_hwcnt_enable_map *dst_enable_map,
-                             size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate);
+                             size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate);
 
 /**
  * kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
@@ -453,6 +454,7 @@ bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type b
 bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
 bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
 
+
 /**
  * kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from
  *                                        counter selection bitmasks.
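End to end, the renamed mask flows from kbase_hwcnt_backend_csf_set_hw_availability() into the backend, through kbasep_hwcnt_backend_csf_update_block_state(), and out via the dump path; as shown in the first file of this patch, the CSF backend's dump call now reads:

ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
                               backend_csf->to_user_block_states, dst_enable_map,
                               backend_csf->num_l2_slices,
                               backend_csf->powered_shader_core_mask, accumulate);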