// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "hwcnt/backend/mali_kbase_hwcnt_backend_jm.h"
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
#include "hwcnt/mali_kbase_hwcnt_types.h"

#include "mali_kbase.h"
#include "backend/gpu/mali_kbase_pm_ca.h"
#include "mali_kbase_hwaccess_instr.h"
#include "mali_kbase_hwaccess_time.h"
#include "mali_kbase_ccswe.h"

#include "backend/gpu/mali_kbase_model_linux.h"
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include "backend/gpu/mali_kbase_pm_internal.h"

/**
 * struct kbase_hwcnt_backend_jm_info - Information used to create an instance
 *                                      of a JM hardware counter backend.
 * @kbdev:          KBase device.
 * @counter_set:    The performance counter set to use.
 * @metadata:       Hardware counter metadata.
 * @dump_bytes:     Bytes of GPU memory required to perform a
 *                  hardware counter dump.
 * @hwcnt_gpu_info: Hardware counter block information.
 */
struct kbase_hwcnt_backend_jm_info {
        struct kbase_device *kbdev;
        enum kbase_hwcnt_set counter_set;
        const struct kbase_hwcnt_metadata *metadata;
        size_t dump_bytes;
        struct kbase_hwcnt_gpu_info hwcnt_gpu_info;
};

/**
 * struct kbase_hwcnt_jm_physical_layout - HWC sample memory physical layout
 *                                         information.
 * @fe_cnt:             Front end block count.
 * @tiler_cnt:          Tiler block count.
 * @mmu_l2_cnt:         Memory system (MMU and L2 cache) block count.
 * @shader_cnt:         Shader Core block count.
 * @block_cnt:          Total block count (sum of all other block counts).
 * @shader_avail_mask:  Bitmap of all shader cores in the system.
 * @enable_mask_offset: Offset in array elements of enable mask in each block
 *                      starting from the beginning of block.
 * @headers_per_block:  Number of header values per block.
 * @counters_per_block: Number of counter values per block.
 * @values_per_block:   Total number of values (headers plus counters) per
 *                      block.
 */
struct kbase_hwcnt_jm_physical_layout {
        u8 fe_cnt;
        u8 tiler_cnt;
        u8 mmu_l2_cnt;
        u8 shader_cnt;
        u8 block_cnt;
        u64 shader_avail_mask;
        size_t enable_mask_offset;
        size_t headers_per_block;
        size_t counters_per_block;
        size_t values_per_block;
};

/**
 * struct kbase_hwcnt_backend_jm - Instance of a JM hardware counter backend.
 * @info:                Info used to create the backend.
 * @kctx:                KBase context used for GPU memory allocation and
 *                       counter dumping.
 * @gpu_dump_va:         GPU hardware counter dump buffer virtual address.
 * @cpu_dump_va:         CPU mapping of gpu_dump_va.
 * @vmap:                Dump buffer vmap.
 * @to_user_buf:         HWC sample buffer for client user, size
 *                       metadata.dump_buf_bytes.
 * @enabled:             True if dumping has been enabled, else false.
 * @accum_all_blk_stt:   Block State to accumulate on next sample, for all
 *                       types of block.
 * @sampled_all_blk_stt: Block State to accumulate into the current sample,
 *                       for all types of block.
 * @debug_core_mask:     User-set mask of shader cores that can be used.
 * @pm_core_mask:        PM state synchronised shader core mask for the
 *                       enabled dumping.
 * @curr_config:         Currently allocated hardware resources, used to
 *                       correctly map the source raw dump buffer to the
 *                       destination dump buffer.
 * @max_core_mask:       Core mask of all cores allocated to the GPU
 *                       (non-virtualized platforms) or resource group
 *                       (virtualized platforms).
 * @max_l2_slices:       Maximum number of L2 slices allocated to the GPU
 *                       (non-virtualized platforms) or resource group
 *                       (virtualized platforms).
 * @clk_enable_map:      The enable map specifying enabled clock domains.
 * @cycle_count_elapsed: Cycle count elapsed for a given sample period.
 *                       The top clock domain, index 0, is read directly from
 *                       hardware, but the other clock domains need to be
 *                       calculated with software estimation.
 * @prev_cycle_count:    Previous cycle count used to calculate the cycle
 *                       count for the sample period.
 * @rate_listener:       Clock rate listener callback state.
 * @ccswe_shader_cores:  Shader cores cycle count software estimator.
 * @phys_layout:         Physical memory layout information of HWC sample
 *                       buffer.
 */
struct kbase_hwcnt_backend_jm {
        const struct kbase_hwcnt_backend_jm_info *info;
        struct kbase_context *kctx;
        u64 gpu_dump_va;
        void *cpu_dump_va;
        struct kbase_vmap_struct *vmap;
        u64 *to_user_buf;
        bool enabled;
        blk_stt_t accum_all_blk_stt;
        blk_stt_t sampled_all_blk_stt;
        u64 debug_core_mask;
        u64 pm_core_mask;
        struct kbase_hwcnt_curr_config curr_config;
        u64 max_core_mask;
        size_t max_l2_slices;
        u64 clk_enable_map;
        u64 cycle_count_elapsed[BASE_MAX_NR_CLOCKS_REGULATORS];
        u64 prev_cycle_count[BASE_MAX_NR_CLOCKS_REGULATORS];
        struct kbase_clk_rate_listener rate_listener;
        struct kbase_ccswe ccswe_shader_cores;
        struct kbase_hwcnt_jm_physical_layout phys_layout;
};

/**
 * kbasep_hwcnt_backend_jm_gpu_info_init() - Initialise an info structure used
 *                                           to create the hwcnt metadata.
 * @kbdev: Non-NULL pointer to kbase device.
 * @info:  Non-NULL pointer to data structure to be filled in.
 *
 * The initialised info struct will only be valid for use while kbdev is valid.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
                struct kbase_hwcnt_gpu_info *info)
{
        size_t clk, l2_count;
        u64 core_mask;

        if (!kbdev || !info)
                return -EINVAL;

#if IS_ENABLED(CONFIG_MALI_NO_MALI)
        l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS;
        core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1;
#else
        l2_count = kbdev->gpu_props.num_l2_slices;
        core_mask = kbdev->gpu_props.coherency_info.group.core_mask;
#endif

        info->l2_count = l2_count;
        info->sc_core_mask = core_mask;
        info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;

        /* Determine the number of available clock domains.
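         * The scan below stops at the first unused (NULL) clock slot, so
         * clk_cnt ends up as the number of clock domains registered with the
         * clock rate trace manager.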
         */
        for (clk = 0; clk < BASE_MAX_NR_CLOCKS_REGULATORS; clk++) {
                if (kbdev->pm.clk_rtm.clks[clk] == NULL)
                        break;
        }
        info->clk_cnt = clk;

        return 0;
}

/**
 * kbasep_hwcnt_backend_jm_init_layout() - Initialise the physical memory
 *                                         layout information of the HWC
 *                                         sample buffer.
 * @gpu_info:    Non-NULL pointer to hwcnt info for the current GPU.
 * @phys_layout: Non-NULL pointer to layout info to be filled in.
 */
static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_info *gpu_info,
                struct kbase_hwcnt_jm_physical_layout *phys_layout)
{
        u8 shader_core_cnt;

        WARN_ON(!gpu_info);
        WARN_ON(!phys_layout);

        shader_core_cnt = fls64(gpu_info->sc_core_mask);

        *phys_layout = (struct kbase_hwcnt_jm_physical_layout){
                .fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
                .tiler_cnt = KBASE_HWCNT_V5_TILER_BLOCK_COUNT,
                .mmu_l2_cnt = gpu_info->l2_count,
                .shader_cnt = shader_core_cnt,
                .block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
                             gpu_info->l2_count + shader_core_cnt,
                .shader_avail_mask = gpu_info->sc_core_mask,
                .headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
                .values_per_block = gpu_info->prfcnt_values_per_block,
                .counters_per_block =
                        gpu_info->prfcnt_values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
                .enable_mask_offset = KBASE_HWCNT_V5_PRFCNT_EN_HEADER,
        };
}

/**
 * kbasep_hwcnt_backend_jm_dump_sample() - Copy the 32-bit counter values from
 *                                         the CPU mapping of the GPU dump
 *                                         buffer into the 64-bit user buffer.
 * @backend_jm: Non-NULL pointer to backend.
 */
static void
kbasep_hwcnt_backend_jm_dump_sample(const struct kbase_hwcnt_backend_jm *const backend_jm)
{
        size_t block_idx;
        const u32 *new_sample_buf = backend_jm->cpu_dump_va;
        const u32 *new_block = new_sample_buf;
        u64 *dst_buf = backend_jm->to_user_buf;
        u64 *dst_block = dst_buf;
        const size_t values_per_block = backend_jm->phys_layout.values_per_block;
        const size_t dump_bytes = backend_jm->info->dump_bytes;

        for (block_idx = 0; block_idx < backend_jm->phys_layout.block_cnt; block_idx++) {
                size_t ctr_idx;

                for (ctr_idx = 0; ctr_idx < values_per_block; ctr_idx++)
                        dst_block[ctr_idx] = new_block[ctr_idx];

                new_block += values_per_block;
                dst_block += values_per_block;
        }

        WARN_ON(new_block != new_sample_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
        WARN_ON(dst_block != dst_buf + (dump_bytes / KBASE_HWCNT_VALUE_HW_BYTES));
}

/**
 * kbasep_hwcnt_backend_jm_on_freq_change() - On freq change callback
 *
 * @rate_listener: Callback state
 * @clk_index:     Clock index
 * @clk_rate_hz:   Clock frequency (Hz)
 */
static void kbasep_hwcnt_backend_jm_on_freq_change(struct kbase_clk_rate_listener *rate_listener,
                u32 clk_index, u32 clk_rate_hz)
{
        struct kbase_hwcnt_backend_jm *backend_jm =
                container_of(rate_listener, struct kbase_hwcnt_backend_jm, rate_listener);
        u64 timestamp_ns;

        if (clk_index != KBASE_CLOCK_DOMAIN_SHADER_CORES)
                return;

        timestamp_ns = ktime_get_raw_ns();
        kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, clk_rate_hz);
}

/**
 * kbasep_hwcnt_backend_jm_cc_enable() - Enable cycle count tracking
 *
 * @backend_jm:   Non-NULL pointer to backend.
 * @enable_map:   Non-NULL pointer to enable map specifying enabled counters.
 * @timestamp_ns: Timestamp (ns) when HWCNT were enabled.
 */
static void kbasep_hwcnt_backend_jm_cc_enable(struct kbase_hwcnt_backend_jm *backend_jm,
                const struct kbase_hwcnt_enable_map *enable_map, u64 timestamp_ns)
{
        struct kbase_device *kbdev = backend_jm->kctx->kbdev;
        u64 clk_enable_map = enable_map->clk_enable_map;
        u64 cycle_count;

        if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
                /* turn on the cycle counter */
                kbase_pm_request_gpu_cycle_counter_l2_is_on(kbdev);
                /* Read cycle count for top clock domain.
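                 * This value becomes the baseline from which the elapsed
                 * cycle count for the sample period is computed at
                 * dump_request() time.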
                 */
                kbase_backend_get_gpu_time_norequest(kbdev, &cycle_count, NULL, NULL);

                backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_TOP] = cycle_count;
        }

        if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
                /* software estimation for non-top clock domains */
                struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
                const struct kbase_clk_data *clk_data = rtm->clks[KBASE_CLOCK_DOMAIN_SHADER_CORES];
                u32 cur_freq;
                unsigned long flags;

                spin_lock_irqsave(&rtm->lock, flags);

                cur_freq = (u32)clk_data->clock_val;
                kbase_ccswe_reset(&backend_jm->ccswe_shader_cores);
                kbase_ccswe_freq_change(&backend_jm->ccswe_shader_cores, timestamp_ns, cur_freq);

                kbase_clk_rate_trace_manager_subscribe_no_lock(rtm, &backend_jm->rate_listener);

                spin_unlock_irqrestore(&rtm->lock, flags);

                /* ccswe was reset. The estimated cycle is zero. */
                backend_jm->prev_cycle_count[KBASE_CLOCK_DOMAIN_SHADER_CORES] = 0;
        }

        /* Keep clk_enable_map for dump_request. */
        backend_jm->clk_enable_map = clk_enable_map;
}

/**
 * kbasep_hwcnt_backend_jm_cc_disable() - Disable cycle count tracking
 *
 * @backend_jm: Non-NULL pointer to backend.
 */
static void kbasep_hwcnt_backend_jm_cc_disable(struct kbase_hwcnt_backend_jm *backend_jm)
{
        struct kbase_device *kbdev = backend_jm->kctx->kbdev;
        struct kbase_clk_rate_trace_manager *rtm = &kbdev->pm.clk_rtm;
        u64 clk_enable_map = backend_jm->clk_enable_map;

        if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_TOP)) {
                /* turn off the cycle counter */
                kbase_pm_release_gpu_cycle_counter(kbdev);
        }

        if (kbase_hwcnt_clk_enable_map_enabled(clk_enable_map, KBASE_CLOCK_DOMAIN_SHADER_CORES)) {
                kbase_clk_rate_trace_manager_unsubscribe(rtm, &backend_jm->rate_listener);
        }
}

/**
 * kbasep_hwcnt_gpu_update_curr_config() - Update the destination buffer with
 *                                         current config information.
 * @kbdev:       Non-NULL pointer to kbase device.
 * @curr_config: Non-NULL pointer to return the current configuration of
 *               hardware allocated to the GPU.
 *
 * The current configuration information is used for architectures where the
 * max_config interface is available from the Arbiter. In this case the
 * currently allocated hardware is not always the same, so the current config
 * information is used to correctly map the currently allocated resources to
 * the memory layout that is copied to user space.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_gpu_update_curr_config(struct kbase_device *kbdev,
                struct kbase_hwcnt_curr_config *curr_config)
{
        if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
                return -EINVAL;

        lockdep_assert_held(&kbdev->hwaccess_lock);

        curr_config->num_l2_slices = kbdev->gpu_props.curr_config.l2_slices;
        curr_config->shader_present = kbdev->gpu_props.curr_config.shader_present;

        return 0;
}

/* JM backend implementation of kbase_hwcnt_backend_timestamp_ns_fn */
static u64 kbasep_hwcnt_backend_jm_timestamp_ns(struct kbase_hwcnt_backend *backend)
{
        (void)backend;
        return ktime_get_raw_ns();
}

/* JM backend implementation of kbase_hwcnt_backend_dump_enable_nolock_fn */
static int kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
                const struct kbase_hwcnt_enable_map *enable_map)
{
        int errcode;
        struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
        struct kbase_context *kctx;
        struct kbase_device *kbdev;
        struct kbase_hwcnt_physical_enable_map phys_enable_map = { 0 };
        enum kbase_hwcnt_physical_set phys_counter_set;
        struct kbase_instr_hwcnt_enable enable = { 0 };
        u64 timestamp_ns;

        if (!backend_jm || !enable_map || backend_jm->enabled ||
            (enable_map->metadata != backend_jm->info->metadata))
                return -EINVAL;

        kctx = backend_jm->kctx;
        kbdev = backend_jm->kctx->kbdev;

        lockdep_assert_held(&kbdev->hwaccess_lock);

        kbase_hwcnt_gpu_enable_map_to_physical(&phys_enable_map, enable_map);

        kbase_hwcnt_gpu_set_to_physical(&phys_counter_set, backend_jm->info->counter_set);

        enable = (struct kbase_instr_hwcnt_enable){
                .fe_bm = phys_enable_map.fe_bm,
                .shader_bm = phys_enable_map.shader_bm,
                .tiler_bm = phys_enable_map.tiler_bm,
                .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
                .counter_set = phys_counter_set,
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
                /* The dummy model needs the CPU mapping. */
                .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
#else
                .dump_buffer = backend_jm->gpu_dump_va,
#endif /* CONFIG_MALI_NO_MALI */
                .dump_buffer_bytes = backend_jm->info->dump_bytes,
        };

        timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);

        /* Update the current configuration information. */
        errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);
        if (errcode)
                goto error;

        errcode = kbase_instr_hwcnt_enable_internal(kbdev, kctx, &enable);
        if (errcode)
                goto error;

        backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev);
        backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count;
        backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.sc_core_mask;

        backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);

        backend_jm->enabled = true;

        /* Enabling counters is an indication that the power may have previously been off for all
         * blocks.
         *
         * In any case, the counters would not have been counting recently, so an 'off' block state
         * is an approximation for this.
         *
         * This will be transferred to the dump only after a dump_wait(), or dump_disable() in
         * cases where the caller requested such information. This is to handle when a
         * dump_enable() happens in between dump_wait() and dump_get().
         */
        kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);

        kbasep_hwcnt_backend_jm_cc_enable(backend_jm, enable_map, timestamp_ns);

        return 0;
error:
        return errcode;
}

/* JM backend implementation of kbase_hwcnt_backend_dump_enable_fn */
static int kbasep_hwcnt_backend_jm_dump_enable(struct kbase_hwcnt_backend *backend,
                const struct kbase_hwcnt_enable_map *enable_map)
{
        unsigned long flags;
        int errcode;
        struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
        struct kbase_device *kbdev;

        if (!backend_jm)
                return -EINVAL;

        kbdev = backend_jm->kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

        errcode = kbasep_hwcnt_backend_jm_dump_enable_nolock(backend, enable_map);

        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

        return errcode;
}

/* JM backend implementation of kbase_hwcnt_backend_dump_disable_fn */
static void kbasep_hwcnt_backend_jm_dump_disable(struct kbase_hwcnt_backend *backend,
                struct kbase_hwcnt_dump_buffer *dump_buffer,
                const struct kbase_hwcnt_enable_map *enable_map)
{
        int errcode;
        struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;

        if (WARN_ON(!backend_jm ||
                    (dump_buffer && (backend_jm->info->metadata != dump_buffer->metadata)) ||
                    (enable_map && (backend_jm->info->metadata != enable_map->metadata)) ||
                    (dump_buffer && !enable_map)))
                return;

        /* No WARN needed here, but still return early if backend is already disabled */
        if (!backend_jm->enabled)
                return;

        kbasep_hwcnt_backend_jm_cc_disable(backend_jm);

        errcode = kbase_instr_hwcnt_disable_internal(backend_jm->kctx);
        WARN_ON(errcode);

        /* Disabling HWCNT is an indication that blocks have been powered off. This is important to
         * know for L2 and Tiler blocks, as this is currently the only way a backend can know if
         * they are being powered off.
         *
         * In any case, even if they weren't really powered off, we won't be counting whilst
         * disabled.
         *
         * Update the block state information in the block state accumulator to show this, so that
         * in the next dump blocks will have been seen as powered off for some of the time.
         */
        kbase_hwcnt_block_state_append(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_OFF);

        if (dump_buffer) {
                /* In some use-cases, the caller will need the information whilst the counters are
                 * disabled, but will not be able to call into the backend to dump them. Instead,
                 * they have an opportunity here to request them to be accumulated into their
                 * buffer immediately.
                 *
                 * This consists of taking a sample of the accumulated block state (as though a
                 * real dump_get() had happened), then transfer ownership of that to the caller
                 * (i.e. erasing our copy of it).
                 */
                kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt,
                                                   &backend_jm->accum_all_blk_stt);
                kbase_hwcnt_dump_buffer_block_state_update(dump_buffer, enable_map,
                                                           backend_jm->sampled_all_blk_stt);
                /* Now the block state has been passed out into the caller's own accumulation
                 * buffer, clear our own accumulated and sampled block state - ownership has been
                 * transferred.
                 */
                kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt,
                                            KBASE_HWCNT_STATE_UNKNOWN);
                kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt,
                                            KBASE_HWCNT_STATE_UNKNOWN);
        }

        backend_jm->enabled = false;
}

/* JM backend implementation of kbase_hwcnt_backend_dump_clear_fn */
static int kbasep_hwcnt_backend_jm_dump_clear(struct kbase_hwcnt_backend *backend)
{
        struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;

        if (!backend_jm || !backend_jm->enabled)
                return -EINVAL;

        return kbase_instr_hwcnt_clear(backend_jm->kctx);
}

/* JM backend implementation of kbase_hwcnt_backend_dump_request_fn */
static int kbasep_hwcnt_backend_jm_dump_request(struct kbase_hwcnt_backend *backend,
                u64 *dump_time_ns)
{
        struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
        struct kbase_device *kbdev;
        const struct kbase_hwcnt_metadata *metadata;
        u64 current_cycle_count;
        size_t clk;
        int ret;

        if (!backend_jm || !backend_jm->enabled || !dump_time_ns)
                return -EINVAL;

        kbdev = backend_jm->kctx->kbdev;
        metadata = backend_jm->info->metadata;

        /* Disable pre-emption, to make the timestamp as accurate as possible */
        preempt_disable();
        {
                *dump_time_ns = kbasep_hwcnt_backend_jm_timestamp_ns(backend);
                ret = kbase_instr_hwcnt_request_dump(backend_jm->kctx);

                kbase_hwcnt_metadata_for_each_clock(metadata, clk) {
                        if (!kbase_hwcnt_clk_enable_map_enabled(backend_jm->clk_enable_map, clk))
                                continue;

                        if (clk == KBASE_CLOCK_DOMAIN_TOP) {
                                /* Read cycle count for top clock domain. */
                                kbase_backend_get_gpu_time_norequest(kbdev, &current_cycle_count,
                                                                     NULL, NULL);
                        } else {
                                /*
                                 * Estimate cycle count for non-top clock
                                 * domain.
                                 */
                                current_cycle_count = kbase_ccswe_cycle_at(
                                        &backend_jm->ccswe_shader_cores, *dump_time_ns);
                        }
                        backend_jm->cycle_count_elapsed[clk] =
                                current_cycle_count - backend_jm->prev_cycle_count[clk];

                        /*
                         * Keep the current cycle count for later calculation.
                         */
                        backend_jm->prev_cycle_count[clk] = current_cycle_count;
                }
        }
        preempt_enable();

        return ret;
}

/* JM backend implementation of kbase_hwcnt_backend_dump_wait_fn */
static int kbasep_hwcnt_backend_jm_dump_wait(struct kbase_hwcnt_backend *backend)
{
        int errcode;
        struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;

        if (!backend_jm || !backend_jm->enabled)
                return -EINVAL;

        errcode = kbase_instr_hwcnt_wait_for_dump(backend_jm->kctx);
        if (errcode)
                return errcode;

        /* Now that we've completed a sample, also sample+clear the accumulated block state.
         *
         * This is to ensure that a dump_enable() that happens in between dump_wait() and
         * dump_get() is reported on the _next_ dump, not the _current_ dump. That is, the block
         * state is reported at the actual time that counters are being sampled.
         */
        kbase_hwcnt_block_state_accumulate(&backend_jm->sampled_all_blk_stt,
                                           &backend_jm->accum_all_blk_stt);
        kbase_hwcnt_block_state_set(&backend_jm->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);

        return errcode;
}

/* JM backend implementation of kbase_hwcnt_backend_dump_get_fn */
static int kbasep_hwcnt_backend_jm_dump_get(struct kbase_hwcnt_backend *backend,
                struct kbase_hwcnt_dump_buffer *dst,
                const struct kbase_hwcnt_enable_map *dst_enable_map, bool accumulate)
{
        struct kbase_hwcnt_backend_jm *backend_jm = (struct kbase_hwcnt_backend_jm *)backend;
        size_t clk;
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
        struct kbase_device *kbdev;
        unsigned long flags;
#endif /* CONFIG_MALI_NO_MALI */
        int errcode;

        if (!backend_jm || !dst || !dst_enable_map ||
            (backend_jm->info->metadata != dst->metadata) ||
            (dst_enable_map->metadata != dst->metadata))
                return -EINVAL;

        /* Invalidate the kernel buffer before reading from it. */
        kbase_sync_mem_regions(backend_jm->kctx, backend_jm->vmap, KBASE_SYNC_TO_CPU);

        /* Dump sample to the internal 64-bit user buffer. */
        kbasep_hwcnt_backend_jm_dump_sample(backend_jm);

        /* Extract elapsed cycle count for each clock domain if enabled. */
        kbase_hwcnt_metadata_for_each_clock(dst_enable_map->metadata, clk) {
                if (!kbase_hwcnt_clk_enable_map_enabled(dst_enable_map->clk_enable_map, clk))
                        continue;

                /* Reset the counter to zero if accumulation is off. */
                if (!accumulate)
                        dst->clk_cnt_buf[clk] = 0;
                dst->clk_cnt_buf[clk] += backend_jm->cycle_count_elapsed[clk];
        }

#if IS_ENABLED(CONFIG_MALI_NO_MALI)
        kbdev = backend_jm->kctx->kbdev;

        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

        /* Update the current configuration information. */
        errcode = kbasep_hwcnt_gpu_update_curr_config(kbdev, &backend_jm->curr_config);

        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

        if (errcode)
                return errcode;
#endif /* CONFIG_MALI_NO_MALI */
        errcode = kbase_hwcnt_jm_dump_get(dst, backend_jm->to_user_buf, dst_enable_map,
                                          backend_jm->pm_core_mask, backend_jm->debug_core_mask,
                                          backend_jm->max_core_mask, backend_jm->max_l2_slices,
                                          &backend_jm->curr_config, accumulate);

        if (errcode)
                return errcode;

        kbase_hwcnt_dump_buffer_block_state_update(dst, dst_enable_map,
                                                   backend_jm->sampled_all_blk_stt);
        kbase_hwcnt_block_state_set(&backend_jm->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);

        return errcode;
}

/**
 * kbasep_hwcnt_backend_jm_dump_alloc() - Allocate a GPU dump buffer.
 * @info:        Non-NULL pointer to JM backend info.
 * @kctx:        Non-NULL pointer to kbase context.
 * @gpu_dump_va: Non-NULL pointer to where GPU dump buffer virtual address
 *               is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_jm_dump_alloc(const struct kbase_hwcnt_backend_jm_info *info,
                struct kbase_context *kctx, u64 *gpu_dump_va)
{
        struct kbase_va_region *reg;
        u64 flags;
        u64 nr_pages;

        /* Calls to this function are inherently asynchronous, with respect to
         * MMU operations.
         */
        const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

        WARN_ON(!info);
        WARN_ON(!kctx);
        WARN_ON(!gpu_dump_va);

        flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_WR |
                BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASE_MEM_CACHED_CPU | BASE_MEM_UNCACHED_GPU;

        nr_pages = PFN_UP(info->dump_bytes);

        reg = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, gpu_dump_va, mmu_sync_info);

        if (!reg)
                return -ENOMEM;

        return 0;
}

/**
 * kbasep_hwcnt_backend_jm_dump_free() - Free an allocated GPU dump buffer.
 * @kctx:        Non-NULL pointer to kbase context.
 * @gpu_dump_va: GPU dump buffer virtual address.
 */
static void kbasep_hwcnt_backend_jm_dump_free(struct kbase_context *kctx, u64 gpu_dump_va)
{
        WARN_ON(!kctx);

        if (gpu_dump_va)
                kbase_mem_free(kctx, gpu_dump_va);
}

/**
 * kbasep_hwcnt_backend_jm_destroy() - Destroy a JM backend.
 * @backend: Pointer to JM backend to destroy.
 *
 * Can be safely called on a backend in any state of partial construction.
 */
static void kbasep_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_jm *backend)
{
        if (!backend)
                return;

        if (backend->kctx) {
                struct kbase_context *kctx = backend->kctx;
                struct kbase_device *kbdev = kctx->kbdev;

                if (backend->cpu_dump_va)
                        kbase_phy_alloc_mapping_put(kctx, backend->vmap);

                if (backend->gpu_dump_va)
                        kbasep_hwcnt_backend_jm_dump_free(kctx, backend->gpu_dump_va);

                kbasep_js_release_privileged_ctx(kbdev, kctx);
                kbase_destroy_context(kctx);
        }

        kfree(backend->to_user_buf);

        kfree(backend);
}

/**
 * kbasep_hwcnt_backend_jm_create() - Create a JM backend.
 * @info:        Non-NULL pointer to backend info.
 * @out_backend: Non-NULL pointer to where backend is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_jm_create(const struct kbase_hwcnt_backend_jm_info *info,
                struct kbase_hwcnt_backend_jm **out_backend)
{
        int errcode;
        struct kbase_device *kbdev;
        struct kbase_hwcnt_backend_jm *backend = NULL;

        WARN_ON(!info);
        WARN_ON(!out_backend);

        kbdev = info->kbdev;

        backend = kzalloc(sizeof(*backend), GFP_KERNEL);
        if (!backend)
                goto alloc_error;

        backend->info = info;
        kbasep_hwcnt_backend_jm_init_layout(&info->hwcnt_gpu_info, &backend->phys_layout);

        backend->kctx = kbase_create_context(kbdev, true,
                        BASE_CONTEXT_SYSTEM_MONITOR_SUBMIT_DISABLED, 0, NULL);
        if (!backend->kctx)
                goto alloc_error;

        kbasep_js_schedule_privileged_ctx(kbdev, backend->kctx);

        errcode = kbasep_hwcnt_backend_jm_dump_alloc(info, backend->kctx, &backend->gpu_dump_va);
        if (errcode)
                goto error;

        backend->cpu_dump_va =
                kbase_phy_alloc_mapping_get(backend->kctx, backend->gpu_dump_va, &backend->vmap);
        if (!backend->cpu_dump_va || !backend->vmap)
                goto alloc_error;

        backend->to_user_buf = kzalloc(info->metadata->dump_buf_bytes, GFP_KERNEL);
        if (!backend->to_user_buf)
                goto alloc_error;

        kbase_ccswe_init(&backend->ccswe_shader_cores);
        backend->rate_listener.notify = kbasep_hwcnt_backend_jm_on_freq_change;

        kbase_hwcnt_block_state_set(&backend->accum_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);
        kbase_hwcnt_block_state_set(&backend->sampled_all_blk_stt, KBASE_HWCNT_STATE_UNKNOWN);

        *out_backend = backend;
        return 0;

alloc_error:
        errcode = -ENOMEM;
error:
        kbasep_hwcnt_backend_jm_destroy(backend);
        return errcode;
}

/* JM backend implementation of kbase_hwcnt_backend_metadata_fn */
static const struct kbase_hwcnt_metadata *
kbasep_hwcnt_backend_jm_metadata(const struct kbase_hwcnt_backend_info *info)
{
        if (!info)
                return NULL;

        return ((const struct kbase_hwcnt_backend_jm_info *)info)->metadata;
}

/* JM backend implementation of kbase_hwcnt_backend_init_fn */
static int kbasep_hwcnt_backend_jm_init(const struct kbase_hwcnt_backend_info *info,
                struct kbase_hwcnt_backend **out_backend)
{
        int errcode;
        struct kbase_hwcnt_backend_jm *backend = NULL;

        if (!info || !out_backend)
                return -EINVAL;

        errcode = kbasep_hwcnt_backend_jm_create((const struct kbase_hwcnt_backend_jm_info *)info,
                                                 &backend);
        if (errcode)
                return errcode;

        *out_backend = (struct kbase_hwcnt_backend *)backend;

        return 0;
}

/* JM backend implementation of kbase_hwcnt_backend_term_fn */
static void kbasep_hwcnt_backend_jm_term(struct kbase_hwcnt_backend *backend)
{
        if (!backend)
                return;
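
        /* Make sure dumping is disabled and the clock rate listener is
         * unsubscribed before the backend and its kbase context are destroyed.
         */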
        kbasep_hwcnt_backend_jm_dump_disable(backend, NULL, NULL);
        kbasep_hwcnt_backend_jm_destroy((struct kbase_hwcnt_backend_jm *)backend);
}

/**
 * kbasep_hwcnt_backend_jm_info_destroy() - Destroy a JM backend info.
 * @info: Pointer to info to destroy.
 *
 * Can be safely called on a backend info in any state of partial construction.
 */
static void kbasep_hwcnt_backend_jm_info_destroy(const struct kbase_hwcnt_backend_jm_info *info)
{
        if (!info)
                return;

        kbase_hwcnt_jm_metadata_destroy(info->metadata);
        kfree(info);
}

/**
 * kbasep_hwcnt_backend_jm_info_create() - Create a JM backend info.
 * @kbdev:    Non-NULL pointer to kbase device.
 * @out_info: Non-NULL pointer to where info is stored on success.
 *
 * Return: 0 on success, else error code.
 */
static int kbasep_hwcnt_backend_jm_info_create(struct kbase_device *kbdev,
                const struct kbase_hwcnt_backend_jm_info **out_info)
{
        int errcode = -ENOMEM;
        struct kbase_hwcnt_backend_jm_info *info = NULL;

        WARN_ON(!kbdev);
        WARN_ON(!out_info);

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info)
                return errcode;

        info->kbdev = kbdev;

#if defined(CONFIG_MALI_PRFCNT_SET_SECONDARY)
        info->counter_set = KBASE_HWCNT_SET_SECONDARY;
#elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
        info->counter_set = KBASE_HWCNT_SET_TERTIARY;
#else
        /* Default to primary */
        info->counter_set = KBASE_HWCNT_SET_PRIMARY;
#endif

        errcode = kbasep_hwcnt_backend_jm_gpu_info_init(kbdev, &info->hwcnt_gpu_info);
        if (errcode)
                goto error;

        errcode = kbase_hwcnt_jm_metadata_create(&info->hwcnt_gpu_info, info->counter_set,
                                                 &info->metadata, &info->dump_bytes);
        if (errcode)
                goto error;

        *out_info = info;

        return 0;
error:
        kbasep_hwcnt_backend_jm_info_destroy(info);
        return errcode;
}

int kbase_hwcnt_backend_jm_create(struct kbase_device *kbdev,
                                  struct kbase_hwcnt_backend_interface *iface)
{
        int errcode;
        const struct kbase_hwcnt_backend_jm_info *info = NULL;

        if (!kbdev || !iface)
                return -EINVAL;

        errcode = kbasep_hwcnt_backend_jm_info_create(kbdev, &info);
        if (errcode)
                return errcode;

        iface->info = (struct kbase_hwcnt_backend_info *)info;
        iface->metadata = kbasep_hwcnt_backend_jm_metadata;
        iface->init = kbasep_hwcnt_backend_jm_init;
        iface->term = kbasep_hwcnt_backend_jm_term;
        iface->timestamp_ns = kbasep_hwcnt_backend_jm_timestamp_ns;
        iface->dump_enable = kbasep_hwcnt_backend_jm_dump_enable;
        iface->dump_enable_nolock = kbasep_hwcnt_backend_jm_dump_enable_nolock;
        iface->dump_disable = kbasep_hwcnt_backend_jm_dump_disable;
        iface->dump_clear = kbasep_hwcnt_backend_jm_dump_clear;
        iface->dump_request = kbasep_hwcnt_backend_jm_dump_request;
        iface->dump_wait = kbasep_hwcnt_backend_jm_dump_wait;
        iface->dump_get = kbasep_hwcnt_backend_jm_dump_get;

        return 0;
}

void kbase_hwcnt_backend_jm_destroy(struct kbase_hwcnt_backend_interface *iface)
{
        if (!iface)
                return;

        kbasep_hwcnt_backend_jm_info_destroy(
                (const struct kbase_hwcnt_backend_jm_info *)iface->info);
        memset(iface, 0, sizeof(*iface));
}
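
/*
 * Illustrative sketch (kept as a comment, not compiled) of the order in which
 * a client is expected to drive the interface registered by
 * kbase_hwcnt_backend_jm_create() for a single dump cycle. The enable_map,
 * dump_buf and dump_time_ns variables are placeholders, assumed to have been
 * allocated by the caller from the metadata returned by
 * iface->metadata(iface->info):
 *
 *	struct kbase_hwcnt_backend *backend;
 *
 *	iface->init(iface->info, &backend);
 *	iface->dump_enable(backend, enable_map);
 *
 *	iface->dump_request(backend, &dump_time_ns);
 *	iface->dump_wait(backend);
 *	iface->dump_get(backend, dump_buf, enable_map, false);
 *
 *	iface->dump_disable(backend, dump_buf, enable_map);
 *	iface->term(backend);
 */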