// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * Metrics for power management
 */

#include
#include
#include
#include

#if MALI_USE_CSF
#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
#include
#else
#include
#endif /* !MALI_USE_CSF */

#include
#include

#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) || !MALI_USE_CSF
/* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
 * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
 * under 11s. Exceeding this will cause overflow
 */
#define KBASE_PM_TIME_SHIFT 8
#endif

#if MALI_USE_CSF
/* To get the GPU_ACTIVE value in nano seconds unit */
#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
#endif

/*
 * Possible state transitions
 * ON -> ON | OFF | STOPPED
 * STOPPED -> ON | OFF
 * OFF -> ON
 *
 *
 * ┌─e─┐┌────────────f─────────────┐
 * │   v│                          v
 * └───ON ──a──> STOPPED ──b──> OFF
 *     ^^            │            │
 *     │└──────c─────┘            │
 *     │                          │
 *     └─────────────d────────────┘
 *
 * Transition effects:
 * a. None
 * b. Timer expires without restart
 * c. Timer is not stopped, timer period is unaffected
 * d. Timer must be restarted
 * e. Callback is executed and the timer is restarted
 * f. Timer is cancelled, or the callback is waited on if currently executing.
 *    This is called during tear-down and should not be subject to a race from
 *    an OFF->ON transition.
 */
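/*
 * In this file the transitions above map onto the following code paths
 * (summary derived from the functions below):
 * a. kbase_pm_metrics_stop() moves ON to STOPPED
 * b. dvfs_callback() finds the state STOPPED and moves it to OFF without
 *    restarting the timer
 * c. kbase_pm_metrics_start() finds the state STOPPED and moves it back to
 *    ON, leaving the already-running timer untouched
 * d. kbase_pm_metrics_start() finds the state OFF and restarts the timer
 * e. dvfs_callback() finds the state ON and re-arms the timer
 * f. kbasep_pm_metrics_term() forces the state to OFF and cancels the timer,
 *    waiting for any callback in flight
 */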
enum dvfs_metric_timer_state { TIMER_OFF, TIMER_STOPPED, TIMER_ON };

#ifdef CONFIG_MALI_MIDGARD_DVFS
static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
{
	struct kbasep_pm_metrics_state *metrics;

	if (WARN_ON(!timer))
		return HRTIMER_NORESTART;

	metrics = container_of(timer, struct kbasep_pm_metrics_state, timer);

	/* Transition (b) to fully off if timer was stopped, don't restart the
	 * timer in this case
	 */
	if (atomic_cmpxchg(&metrics->timer_state, TIMER_STOPPED, TIMER_OFF) != TIMER_ON)
		return HRTIMER_NORESTART;

	kbase_pm_get_dvfs_action(metrics->kbdev);

	/* Set the new expiration time and restart (transition e) */
	hrtimer_forward_now(timer, HR_TIMER_DELAY_MSEC(metrics->kbdev->pm.dvfs_period));

	return HRTIMER_RESTART;
}
#endif /* CONFIG_MALI_MIDGARD_DVFS */

int kbasep_pm_metrics_init(struct kbase_device *kbdev)
{
#if MALI_USE_CSF
	struct kbase_ipa_control_perf_counter perf_counter;
	int err;

	/* One counter group */
	const size_t NUM_PERF_COUNTERS = 1;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	kbdev->pm.backend.metrics.kbdev = kbdev;
	kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();

	perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR;

	/* Normalize values by GPU frequency */
	perf_counter.gpu_norm = true;

	/* We need the GPU_ACTIVE counter, which is in the CSHW group */
	perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW;

	/* We need the GPU_ACTIVE counter */
	perf_counter.idx = GPU_ACTIVE_CNT_IDX;

	err = kbase_ipa_control_register(kbdev, &perf_counter, NUM_PERF_COUNTERS,
					 &kbdev->pm.backend.metrics.ipa_control_client);
	if (err) {
		dev_err(kbdev->dev, "Failed to register IPA with kbase_ipa_control: err=%d", err);
		return -1;
	}
#else
	KBASE_DEBUG_ASSERT(kbdev != NULL);
	kbdev->pm.backend.metrics.kbdev = kbdev;
	kbdev->pm.backend.metrics.time_period_start = ktime_get_raw();
#endif

	spin_lock_init(&kbdev->pm.backend.metrics.lock);

#ifdef CONFIG_MALI_MIDGARD_DVFS
	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
	kbdev->pm.backend.metrics.initialized = true;
	atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
	kbase_pm_metrics_start(kbdev);
#endif /* CONFIG_MALI_MIDGARD_DVFS */

#if MALI_USE_CSF
	/* The sanity check on the GPU_ACTIVE performance counter
	 * is skipped for Juno platforms that have timing problems.
	 */
	kbdev->pm.backend.metrics.skip_gpu_active_sanity_check =
		of_machine_is_compatible("arm,juno");
#endif

	return 0;
}
KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
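/*
 * Note on units (illustrative, based on the registration parameters above):
 * with scaling_factor = GPU_ACTIVE_SCALING_FACTOR and gpu_norm = true, the
 * value reported for this client by kbase_ipa_control_query() is treated by
 * the code below as approximately the number of nanoseconds the GPU was
 * active; the assumption is counter_delta * 1E9 / gpu_frequency. Under that
 * assumption, 200000000 active cycles at a steady 800 MHz clock would read
 * back as roughly 250000000 ns.
 */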
void kbasep_pm_metrics_term(struct kbase_device *kbdev)
{
#ifdef CONFIG_MALI_MIDGARD_DVFS
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	/* Cancel the timer, and block if the callback is currently executing
	 * (transition f)
	 */
	kbdev->pm.backend.metrics.initialized = false;
	atomic_set(&kbdev->pm.backend.metrics.timer_state, TIMER_OFF);
	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
#endif /* CONFIG_MALI_MIDGARD_DVFS */

#if MALI_USE_CSF
	kbase_ipa_control_unregister(kbdev, kbdev->pm.backend.metrics.ipa_control_client);
#else
	CSTD_UNUSED(kbdev);
#endif
}

KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);

/* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
 * function
 */
#if MALI_USE_CSF
#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
{
	int err;
	u64 gpu_active_counter;
	u64 protected_time;
	ktime_t now;

	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);

	/* Query IPA_CONTROL for the latest GPU-active and protected-time
	 * info.
	 */
	err = kbase_ipa_control_query(kbdev, kbdev->pm.backend.metrics.ipa_control_client,
				      &gpu_active_counter, 1, &protected_time);

	/* Read the timestamp after reading the GPU_ACTIVE counter value.
	 * This ensures the time gap between the 2 reads is consistent for
	 * a meaningful comparison between the increment of GPU_ACTIVE and
	 * elapsed time. The lock taken inside kbase_ipa_control_query()
	 * can cause a lot of variation.
	 */
	now = ktime_get_raw();

	if (err) {
		dev_err(kbdev->dev,
			"Failed to query the increment of GPU_ACTIVE counter: err=%d", err);
	} else {
		u64 diff_ns;
		s64 diff_ns_signed;
		u32 ns_time;
		ktime_t diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);

		diff_ns_signed = ktime_to_ns(diff);

		if (diff_ns_signed < 0)
			return false;

		/* The GPU internal counter is updated every IPA_CONTROL_TIMER_DEFAULT_VALUE_MS
		 * milliseconds. If an update occurs prematurely and the counter has not been
		 * updated, the same counter value will be obtained, resulting in a difference
		 * of zero. To handle this scenario, we will skip the update if the difference
		 * is zero and the update occurred less than 1.5 times the internal update period
		 * (IPA_CONTROL_TIMER_DEFAULT_VALUE_MS). Ideally, we should check the counter
		 * update timestamp in the GPU internal register to ensure accurate updates.
		 */
		if (gpu_active_counter == 0 &&
		    diff_ns_signed < IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2)
			return false;

		diff_ns = (u64)diff_ns_signed;

#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
		/* The GPU_ACTIVE counter shouldn't clock-up more time than has
		 * actually elapsed - but still some margin needs to be given
		 * when doing the comparison. There could be some drift between
		 * the CPU and GPU clock.
		 *
		 * Can do the check only in a real driver build, as an arbitrary
		 * value for GPU_ACTIVE can be fed into the dummy model in the
		 * no_mali configuration, which may not correspond to the real
		 * elapsed time.
		 */
		if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) {
			/* The margin is scaled to allow for the worst-case
			 * scenario where the samples are maximally separated,
			 * plus a small offset for sampling errors.
			 */
			u64 const MARGIN_NS =
				IPA_CONTROL_TIMER_DEFAULT_VALUE_MS * NSEC_PER_MSEC * 3 / 2;

			if (gpu_active_counter > (diff_ns + MARGIN_NS)) {
				dev_info(
					kbdev->dev,
					"GPU activity takes longer than time interval: %llu ns > %llu ns",
					(unsigned long long)gpu_active_counter,
					(unsigned long long)diff_ns);
			}
		}
#endif
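		/* Worked example for the conversion below, with illustrative
		 * numbers: for a 100 ms sample window, diff_ns = 100000000 so
		 * ns_time = 100000000 >> 8 = 390625. If gpu_active_counter =
		 * 60000000 ns and protected_time = 10000000 ns, then after
		 * shifting both by KBASE_PM_TIME_SHIFT and adding them,
		 * gpu_active_counter = 234375 + 39062 = 273437, so time_busy
		 * grows by 273437 and time_idle by 390625 - 273437 = 117188.
		 */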
		/* Calculate time difference in units of 256ns */
		ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT);

		/* Add protected_time to gpu_active_counter so that time in
		 * protected mode is included in the apparent GPU active time,
		 * then convert it from units of 1ns to units of 256ns, to
		 * match what JM GPUs use. The assumption is made here that the
		 * GPU is 100% busy while in protected mode, so we should add
		 * this since the GPU can't (and thus won't) update these
		 * counters while it's actually in protected mode.
		 *
		 * Perform the add after dividing each value down, to reduce
		 * the chances of overflows.
		 */
		protected_time >>= KBASE_PM_TIME_SHIFT;
		gpu_active_counter >>= KBASE_PM_TIME_SHIFT;
		gpu_active_counter += protected_time;

		/* Ensure the following equations don't go wrong if
		 * gpu_active_counter is somehow slightly larger than ns_time
		 */
		gpu_active_counter = MIN(gpu_active_counter, ns_time);

		kbdev->pm.backend.metrics.values.time_busy += gpu_active_counter;

		kbdev->pm.backend.metrics.values.time_idle += ns_time - gpu_active_counter;

		/* Also make time in protected mode available explicitly,
		 * so users of this data have this info, too.
		 */
		kbdev->pm.backend.metrics.values.time_in_protm += protected_time;
	}

	kbdev->pm.backend.metrics.time_period_start = now;

	return true;
}
#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
#else
static bool kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev, ktime_t now)
{
	ktime_t diff;

	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);

	diff = ktime_sub(now, kbdev->pm.backend.metrics.time_period_start);
	if (ktime_to_ns(diff) < 0)
		return false;

	if (kbdev->pm.backend.metrics.gpu_active) {
		u32 ns_time = (u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);

		kbdev->pm.backend.metrics.values.time_busy += ns_time;
		if (kbdev->pm.backend.metrics.active_cl_ctx[0])
			kbdev->pm.backend.metrics.values.busy_cl[0] += ns_time;
		if (kbdev->pm.backend.metrics.active_cl_ctx[1])
			kbdev->pm.backend.metrics.values.busy_cl[1] += ns_time;
		if (kbdev->pm.backend.metrics.active_gl_ctx[0])
			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
		if (kbdev->pm.backend.metrics.active_gl_ctx[1])
			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
		if (kbdev->pm.backend.metrics.active_gl_ctx[2])
			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
	} else {
		kbdev->pm.backend.metrics.values.time_idle +=
			(u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
	}

	kbdev->pm.backend.metrics.time_period_start = now;
	return true;
}
#endif /* MALI_USE_CSF */
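/*
 * Usage sketch for kbase_pm_get_dvfs_metrics() below (illustrative only): a
 * caller keeps its own struct kbasep_pm_metrics snapshot and receives the
 * delta accumulated since that snapshot was last passed in, e.g.
 *
 *   struct kbasep_pm_metrics last = { 0 };
 *   struct kbasep_pm_metrics diff;
 *
 *   kbase_pm_get_dvfs_metrics(kbdev, &last, &diff);
 *
 * after which diff.time_busy / (diff.time_busy + diff.time_idle) gives the
 * utilisation over the period, and 'last' holds the current totals ready for
 * the next call. This is the pattern used by kbase_pm_get_dvfs_action() via
 * kbdev->pm.backend.metrics.dvfs_last and dvfs_diff.
 */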
#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev, struct kbasep_pm_metrics *last,
			       struct kbasep_pm_metrics *diff)
{
	struct kbasep_pm_metrics *cur = &kbdev->pm.backend.metrics.values;
	unsigned long flags;

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);

#if MALI_USE_CSF
	if (!kbase_pm_get_dvfs_utilisation_calc(kbdev)) {
#else
	if (!kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get_raw())) {
#endif
		spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
		return;
	}

	memset(diff, 0, sizeof(*diff));
	diff->time_busy = cur->time_busy - last->time_busy;
	diff->time_idle = cur->time_idle - last->time_idle;

#if MALI_USE_CSF
	diff->time_in_protm = cur->time_in_protm - last->time_in_protm;
#else
	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
	diff->busy_gl = cur->busy_gl - last->busy_gl;
#endif

	*last = *cur;

	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
}
KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
#endif

#ifdef CONFIG_MALI_MIDGARD_DVFS
void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
{
	int utilisation;
	struct kbasep_pm_metrics *diff;
#if !MALI_USE_CSF
	int busy;
	int util_gl_share;
	int util_cl_share[2];
#endif

	KBASE_DEBUG_ASSERT(kbdev != NULL);

	diff = &kbdev->pm.backend.metrics.dvfs_diff;

	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);

	utilisation = (100 * diff->time_busy) / max(diff->time_busy + diff->time_idle, 1u);

#if !MALI_USE_CSF
	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);

	util_gl_share = (100 * diff->busy_gl) / busy;
	util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
	util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;

	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
#else
	/* Note that, at present, we don't pass protected-mode time to the
	 * platform here. It's unlikely to be useful, however, as the platform
	 * probably just cares whether the GPU is busy or not; time in
	 * protected mode is already added to busy-time at this point, though,
	 * so we should be good.
	 */
	kbase_platform_dvfs_event(kbdev, utilisation);
#endif
}
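/*
 * Worked example for the calculation in kbase_pm_get_dvfs_action() above,
 * with illustrative numbers: if a sample window yields diff->time_busy =
 * 273437 and diff->time_idle = 117188 (both in 256 ns units, roughly 70 ms
 * busy out of a 100 ms window), then
 *   utilisation = (100 * 273437) / (273437 + 117188) = 69.
 * For JM GPUs the busy time is further attributed to the queues, e.g.
 * busy_gl = 200000 and busy_cl[0] = 73437 would give util_gl_share = 73 and
 * util_cl_share[0] = 26, with util_cl_share[1] = 0.
 */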
bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
{
	KBASE_DEBUG_ASSERT(kbdev != NULL);

	return atomic_read(&kbdev->pm.backend.metrics.timer_state) == TIMER_ON;
}
KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);

void kbase_pm_metrics_start(struct kbase_device *kbdev)
{
	struct kbasep_pm_metrics_state *metrics = &kbdev->pm.backend.metrics;

	if (unlikely(!metrics->initialized))
		return;

	/* Transition to ON, from a stopped state (transition c) */
	if (atomic_xchg(&metrics->timer_state, TIMER_ON) == TIMER_OFF)
		/* Start the timer only if it's been fully stopped (transition d) */
		hrtimer_start(&metrics->timer, HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
			      HRTIMER_MODE_REL);
}

void kbase_pm_metrics_stop(struct kbase_device *kbdev)
{
	if (unlikely(!kbdev->pm.backend.metrics.initialized))
		return;

	/* Timer is stopped if it's currently on (transition a) */
	atomic_cmpxchg(&kbdev->pm.backend.metrics.timer_state, TIMER_ON, TIMER_STOPPED);
}

#endif /* CONFIG_MALI_MIDGARD_DVFS */

#if !MALI_USE_CSF
/**
 * kbase_pm_metrics_active_calc - Update PM active counts based on currently
 *                                running atoms
 * @kbdev: Device pointer
 *
 * The caller must hold kbdev->pm.backend.metrics.lock
 */
static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
{
	unsigned int js;

	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);

	kbdev->pm.backend.metrics.active_gl_ctx[0] = 0;
	kbdev->pm.backend.metrics.active_gl_ctx[1] = 0;
	kbdev->pm.backend.metrics.active_gl_ctx[2] = 0;
	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
	kbdev->pm.backend.metrics.gpu_active = false;

	for (js = 0; js < BASE_JM_MAX_NR_SLOTS; js++) {
		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);

		/* Head atom may have just completed, so if it isn't running
		 * then try the next atom
		 */
		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
			katom = kbase_gpu_inspect(kbdev, js, 1);

		if (katom && katom->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) {
			if (katom->core_req & BASE_JD_REQ_ONLY_COMPUTE) {
				u32 device_nr =
					(katom->core_req & BASE_JD_REQ_SPECIFIC_COHERENT_GROUP) ?
						katom->device_nr :
						0;
				if (!WARN_ON(device_nr >= 2))
					kbdev->pm.backend.metrics.active_cl_ctx[device_nr] = 1;
			} else {
				kbdev->pm.backend.metrics.active_gl_ctx[js] = 1;
				trace_sysgraph(SGR_ACTIVE, 0, js);
			}
			kbdev->pm.backend.metrics.gpu_active = true;
		} else {
			trace_sysgraph(SGR_INACTIVE, 0, js);
		}
	}
}

/* called when job is submitted to or removed from a GPU slot */
void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
{
	unsigned long flags;
	ktime_t now;

	lockdep_assert_held(&kbdev->hwaccess_lock);

	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);

	if (!timestamp) {
		now = ktime_get_raw();
		timestamp = &now;
	}

	/* Track how much time has been spent busy or idle. For JM GPUs,
	 * this also evaluates how long CL and/or GL jobs have been busy for.
	 */
	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);

	kbase_pm_metrics_active_calc(kbdev);
	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
}
#endif /* !MALI_USE_CSF */